Imported Upstream version 1.5.1

This commit is contained in:
Mario Fetka
2020-09-22 02:25:22 +02:00
commit 434d6067d9
2103 changed files with 928962 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
/*
* Copyright (c) 2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "cbfs/cbfs_enumeration_context.h"
#include "xtreemfs/MRC.pb.h"
namespace xtreemfs {
/** Creates an empty context: both counters start at 0 and no entries are set. */
CbFSEnumerationContext::CbFSEnumerationContext()
    : offset(0),
      dir_entries(NULL),
      next_index(0) {
}
/** Deletes the object dir_entries points to; a NULL pointer is a no-op. */
CbFSEnumerationContext::~CbFSEnumerationContext() {
  delete dir_entries;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,109 @@
/*
* Copyright (c) 2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "cbfs/cbfs_options.h"
#include <boost/lexical_cast.hpp>
#include <boost/program_options/cmdline.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <sstream>
#include "libxtreemfs/helper.h"
#include "libxtreemfs/xtreemfs_exception.h"
using namespace std;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Sets the CbFS specific defaults on top of the general Options() and
 * prepares the usage text which is printed by ShowCommandLineUsage() and
 * ShowCommandLineHelp().
 */
CbFSOptions::CbFSOptions() : Options(), cbfs_descriptions_("CbFS Options") {
  // Windows Explorer copies files in 1 MB chunks and therefore more than
  // the default 128 kB. The value is given in kB, i.e. 1 MB equals 1024
  // (the former value of 1024 * 1024 kB corresponded to 1 GB).
  async_writes_max_request_size_kb = 1024;

  // CbFS options.
  helptext_usage_ =
      "mount.xtreemfs: Mounts an XtreemFS Volume.\n"
      "\n"
      "Usage: \n"
      "\tmount.xtreemfs [options] [pbrpc[g|s]://]<dir-host>[:port]"
      "/<volume-name> <drive letter>\n"
      "\n"
      " Example: mount.xtreemfs localhost/myVolume X:\n";
}
void CbFSOptions::ParseCommandLine(int argc, char** argv) {
// Parse general options and retrieve unregistered options for own parsing.
vector<string> options = Options::ParseCommandLine(argc, argv);
// Read Volume URL and mount point from command line.
po::positional_options_description p;
p.add("dir_volume_url", 1);
p.add("mount_point", 1);
po::options_description mount("Mount options");
mount.add_options()
("dir_volume_url", po::value(&xtreemfs_url), "volume to mount")
("mount_point", po::value(&mount_point), "where to mount the volume");
// Parse command line.
po::options_description all_descriptions_;
all_descriptions_.add(mount).add(cbfs_descriptions_);
po::variables_map vm;
try {
po::store(po::command_line_parser(options)
.options(all_descriptions_)
.positional(p)
.style(style::default_style & ~style::allow_guessing)
.run(), vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
// Do not check parameters if the help shall be shown.
if (show_help || empty_arguments_list || show_version) {
return;
}
// Extract information from command line.
Options::ParseURL(kDIR);
// Check for required parameters.
if (service_addresses.empty()) {
throw InvalidCommandLineParametersException("missing DIR host.");
}
if (volume_name.empty()) {
throw InvalidCommandLineParametersException("missing volume name.");
}
if (mount_point.empty()) {
throw InvalidCommandLineParametersException("missing mount point.");
}
}
/** Returns the usage text followed by a hint on how to list all options. */
std::string CbFSOptions::ShowCommandLineUsage() {
  std::string usage = helptext_usage_;
  usage += "\nFor complete list of options, please specify -h or --help.\n";
  return usage;
}
std::string CbFSOptions::ShowCommandLineHelp() {
ostringstream stream;
// No help text given in descriptions for positional mount options. Instead
// the usage is explained here.
stream << helptext_usage_
<< endl
// Descriptions of this class.
<< cbfs_descriptions_
// Descriptions of the general options.
<< Options::ShowCommandLineHelp();
return stream.str();
}
} // namespace xtreemfs

View File

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*/
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <boost/scoped_ptr.hpp>
#include <CbFS.h>
#include <cstdlib>
#include <vector>
#include "cbfs/cbfs_adapter.h"
#include "cbfs/cbfs_options.h"
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "pbrpc/RPC.pb.h" // xtreemfs::pbrpc::UserCredentials
#include "util/logging.h"
#include "xtreemfs/MRC.pb.h" // xtreemfs::pbrpc::Stat
#ifdef _MSC_VER
// Disable "warning C4996: 'strdup': The POSIX name for this item is deprecated. Instead, use the ISO C++ conformant name: _strdup. // NOLINT
#pragma warning(push)
#pragma warning(disable:4996)
#endif // _MSC_VER
using namespace std;
using namespace xtreemfs;
using namespace xtreemfs::util;
/**
 * Entry point: converts the wide character arguments to UTF-8, parses them
 * and mounts the volume through a CbFSAdapter until it gets ejected.
 *
 * Returns 0 after the volume was mounted and ejected again. Returns 1 if
 * only usage, help or version information was printed, if the parameters
 * were invalid or if mounting failed.
 */
int __cdecl wmain(ULONG argc, PWCHAR argv[]) {
  // Keep a UTF-8 copy of every argument for the narrow character parser.
  vector<char*> argv_utf8;
  argv_utf8.reserve(argc);
  for (ULONG i = 0; i < argc; i++) {
    argv_utf8.push_back(strdup(ConvertWindowsToUTF8(argv[i]).c_str()));
  }

  CbFSOptions cbfs_options;
  bool invalid_commandline_parameters = false;
  try {
    cbfs_options.ParseCommandLine(static_cast<int>(argv_utf8.size()),
                                  &argv_utf8[0]);
  } catch(const xtreemfs::XtreemFSException& e) {
    cout << "Invalid parameters found, error: " << e.what() << endl << endl;
    invalid_commandline_parameters = true;
  }

  // strdup() obtains its memory from malloc(), so it has to be released with
  // free() and not with delete[].
  for (size_t i = 0; i < argv_utf8.size(); i++) {
    free(argv_utf8[i]);
  }

  // Display help if needed.
  if (cbfs_options.empty_arguments_list || invalid_commandline_parameters) {
    cout << cbfs_options.ShowCommandLineUsage() << endl;
    return 1;
  }
  if (cbfs_options.show_help) {
    cout << cbfs_options.ShowCommandLineHelp() << endl;
    return 1;
  }
  // Show only the version.
  if (cbfs_options.show_version) {
    cout << cbfs_options.ShowVersion("mount.xtreemfs") << endl;
    return 1;
  }

  int return_code = 0;
  boost::scoped_ptr<CbFSAdapter> cbfs_adapter(new CbFSAdapter(&cbfs_options));
  try {
    cbfs_adapter->Start();
    cout << "Volume successfully mounted. Eject it in Windows to un-mount it." << endl;
    cbfs_adapter->WaitForEjection();
    cbfs_adapter->StopWithoutUnmount();
    if (Logging::log->loggingActive(LEVEL_DEBUG)) {
      Logging::log->getLog(LEVEL_DEBUG) << "Did shutdown the XtreemFS client." << endl;
    }
  } catch (const XtreemFSException& e) {
    Logging::log->getLog(LEVEL_ERROR)
        << "Failed to mount the volume. Error: " << e.what() << endl;
    // Report the failure to the caller instead of exiting with success.
    return_code = 1;
  }
  // libxtreemfs shuts down logger.
  return return_code;
}

View File

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <cstring>
#include <iostream>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "pbrpc/RPC.pb.h" // xtreemfs::pbrpc::UserCredentials
#include "xtreemfs/MRC.pb.h" // xtreemfs::pbrpc::Stat
using namespace std;
/**
 * Example for the usage of the C++ libxtreemfs: writes a short string to the
 * file /example_libxtreemfs.txt on the volume 'demo', prints the file size
 * twice and reads the content back.
 *
 * Returns 1 if the command line parameters are invalid or if an
 * XtreemFSException occurred during the operations, otherwise 0.
 */
int main(int argc, char* argv[]) {
  // Every operation is executed in the context of a given user and his groups.
  // The UserCredentials object does store this information and is currently
  // (08/2011) *only* evaluated by the MRC (although the protocol requires to
  // send user_credentials to DIR and OSD, too).
  xtreemfs::pbrpc::UserCredentials user_credentials;
  user_credentials.set_username("example_libxtreemfs");
  user_credentials.add_groups("example_libxtreemfs");

  // Class which allows to change options of the library.
  xtreemfs::Options options;
  try {
    options.ParseCommandLine(argc, argv);
  } catch(const xtreemfs::XtreemFSException& e) {
    cout << "Invalid parameters found, error: " << e.what() << endl << endl;
    return 1;
  }

  xtreemfs::Client* client = NULL;
  xtreemfs::FileHandle* file = NULL;
  int return_code = 0;
  try {
    // Create a new instance of a client using the DIR service at
    // 'demo.xtreemfs.org' (default port 32638).
    client = xtreemfs::Client::CreateClient(
        "demo.xtreemfs.org:32638",
        user_credentials,
        NULL,  // No SSL options.
        options);

    // Start the client (a connection to the DIR service will be setup).
    client->Start();

    // Open a volume named 'demo'.
    xtreemfs::Volume *volume = NULL;
    volume = client->OpenVolume("demo",
                                NULL,  // No SSL options.
                                options);

    // Open a file.
    file = volume->OpenFile(user_credentials,
                            "/example_libxtreemfs.txt",
                            static_cast<xtreemfs::pbrpc::SYSTEM_V_FCNTL>(
                                xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_CREAT |
                                xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_TRUNC |
                                xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_RDWR),
                            511);  // = 777 Octal.

    // Write to file.
    cout << "Writing the string\n"
            "\n"
            "\t\"Accessed XtreemFS through the C++ libxtreemfs.\"\n"
            "\n"
            "to the file example_libxtreemfs.txt..." << endl;
    // sizeof() includes the terminating NUL byte, so it is written as well.
    char write_buf[] = "Accessed XtreemFS through the C++ libxtreemfs.";
    file->Write(write_buf,
                sizeof(write_buf),
                0);

    // Get file attributes.
    xtreemfs::pbrpc::Stat stat;
    volume->GetAttr(user_credentials, "/example_libxtreemfs.txt", &stat);
    cout << "\nNew file size of example_libxtreemfs.txt: "
         << stat.size() << " Bytes." << endl;

    // Once again, now hopefully from the Cache.
    volume->GetAttr(user_credentials, "/example_libxtreemfs.txt", &stat);
    cout << "\nFile size of example_libxtreemfs.txt again (this time retrieved"
            " from the enabled metadata cache): " << stat.size() << endl;

    // Read from the file.
    const size_t buffer_size = 128 * 1024;  // 128kB, default object size.
    char read_buf[buffer_size];
    memset(read_buf, 0, buffer_size);
    // Request one byte less than the buffer can hold: the zeroed last byte
    // then always terminates the string which is printed below.
    file->Read(read_buf,
               buffer_size - 1,  // Length.
               0);  // Offset.
    cout << "\nReading the content of the file example_libxtreemfs.txt:\n\n"
         << read_buf << endl;
  } catch(const xtreemfs::XtreemFSException& e) {
    cout << "An error occurred:\n" << e.what() << endl;
    return_code = 1;
  }

  if (file != NULL) {
    // Close the file (no need to delete it, see documentation volume.h).
    file->Close();
  }

  if (client != NULL) {
    // Shutdown() does also invoke a volume->Close().
    client->Shutdown();
    delete client;
  }

  return return_code;
}

View File

@@ -0,0 +1,171 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <cstring>
#include <iostream>
#include <list>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "pbrpc/RPC.pb.h" // xtreemfs::pbrpc::UserCredentials
#include "xtreemfs/MRC.pb.h" // xtreemfs::pbrpc::Stat
using namespace std;
int main(int argc, char* argv[]) {
// Every operation is executed in the context of a given user and his groups.
// The UserCredentials object does store this information and is currently
// (08/2011) *only* evaluated by the MRC (although the protocol requires to
// send user_credentials to DIR and OSD, too).
xtreemfs::pbrpc::UserCredentials user_credentials;
user_credentials.set_username("example_libxtreemfs");
user_credentials.add_groups("example_libxtreemfs");
// Class which allows to change options of the library.
xtreemfs::Options options;
try {
options.ParseCommandLine(argc, argv);
} catch(const xtreemfs::XtreemFSException& e) {
cout << "Invalid parameters found, error: " << e.what() << endl << endl;
return 1;
}
xtreemfs::Client* client = NULL;
xtreemfs::FileHandle* file = NULL;
int return_code = 0;
try {
// Create a new instance of a client using the DIR service at
// localhost because we need extended priviliges.
// This requires the DIR with etc/xos/xtreemfs/dirconfig.test,
// an OSD with osdconfig.test, an OSD with osdconfig2.test and
// an MRC with mrcconfig.test, all of them on localhost.
client = xtreemfs::Client::CreateClient(
"localhost:32638",
user_credentials,
NULL, // No SSL options.
options);
// Start the client (a connection to the DIR service will be setup).
client->Start();
// setup the auth object
xtreemfs::pbrpc::Auth auth = xtreemfs::pbrpc::Auth::default_instance();
auth.set_auth_type(xtreemfs::pbrpc::AUTH_NONE);
// Create a new volume named 'demo'.
xtreemfs::pbrpc::Volumes *volumes = client->ListVolumes("localhost:32636", auth);
bool has_volume = false;
for(int i = 0; i < volumes->volumes_size() && !has_volume; ++i) {
has_volume = volumes->volumes(i).name().compare("demo") == 0;
}
if(has_volume) {
client->DeleteVolume("localhost:32636", auth, user_credentials, "demo");
}
client->CreateVolume("localhost:32636", auth, user_credentials, "demo");
// Open the volume.
xtreemfs::Volume *volume = NULL;
volume = client->OpenVolume("demo",
NULL, // No SSL options.
options);
// Open a file.
file = volume->OpenFile(user_credentials,
"/example_replication.txt",
static_cast<xtreemfs::pbrpc::SYSTEM_V_FCNTL>(
xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_CREAT |
xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_TRUNC |
xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_RDWR),
511); // = 777 Octal.
// Write to file.
cout << "Writing the string\n"
"\n"
"\t\"Replication Example.\"\n"
"\n"
"to the file example_replication.txt..." << endl;
char write_buf[] = "Replication Example.";
file->Write(reinterpret_cast<const char*>(&write_buf),
sizeof(write_buf),
0);
cout << endl << "Closing /example_replication.txt... ";
file->Close();
file = NULL;
cout << "ok!" << endl;
// mark the file as read-only
cout << endl << "Marking /example_replication.txt read only... ";
volume->SetXAttr(user_credentials, "/example_replication.txt", "xtreemfs.read_only", "true", xtreemfs::pbrpc::XATTR_FLAGS_CREATE);
cout << "ok!" << endl;
// list replica(s) and their OSD(s)
// we expect one replica and one OSD here because we created a new volume above
xtreemfs::pbrpc::Replicas* replicas = volume->ListReplicas(user_credentials, "/example_replication.txt");
const int repls = replicas->replicas_size();
cout << endl << repls << " replica(s) for /example_replication.txt:" << endl;
for(int i = 0; i < repls; ++i) {
xtreemfs::pbrpc::Replica replica = replicas->replicas(i);
const int osds = replica.osd_uuids_size();
cout << "\t" << osds << " OSD(s) for replica " << i << ":";
for(int j = 0; j < osds; ++j) {
cout << " " << replica.osd_uuids(j);
}
cout << endl;
}
// grab one suitable OSD which we can use for manual replication of the file
list<string> osd_uuids;
volume->GetSuitableOSDs(user_credentials, "/example_replication.txt", 1, &osd_uuids);
// replicate to second OSD if available
if(osd_uuids.size() > 0) {
string osd_uuid = osd_uuids.front();
cout << endl << "Replicating to suitable OSD " << osd_uuid << "... ";
// add replication
xtreemfs::pbrpc::Replica replica;
replica.add_osd_uuids(osd_uuid);
// read-only files have partial replication by default, we want full
replica.set_replication_flags(xtreemfs::pbrpc::REPL_FLAG_FULL_REPLICA | xtreemfs::pbrpc::REPL_FLAG_STRATEGY_RAREST_FIRST);
xtreemfs::pbrpc::StripingPolicy *striping = new xtreemfs::pbrpc::StripingPolicy;
striping->set_type(xtreemfs::pbrpc::STRIPING_POLICY_RAID0);
striping->set_stripe_size(128);
striping->set_width(1);
replica.set_allocated_striping_policy(striping);
volume->AddReplica(user_credentials, "/example_replication.txt", replica);
cout << "ok!" << endl;
} else {
cout << endl << "No second OSD found for replication." << endl;
}
} catch(const xtreemfs::XtreemFSException& e) {
cout << "An error occurred:\n" << e.what() << endl;
return_code = 1;
}
if (file != NULL) {
// Close the file (no need to delete it, see documentation volume.h).
file->Close();
}
if (client != NULL) {
// Shutdown() does also invoke a volume->Close().
client->Shutdown();
delete client;
}
return return_code;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,407 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "fuse/fuse_operations.h"
#include "fuse/fuse_adapter.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
xtreemfs::FuseAdapter* fuse_adapter = NULL;
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->getattr(). */
int xtreemfs_fuse_getattr(const char *path, struct stat *statbuf) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "getattr on path " << path << endl;
  }

  return fuse_adapter->getattr(path, statbuf);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->readlink(). */
int xtreemfs_fuse_readlink(const char *path, char *link, size_t size) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_readlink on path "
                                      << path << endl;
  }

  return fuse_adapter->readlink(path, link, size);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->mknod(). */
int xtreemfs_fuse_mknod(const char *path, mode_t mode, dev_t dev) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_mknod on path " << path << endl;
  }

  return fuse_adapter->mknod(path, mode, dev);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->mkdir(). */
int xtreemfs_fuse_mkdir(const char *path, mode_t mode) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_mkdir on path " << path << endl;
  }

  return fuse_adapter->mkdir(path, mode);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->unlink(). */
int xtreemfs_fuse_unlink(const char *path) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_unlink " << path << endl;
  }

  return fuse_adapter->unlink(path);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->rmdir(). */
int xtreemfs_fuse_rmdir(const char *path) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_rmdir on path " << path << endl;
  }

  return fuse_adapter->rmdir(path);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->symlink(). */
int xtreemfs_fuse_symlink(const char *path, const char *link) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_symlink on path " << path << endl;
  }

  return fuse_adapter->symlink(path, link);
}
/** Logs both paths at debug level (if active) and forwards the call to fuse_adapter->rename(). */
int xtreemfs_fuse_rename(const char *path, const char *newpath) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_rename on path "
                                      << path << " to " << newpath << endl;
  }

  return fuse_adapter->rename(path, newpath);
}
/** Logs both paths at debug level (if active) and forwards the call to fuse_adapter->link(). */
int xtreemfs_fuse_link(const char *path, const char *newpath) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_link on path "
                                      << path << " " << newpath << endl;
  }

  return fuse_adapter->link(path, newpath);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->chmod(). */
int xtreemfs_fuse_chmod(const char *path, mode_t mode) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_chmod on path " << path << endl;
  }

  return fuse_adapter->chmod(path, mode);
}
int xtreemfs_fuse_lock(const char* path,
struct fuse_file_info *fi,
int cmd,
struct flock* flock_) {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
string log_command;
switch(cmd) {
case F_GETLK:
log_command = "check lock";
break;
case F_SETLK:
log_command = "set lock";
break;
case F_SETLKW:
log_command = "set lock and wait";
break;
default:
log_command = "unknown lock command";
break;
}
string log_type;
switch(flock_->l_type) {
case F_UNLCK:
log_type = "unlock";
break;
case F_RDLCK:
log_type = "read lock";
break;
case F_WRLCK:
log_type = "write lock";
break;
default:
log_type = "unknown lock type";
break;
}
Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_lock on path " << path
<< " command: " << log_command << " type: " << log_type << " start: "
<< flock_->l_start << " length: "<< flock_->l_len << " pid: "
<< flock_->l_pid << endl;
}
return fuse_adapter->lock(path, fi, cmd, flock_);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->chown(). */
int xtreemfs_fuse_chown(const char *path, uid_t uid, gid_t gid) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_chown on path " << path << endl;
  }

  return fuse_adapter->chown(path, uid, gid);
}
/** Logs path and new size at debug level (if active) and forwards the call to fuse_adapter->truncate(). */
int xtreemfs_fuse_truncate(const char *path, off_t new_file_size) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_truncate on path "
                                      << path << " size:" << new_file_size
                                      << endl;
  }

  return fuse_adapter->truncate(path, new_file_size);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->utime(). */
int xtreemfs_fuse_utime(const char *path, struct utimbuf *ubuf) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_utime on path " << path << endl;
  }

  return fuse_adapter->utime(path, ubuf);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->utimens(). */
int xtreemfs_fuse_utimens(const char *path, const struct timespec tv[2]) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_utimens on path " << path << endl;
  }

  return fuse_adapter->utimens(path, tv);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->open(). */
int xtreemfs_fuse_open(const char *path, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_open on path " << path << endl;
  }

  return fuse_adapter->open(path, fi);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->release(). */
int xtreemfs_fuse_release(const char *path, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_release " << path << endl;
  }

  return fuse_adapter->release(path, fi);
}
/**
 * Forwards the call to fuse_adapter->read() and returns its result. At debug
 * level the request is logged before the call and, together with the
 * result, once more after it.
 */
int xtreemfs_fuse_read(
    const char *path, char *buf,
    size_t size, off_t offset, struct fuse_file_info *fi) {
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_read " << path
        << " s:" << size << " o:" << offset << endl;
  }

  const int read_result = fuse_adapter->read(path, buf, size, offset, fi);

  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_read finished " << path
        << " s:" << size << " o:" << offset << " r:" << read_result << endl;
  }
  return read_result;
}
/**
 * Forwards the call to fuse_adapter->write() and returns its result. At debug
 * level the request is logged before the call and, together with the
 * result, once more after it.
 */
int xtreemfs_fuse_write(const char *path, const char *buf, size_t size,
                        off_t offset, struct fuse_file_info *fi) {
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_write " << path
        << " s: " << size << " o:" << offset << endl;
  }

  const int write_result = fuse_adapter->write(path, buf, size, offset, fi);

  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_write finished " << path
        << " s:" << size << " o:" << offset << " w:" << write_result << endl;
  }
  return write_result;
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->statfs(). */
int xtreemfs_fuse_statfs(const char *path, struct statvfs *statv) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_statfs " << path << endl;
  }

  return fuse_adapter->statfs(path, statv);
}
/** Unlike fsync(), flush() requests are NOT initiated from the user.
 *
 * Instead, flush() is a Fuse internal mechanism to avoid the problem that
 * the return value of release() will be ignored.
 *
 * Therefore, a flush() will be called by Fuse with every close() executed by
 * the user. Only errors returned by this flush() operation can be returned
 * to the close() of the user.
 *
 * The call is logged at debug level (if active) and forwarded to
 * fuse_adapter->flush().
 */
int xtreemfs_fuse_flush(const char *path, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_flush " << path << endl;
  }

  return fuse_adapter->flush(path, fi);
}
/**
 * Logs the call at debug level (if active) and forwards it to
 * fuse_adapter->flush(); the datasync parameter is not used.
 */
int xtreemfs_fuse_fsync(const char *path, int datasync,
                        struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_fsync " << path << endl;
  }

  // We ignore the datasync parameter as all metadata operations are
  // synchronous and therefore never have to be flushed.
  return fuse_adapter->flush(path, fi);
}
/**
 * Logs path and attribute name at debug level (if active) and forwards the
 * call to fuse_adapter->setxattr(). The additional position parameter of
 * the __APPLE__ signature is not passed on.
 */
int xtreemfs_fuse_setxattr(
    const char *path, const char *name,
    const char *value, size_t size, int flags
#ifdef __APPLE__
    , uint32_t position
#endif
    ) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_setxattr " << " "
                                      << path << " " << name << endl;
  }

  return fuse_adapter->setxattr(path, name, value, size, flags);
}
/**
 * Logs path, attribute name and buffer size at debug level (if active) and
 * forwards the call to fuse_adapter->getxattr(). The additional position
 * parameter of the __APPLE__ signature is not passed on.
 */
int xtreemfs_fuse_getxattr(
    const char *path, const char *name, char *value, size_t size
#ifdef __APPLE__
    , uint32_t position
#endif
    ) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_getxattr " << " "
                                      << path << " " << name << " " << size
                                      << endl;
  }

  return fuse_adapter->getxattr(path, name, value, size);
}
/** Logs path and buffer size at debug level (if active) and forwards the call to fuse_adapter->listxattr(). */
int xtreemfs_fuse_listxattr(const char *path, char *list, size_t size) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_listxattr " << path
                                      << " " << size << endl;
  }

  return fuse_adapter->listxattr(path, list, size);
}
/** Logs path and attribute name at debug level (if active) and forwards the call to fuse_adapter->removexattr(). */
int xtreemfs_fuse_removexattr(const char *path, const char *name) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_removexattr " << " "
                                      << path << " " << name << endl;
  }

  return fuse_adapter->removexattr(path, name);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->opendir(). */
int xtreemfs_fuse_opendir(const char *path, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_opendir " << path << endl;
  }

  return fuse_adapter->opendir(path, fi);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->readdir(). */
int xtreemfs_fuse_readdir(
    const char *path, void *buf,
    fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_readdir " << path << endl;
  }

  return fuse_adapter->readdir(path, buf, filler, offset, fi);
}
/**
 * Forwards the call to fuse_adapter->releasedir(). The path is only logged
 * (at debug level) if it is not NULL.
 */
int xtreemfs_fuse_releasedir(const char *path, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled && path != NULL) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_releasedir " << path << endl;
  }

  return fuse_adapter->releasedir(path, fi);
}
/**
 * Does nothing except logging the call at debug level and always returns 0.
 *
 * The path is only logged if it is not NULL (same guard as in
 * xtreemfs_fuse_releasedir()), because streaming a NULL char pointer is
 * undefined behavior. The parameters datasync and fi are not used.
 */
int xtreemfs_fuse_fsyncdir(
    const char *path, int datasync, struct fuse_file_info *fi) {
  if (Logging::log->loggingActive(LEVEL_DEBUG) && path != NULL) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_fsyncdir " << path
                                      << endl;
  }

  // Like fsync, but for directories - not required for XtreemFS.
  return 0;
}
void *xtreemfs_fuse_init(struct fuse_conn_info *conn) {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_init " << endl;
}
// http://sourceforge.net/apps/mediawiki/fuse/index.php?title=Fuse_file_info
// TODO(mberlin): Check for valid parameters.
conn->async_read = 5;
conn->max_readahead = 10 * 128 * 1024;
conn->max_write = 128 * 1024;
#if FUSE_MAJOR_VERSION > 2 || (FUSE_MAJOR_VERSION == 2 && FUSE_MINOR_VERSION >= 8) // NOLINT
conn->capable
= FUSE_CAP_ASYNC_READ | FUSE_CAP_BIG_WRITES
| FUSE_CAP_ATOMIC_O_TRUNC | FUSE_CAP_POSIX_LOCKS;
conn->want
= FUSE_CAP_ASYNC_READ | FUSE_CAP_BIG_WRITES
| FUSE_CAP_ATOMIC_O_TRUNC | FUSE_CAP_POSIX_LOCKS;
#endif
struct fuse_context* context = fuse_get_context();
return context->private_data;
}
/** Only logs the call at debug level (if active); userdata is not used. */
void xtreemfs_fuse_destroy(void *userdata) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_destroy " << endl;
  }
}
/**
 * This method will only be called by Fuse if "-o default_permissions" is not
 * sent to Fuse (for instance before changing the working directory).
 *
 * If "-o default_permissions" is enabled, Fuse does determine on its own, based
 * on the result of the getattr, if the user is allowed to access the directory.
 *
 * The call is logged at debug level (if active) and forwarded to
 * fuse_adapter->access().
 */
int xtreemfs_fuse_access(const char *path, int mask) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "xtreemfs_fuse_access " << path << endl;
  }

  return fuse_adapter->access(path, mask);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->create(). */
int xtreemfs_fuse_create(const char *path, mode_t mode,
                         struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "create on path " << path << endl;
  }

  return fuse_adapter->create(path, mode, fi);
}
/** Logs path and new size at debug level (if active) and forwards the call to fuse_adapter->ftruncate(). */
int xtreemfs_fuse_ftruncate(
    const char *path, off_t new_file_size, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "xtreemfs_fuse_ftruncate on path "
                                      << path << " size:" << new_file_size
                                      << endl;
  }

  return fuse_adapter->ftruncate(path, new_file_size, fi);
}
/** Logs the call at debug level (if active) and forwards it to fuse_adapter->fgetattr(). */
int xtreemfs_fuse_fgetattr(
    const char *path, struct stat *statbuf, struct fuse_file_info *fi) {
  const bool debug_enabled = Logging::log->loggingActive(LEVEL_DEBUG);
  if (debug_enabled) {
    Logging::log->getLog(LEVEL_DEBUG) << "fgetattr on path " << path << endl;
  }

  return fuse_adapter->fgetattr(path, statbuf, fi);
}

View File

@@ -0,0 +1,261 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "fuse/fuse_options.h"
#include <csignal>
#include <boost/lexical_cast.hpp>
#include <boost/program_options/cmdline.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <sstream>
#include "libxtreemfs/helper.h"
#include "libxtreemfs/xtreemfs_exception.h"
using namespace std;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Sets the Fuse specific defaults on top of the general Options(), registers
 * the Fuse command line options in fuse_descriptions_ and prepares the usage
 * text.
 */
FuseOptions::FuseOptions() : Options(), fuse_descriptions_("Fuse Options") {
  // --- Overwrite certain members of Options(). ---

  // Never give up to execute a request as we enabled interruption.
  max_tries = 0;
  max_write_tries = 0;
#ifndef __linux
  // Read requests are only changed on non-Linux platforms, because
  // interrupting read calls does not work with Linux Fuse.
  max_read_tries = 0;
#endif
  // FUSE specific limit.
  async_writes_max_request_size_kb = 128;

  // --- Default Fuse options. ---
#ifdef __APPLE__
  // We assume that the MacFuse default timeout is 60 seconds on Leopard.
  daemon_timeout = 60;

  // Always enable xattrs for Mac.
  enable_xattrs = true;

  // If we are under Leopard or newer, reduce the timeout values.
  const bool leopard_or_newer = GetMacOSXKernelVersion() >= 9;
  if (leopard_or_newer) {
    max_tries = 3;
    max_read_tries = 3;
    max_write_tries = 3;
    retry_delay_s = 15;
    connect_timeout_s = 15;
    // The detection of a timeout may take twice the time, i.e. up to 18
    // seconds resulting in a total time of 3 * 18 (< 60 seconds
    // daemon_timeout default).
    request_timeout_s = 9;
  }
#else
  enable_xattrs = false;
#endif  // __APPLE__

  foreground = false;
  use_fuse_permission_checks = true;
  fuse_permission_checks_explicitly_disabled = false;

  // --- Command line options. ---
  fuse_descriptions_.add_options()
      ("foreground,f",
       po::value(&foreground)->zero_tokens(),
       "Do not fork into background.")
      ("fuse_option,o",
       po::value< vector<string> >(&fuse_options),
       "Passes -o=<option> to Fuse if not recognized by mount.xtreemfs, "
       "see 'Alternative Specification of options'.")
      ("no-default-permissions",
       po::value(&fuse_permission_checks_explicitly_disabled)->zero_tokens(),
       "Do not pass -o default_permissions to Fuse (disables local Fuse"
       " permissions checks).");

  // Additional help text; the user_xattr line is left out on Mac.
  po::options_description fuse_options_information(
      "ACL and extended attributes Support:\n"
      " -o xtreemfs_acl Enable the correct evaluation of XtreemFS ACLs.\n"
      " (Note that you cannot use the system tools getfattr\n"
      " and setfattr; use 'xtfsutil' instead to set and\n"
      " retrieve ACLs.)"
#ifndef __APPLE__
      "\n -o user_xattr Enable user defined extended attributes."
#endif  // __APPLE__
      );
  fuse_descriptions_.add(fuse_options_information);

  helptext_usage_ =
      "mount.xtreemfs: Mounts an XtreemFS Volume.\n"
      "\n"
      "Usage: \n"
      "\tmount.xtreemfs [options] [pbrpc[g|s]://]<dir-host>[:port]/<volume-name>"
      " <mount point>\n"
      "\n"
      " Example: mount.xtreemfs localhost/myVolume ~/xtreemfs\n";
}
void FuseOptions::ParseCommandLine(int argc, char** argv) {
// Parse general options and retrieve unregistered options for own parsing.
vector<string> options = Options::ParseCommandLine(argc, argv);
// Read Volume URL and mount point from command line.
po::positional_options_description p;
p.add("dir_volume_url", 1);
p.add("mount_point", 1);
po::options_description mount("Mount options");
mount.add_options()
("dir_volume_url", po::value(&xtreemfs_url), "volume to mount")
("mount_point", po::value(&mount_point), "where to mount the volume");
// Parse command line.
po::options_description all_descriptions_;
all_descriptions_.add(mount).add(fuse_descriptions_);
po::variables_map vm;
try {
po::store(po::command_line_parser(options)
.options(all_descriptions_)
.positional(p)
.style(style::default_style & ~style::allow_guessing)
.run(), vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
// Do not check parameters if the help shall be shown.
if (show_help || empty_arguments_list || show_version) {
return;
}
// Split list of comma separated -o options and add them as extra options.
list<string> split_options;
for (int i = 0; i < fuse_options.size(); i++) {
typedef boost::tokenizer< boost::char_separator<char> > tokenizer;
boost::char_separator<char> seperator(",");
tokenizer tokens(fuse_options[i], seperator);
// Check if there are at least two tokens and they have to be split up
tokenizer::iterator first_tokens = tokens.begin();
if (++first_tokens != tokens.end()) {
// Split tokens and add them to a temporary list.
for (tokenizer::iterator token = tokens.begin();
token != tokens.end();
++token) {
split_options.push_back(string(*token));
}
// Remove split tokens from fuse_options as they will be readded later.
fuse_options.erase(fuse_options.begin() + i);
i--;
}
}
// Readd split options.
for (list<string>::const_iterator iter = split_options.begin();
iter != split_options.end();
++iter) {
fuse_options.push_back(*iter);
}
// Evaluate certain Fuse options.
for (int i = 0; i < fuse_options.size(); i++) {
if (fuse_options[i] == "acl") {
throw InvalidCommandLineParametersException(
"The option -o acl is not supported. Specify -o xtreemfs_acl instead."
"\n\nWe do not allow -o acl because XtreemFS does not support the "
"getfacl and setfacl tools. You have to use 'xtfs_acl' instead "
"to set and retrieve ACLs.");
}
if (fuse_options[i] == "user_xattr") {
enable_xattrs = true;
// Don't send this option to Fuse.
fuse_options.erase(fuse_options.begin() + i);
i--;
continue;
}
if (fuse_options[i] == "xtreemfs_acl") {
// Fuse may prevent operations based on the evaluation of stat records
// although a user is allowed to due to further ACLs, so we disable this
// Fuse feature here.
use_fuse_permission_checks = false;
// Don't send this option to Fuse.
fuse_options.erase(fuse_options.begin() + i);
i--;
continue;
}
if (fuse_options[i] == "intr") {
// Don't send this option to Fuse.
fuse_options.erase(fuse_options.begin() + i);
i--;
throw InvalidCommandLineParametersException(
"The option -o intr will be ignored as command line parameter and"
" not passed through to Fuse. Use --interrupt-signal instead.");
}
if (fuse_options[i].substr(0, 12) == "intr_signal=") {
// Don't send this option to Fuse.
fuse_options.erase(fuse_options.begin() + i);
i--;
throw InvalidCommandLineParametersException(
"The option -o intr_signal will be ignored as command line "
"parameter and not passed through to Fuse. Use --interrupt-signal "
"instead.");
}
if (fuse_options[i] == "_netdev") {
// Don't send this option to Fuse, just ignore it, solves issue 276:
// https://code.google.com/p/xtreemfs/issues/detail?id=276
fuse_options.erase(fuse_options.begin() + i);
i--;
}
#ifdef __APPLE__
if (fuse_options[i].substr(0, 15) == "daemon_timeout=") {
try {
daemon_timeout = boost::lexical_cast<int>(fuse_options[i].substr(15));
continue;
} catch(const boost::bad_lexical_cast& e) {
throw InvalidCommandLineParametersException(
"The integer value after daemon_timeout could not be parsed: "
+ fuse_options[i].substr(15));
}
}
#endif
}
// Extract information from command line.
Options::ParseURL(kDIR);
// Check for required parameters.
if (service_addresses.empty()) {
throw InvalidCommandLineParametersException("missing DIR host.");
}
if (volume_name.empty()) {
throw InvalidCommandLineParametersException("missing volume name.");
}
if (mount_point.empty()) {
throw InvalidCommandLineParametersException("missing mount point.");
}
}
// Returns the short usage text followed by a hint on how to obtain the
// complete list of options.
std::string FuseOptions::ShowCommandLineUsage() {
  std::string usage(helptext_usage_);
  usage += "\nFor complete list of options, please specify -h or --help.\n";
  return usage;
}
std::string FuseOptions::ShowCommandLineHelp() {
ostringstream stream;
// No help text given in descriptions for positional mount options. Instead
// the usage is explained here.
stream << helptext_usage_
<< endl
// Descriptions of this class.
<< fuse_descriptions_
// Descriptions of the general options.
<< Options::ShowCommandLineHelp();
return stream.str();
}
} // namespace xtreemfs

View File

@@ -0,0 +1,283 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#define FUSE_USE_VERSION 26
#include <errno.h>
#include <csignal>
#include <cstdio>
#include <cstring>
#include <fuse.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <iostream>
#include <list>
#include <string>
#include <vector>
#include "util/logging.h"
#include "fuse/fuse_adapter.h"
#include "fuse/fuse_operations.h"
#include "fuse/fuse_options.h"
#include "libxtreemfs/xtreemfs_exception.h"
using namespace std;
using namespace xtreemfs::util;
// Entry point of mount.xtreemfs.
//
// Parses the command line, optionally forks into the background, starts the
// FuseAdapter, mounts the volume through Fuse and runs the multi-threaded
// Fuse loop until the file system is unmounted.
//
// Exit codes: 0 on success, 1 for usage/help/version output or an invalid
// mount point, 2 and 3 if pipe() or fork() failed, 4 in the parent if the
// child reported an error, 5 if the FuseAdapter could not be started,
// 6 and 7 if daemonizing failed, or the errno value observed right after a
// failed fuse_mount()/fuse_new() call.
//
// NOTE(review): in background mode, failures after the fork which do not
// write to the pipe (invalid mount point, fuse_mount()/fuse_new() errors)
// close the pipe without data, so the parent exits with 0. Confirm whether
// these paths should report through the pipe as well.
int main(int argc, char **argv) {
  // Parse command line options.
  xtreemfs::FuseOptions options;
  bool invalid_commandline_parameters = false;
  try {
    options.ParseCommandLine(argc, argv);
  } catch(const xtreemfs::XtreemFSException& e) {
    cout << "Invalid parameters found, error: " << e.what() << endl << endl;
    invalid_commandline_parameters = true;
  }
  // Display help if needed.
  if (options.empty_arguments_list || invalid_commandline_parameters) {
    cout << options.ShowCommandLineUsage() << endl;
    return 1;
  }
  if (options.show_help) {
    cout << options.ShowCommandLineHelp() << endl;
    return 1;
  }
  // Show only the version.
  if (options.show_version) {
    cout << options.ShowVersion("mount.xtreemfs") << endl;
    return 1;
  }

  // In case of background operation: Fork before threads are created.
  int fd[2];
  // A compile-time constant, so error_output is a regular array (no VLA).
  const int kErrorBufferSize = 1024;
  char error_output[kErrorBufferSize];
  memset(&error_output, 0, kErrorBufferSize);
  if (!options.foreground) {
    if (pipe(fd) < 0) {
      cerr << "Failed to create pipe. (Needed to send process to background.)";
      return 2;
    }
    pid_t pid = fork();
    if (pid < 0) {
      cerr << "Failed to fork(). (Needed to send process to background.)";
      return 3;
    }

    // Evaluate pipe from daemonized thread.
    if (pid > 0) {  // Parent
      // Cleanup the static memory in the parent to pass the valgrind
      // leak check.
      google::protobuf::ShutdownProtobufLibrary();

      signal(SIGINT, SIG_IGN);  // Ignore interrupt signals in parent.
      // Close write end.
      close(fd[1]);
      fd_set read_fds;
      FD_ZERO(&read_fds);
      FD_SET(fd[0], &read_fds);
      // Wait until there is something to read at the other end.
      select(fd[0]+1, &read_fds, NULL, NULL, NULL);
      // Read one byte less than the buffer size: the buffer was zeroed
      // above, so the message always stays NUL-terminated for printf().
      const ssize_t count = read(fd[0], error_output, kErrorBufferSize - 1);
      if (count == 0) {
        // No error found, exiting.
        return 0;
      } else {
        // Either an error message was received or read() itself failed.
        printf("mount.xtreemfs failed: %s\n", error_output);
        return 4;
      }
    } else {  // Child.
      // Close read end of pipe.
      close(fd[0]);
    }
  }

  // Child only from here.
  // Run client and open volume.
  list<char*> required_fuse_options;
  try {
    fuse_adapter = new xtreemfs::FuseAdapter(&options);
    fuse_adapter->Start(&required_fuse_options);
  } catch(const xtreemfs::XtreemFSException& e) {
    if (options.foreground) {
      cerr << "mount.xtreemfs failed: " << e.what() << endl;
    } else {
      // Tell parent about error: write error to pipe and exit.
      write(fd[1], e.what(), min(static_cast<int>(strlen(e.what()) + 1),
                                 kErrorBufferSize));
    }
    // The exception may have been thrown before fuse_adapter was assigned.
    // NOTE(review): assumes the fuse_adapter global starts out as NULL -
    // confirm at its definition.
    if (fuse_adapter != NULL) {
      fuse_adapter->Stop();
      delete fuse_adapter;
    }
    return 5;
  }

  // Setup fuse and pass client and volume objects.
  struct fuse_chan* fuse_channel = NULL;
  struct fuse* fuse_ = NULL;
  char* mount_point = NULL;

  // Fill in operations.
  struct fuse_operations xtreemfs_fuse_ops = {0};
  xtreemfs_fuse_ops.getattr = xtreemfs_fuse_getattr;
  xtreemfs_fuse_ops.readlink = xtreemfs_fuse_readlink;
  xtreemfs_fuse_ops.mknod = xtreemfs_fuse_mknod;
  xtreemfs_fuse_ops.mkdir = xtreemfs_fuse_mkdir;
  xtreemfs_fuse_ops.unlink = xtreemfs_fuse_unlink;
  xtreemfs_fuse_ops.rmdir = xtreemfs_fuse_rmdir;
  xtreemfs_fuse_ops.symlink = xtreemfs_fuse_symlink;
  xtreemfs_fuse_ops.rename = xtreemfs_fuse_rename;
  xtreemfs_fuse_ops.link = xtreemfs_fuse_link;
  xtreemfs_fuse_ops.chmod = xtreemfs_fuse_chmod;
  xtreemfs_fuse_ops.chown = xtreemfs_fuse_chown;
  xtreemfs_fuse_ops.truncate = xtreemfs_fuse_truncate;
  xtreemfs_fuse_ops.utime = xtreemfs_fuse_utime;
  xtreemfs_fuse_ops.open = xtreemfs_fuse_open;
  xtreemfs_fuse_ops.read = xtreemfs_fuse_read;
  xtreemfs_fuse_ops.write = xtreemfs_fuse_write;
  xtreemfs_fuse_ops.statfs = xtreemfs_fuse_statfs;
  xtreemfs_fuse_ops.flush = xtreemfs_fuse_flush;
  xtreemfs_fuse_ops.release = xtreemfs_fuse_release;
  xtreemfs_fuse_ops.fsync = xtreemfs_fuse_fsync;
  xtreemfs_fuse_ops.setxattr = xtreemfs_fuse_setxattr;
  xtreemfs_fuse_ops.getxattr = xtreemfs_fuse_getxattr;
  xtreemfs_fuse_ops.listxattr = xtreemfs_fuse_listxattr;
  xtreemfs_fuse_ops.removexattr = xtreemfs_fuse_removexattr;
  xtreemfs_fuse_ops.opendir = xtreemfs_fuse_opendir;
  xtreemfs_fuse_ops.readdir = xtreemfs_fuse_readdir;
  xtreemfs_fuse_ops.releasedir = xtreemfs_fuse_releasedir;
  xtreemfs_fuse_ops.fsyncdir = xtreemfs_fuse_fsyncdir;
  xtreemfs_fuse_ops.init = xtreemfs_fuse_init;
  xtreemfs_fuse_ops.destroy = xtreemfs_fuse_destroy;
  xtreemfs_fuse_ops.access = xtreemfs_fuse_access;
  xtreemfs_fuse_ops.create = xtreemfs_fuse_create;
  xtreemfs_fuse_ops.ftruncate = xtreemfs_fuse_ftruncate;
  xtreemfs_fuse_ops.fgetattr = xtreemfs_fuse_fgetattr;
  xtreemfs_fuse_ops.lock = xtreemfs_fuse_lock;
  xtreemfs_fuse_ops.utimens = xtreemfs_fuse_utimens;
#if FUSE_MAJOR_VERSION > 2 || ( FUSE_MAJOR_VERSION == 2 && FUSE_MINOR_VERSION >= 8 )  // NOLINT
  // We cannot work on unlinked files in case -ohard_remove was specified, so
  // a null path is not okay.
  xtreemfs_fuse_ops.flag_nullpath_ok = 0;
#endif  // >= FUSE 2.8
#if FUSE_MAJOR_VERSION > 2 || ( FUSE_MAJOR_VERSION == 2 && FUSE_MINOR_VERSION >= 9 )  // NOLINT
  // We require Fuse to calculate the "path" for all file handle operations.
  xtreemfs_fuse_ops.flag_nopath = 0;
#endif  // >= FUSE 2.9

  // Forward args.
  vector<char*> fuse_opts;
  // Fuse does not parse the first parameter, thus set it to "mount.xtreemfs".
  fuse_opts.push_back((strdup("mount.xtreemfs")));
  for (size_t i = 0; i < options.fuse_options.size(); i++) {
    // Prepend "-o" to every Fuse option.
    fuse_opts.push_back(strdup(
        (string("-o") + options.fuse_options[i]).c_str()));
  }
  for (list<char*>::iterator it = required_fuse_options.begin();
       it != required_fuse_options.end(); ++it) {
    fuse_opts.push_back((*it));
  }

  struct fuse_args fuse_args = FUSE_ARGS_INIT(
      static_cast<int>(fuse_opts.size()), &fuse_opts[0]);

  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG) << "About to call fuse_mount using "
        << fuse_opts.size() << " parameters: " << endl;
    for (size_t i = 0; i < fuse_opts.size(); i++) {
      Logging::log->getLog(LEVEL_DEBUG) << "\t" << fuse_opts[i] << endl;
    }
  }

  // Create Fuse channel (mount_point will be freed by fuse_teardown()).
  mount_point = strdup(options.mount_point.c_str());
  struct stat mount_point_stat;
  // If stat() fails, mount_point_stat is not filled in and must not be
  // evaluated; treat this like a mount point which is not a directory.
  if (stat(mount_point, &mount_point_stat) != 0 ||
      (mount_point_stat.st_mode & S_IFMT) != S_IFDIR) {
    cerr << mount_point << " is not a directory." << endl;
    for (size_t i = 0; i < fuse_opts.size(); i++) {
      free(fuse_opts[i]);
    }
    free(mount_point);
    // Stop FuseAdapter.
    fuse_adapter->Stop();
    delete fuse_adapter;
    return 1;
  }

  // Fuse docu: "manually add arguments to the struct fuse_args list"
  fuse_opt_parse(&fuse_args, NULL, NULL, NULL);
  fuse_channel = fuse_mount(mount_point, &fuse_args);
  if (fuse_channel == NULL) {
    // Remember errno before the cleanup calls below can overwrite it.
    const int mount_errno = errno;
    fuse_opt_free_args(&fuse_args);
    for (size_t i = 0; i < fuse_opts.size(); i++) {
      free(fuse_opts[i]);
    }
    free(mount_point);
    // Stop FuseAdapter.
    fuse_adapter->Stop();
    delete fuse_adapter;
    return mount_errno;
  }

  // Create Fuse filesystem.
  fuse_ = fuse_new(
      fuse_channel,
      &fuse_args,
      &xtreemfs_fuse_ops,
      sizeof(xtreemfs_fuse_ops),
      NULL);
  // Remember errno before the cleanup calls below can overwrite it.
  const int fuse_new_errno = errno;
  fuse_opt_free_args(&fuse_args);
  if (fuse_ == NULL) {
    // Avoid "Transport endpoint is not connected" in case fuse_new failed.
    fuse_unmount(mount_point, fuse_channel);
    for (size_t i = 0; i < fuse_opts.size(); i++) {
      free(fuse_opts[i]);
    }
    free(mount_point);
    // Stop FuseAdapter.
    fuse_adapter->Stop();
    delete fuse_adapter;
    return fuse_new_errno;
  }

  // Send to background.
  if (!options.foreground) {
    // Close write end of pipe as no error was encountered.
    close(fd[1]);

    // Daemonize. (Do everything (except for the fork()), the regular daemon()
    // would have done, too.)
    // 1. Change the file mode mask.
    umask(0);
    // 2. Create a new SID for the child process.
    if (setsid() < 0) {
      return 6;
    }
    // 3. Change the current working directory. This prevents the current
    // directory from being locked; hence not being able to remove it.
    if ((chdir("/")) < 0) {
      return 7;
    }
    // 4. Redirect standard files to /dev/null.
    freopen( "/dev/null", "r", stdin);
    freopen( "/dev/null", "w", stdout);
    freopen( "/dev/null", "w", stderr);
  }

  // Run fuse.
  fuse_set_signal_handlers(fuse_get_session(fuse_));
  fuse_adapter->SetInterruptQueryFunction();
  fuse_loop_mt(fuse_);

  // Cleanup
  fuse_teardown(fuse_, mount_point);
  for (size_t i = 0; i < fuse_opts.size(); i++) {
    free(fuse_opts[i]);
  }
  // Stop FuseAdapter.
  fuse_adapter->Stop();
  delete fuse_adapter;

  return 0;
}

4230
cpp/src/json/jsoncpp.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,118 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/environment.h"
#include <cstdlib>
#include <cstring>
#include <stdio.h>
#include <fcntl.h>
#include <list>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/pbrpc_url.h"
#include "util/logging.h"
#include "ld_preload/misc.h"
#include "ld_preload/preload_options.h"
// Sets up the preload environment.
//
// Reads the options from the XTREEMFS_PRELOAD_OPTIONS environment variable,
// parses them like a command line, initializes the logger, fills in the
// user credentials of the current process, then creates and starts the
// client, opens the volume and registers the mount point as XtreemFS path
// prefix.
//
// Terminates the process with exit(-1) if the environment variable is not
// set.
Environment::Environment() : volume_(NULL) {
  xprintf("Environment::Environment()\n");

  // get options string from the environment
  const char program_name_dummy[] = "program ";  // prefix dummy for options string
  const char* options_env = std::getenv("XTREEMFS_PRELOAD_OPTIONS");
  xprintf("Environment::Environment(): getenv returned: %p\n", options_env);

  //const char options_env[] = "demo.xtreemfs.org/demo /home/b/bemnoack/bqcd/remote-test";

  // test for null-string and exit; the error has to be reported before
  // the process is terminated.
  if (!options_env) {
    xprintf("Environment::Environment(): error: XTREEMFS_PRELOAD_OPTIONS environment variable not set or empty.\n");
    exit(-1);
  }
  xprintf("Environment::Environment(): XTREEMFS_PRELOAD_OPTIONS='%s'\n", options_env);

  // Build "program <options>" in a buffer owned by a vector, so that it is
  // released even if parsing the options throws.
  std::vector<char> options_buffer(
      program_name_dummy, program_name_dummy + std::strlen(program_name_dummy));
  options_buffer.insert(options_buffer.end(),
                        options_env, options_env + std::strlen(options_env));
  options_buffer.push_back('\0');
  char* options_c_str = &options_buffer[0];
  xprintf("Environment::Environment(): options_c_str='%s'\n", options_c_str);

  // tokenise by space (overwrite first space after each token with null-termination)
  std::vector<char*> arg_vector;
  for (char* token = strtok(options_c_str, " ");
       token != NULL;
       token = strtok(NULL, " ")) {
    arg_vector.push_back(token);
  }
  // by standard: argv[argc] == 0
  arg_vector.push_back(NULL);

  xprintf("Environment::Environment()\n");

  // generate argc, argv like arguments for command line parsing
  char** argv = &arg_vector[0];
  const int argc = static_cast<int>(arg_vector.size()) - 1;  // last NULL entry does not count

  xprintf("Environment::Environment()\n");
  options_.ParseCommandLine(argc, argv);
  xprintf("Environment::Environment()\n");

  xtreemfs::util::initialize_logger(options_.log_level_string,
                                    options_.log_file_path,
                                    xtreemfs::util::LEVEL_WARN);

  xprintf("enable_async_writes: %d\n", options_.enable_async_writes);

  // user credentials:
  uid_t uid = getuid();
  gid_t gid = getgid();
  pid_t pid = getpid();
  user_creds_.set_username(system_user_mapping_.UIDToUsername(uid));
  std::list<std::string> groupnames;
  system_user_mapping_.GetGroupnames(uid, gid, pid, &groupnames);
  for (std::list<std::string>::iterator it = groupnames.begin();
       it != groupnames.end(); ++it) {
    user_creds_.add_groups(*it);
  }

  // client setup
  xprintf("Environment::Environment(): Client setup start\n");
  client_ = xtreemfs::Client::CreateClient(options_.service_addresses, user_creds_, NULL, options_);
  client_->Start();
  xprintf("Environment::Environment(): Client setup end\n");

  // open volume
  xprintf("Environment::Environment(): Opening volume %s\n", options_.volume_name.c_str());
  volume_ = client_->OpenVolume(options_.volume_name, NULL, options_);
  volume_name_ = options_.volume_name;

  std::string prefix_env(options_.mount_point);
  Path::SetXtreemFSPrefix(prefix_env);
}
// Shuts the client down. The volume is not closed explicitly here.
Environment::~Environment() {
  xprintf("Environment::~Environment(): Closing volume.\n");

  // CloseVolume is not in the Client interface, but performed implicitly in
  // ClientImplementation::Shutdown:
  // volume_ = client_->CloseVolume(volume_);
  client_->Shutdown();

  xprintf("Environment::~Environment()\n");
}
// Returns the volume pointer stored in this environment (not owned by the
// caller).
xtreemfs::Volume* Environment::GetVolume() {
  xtreemfs::Volume* const opened_volume = volume_;
  return opened_volume;
}
// Returns a reference to the user mapping member of this environment.
xtreemfs::SystemUserMappingUnix& Environment::GetSystemUserMapping() {
  return this->system_user_mapping_;
}

View File

@@ -0,0 +1,377 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <fcntl.h>
#include "ld_preload/passthrough.h"
#include "ld_preload/preload.h"
/* Interceptor functions */
extern "C" {
// Interceptor for open(): dispatches to xtreemfs_open() for XtreemFS paths
// and to the libc implementation otherwise.
//
// The optional third argument (mode) is only read from the variable argument
// list if O_CREAT is set; in all other cases 0 is passed on.
int open(__const char *path, int flags, ...) {
  initialize_passthrough_if_necessary();
  xprintf(" open(%s)\n", path);

  // if O_CREAT is set, mode MUST be specified and is ignored otherwise (at least on linux, see man 2 open)
  // we only need to acquire the mode, when it must be specified, and ignore it otherwise
  // Initialize it nevertheless, because it is always passed on below and
  // reading an uninitialized value is undefined behaviour.
  mode_t mode = 0;
  if (flags & O_CREAT) {
    va_list ap;
    va_start(ap, flags);
    mode = va_arg(ap, mode_t);
    va_end(ap);
  }

  if (overlay_initialized() && is_xtreemfs_path(path)) {
    return xtreemfs_open(path, flags, mode);
  } else {
    xprintf(" open calling libc_open(%s)\n", path);
    int ret = ((funcptr_open)libc_open)(path, flags, mode);
    xprintf(" open libc_open(%s) returned %d\n", path, ret);
    return ret;
  }
}
// Interceptor for open64(): forwards to the intercepted open() with
// O_LARGEFILE added to the flags.
//
// The optional mode argument is only read if O_CREAT is set; otherwise 0 is
// forwarded.
int open64(__const char *path, int flags, ...) {
  initialize_passthrough_if_necessary();
  xprintf(" open64(%s)\n", path);

  // Always initialized: the value is forwarded even without O_CREAT and must
  // not be read uninitialized.
  mode_t mode = 0;
  if (flags & O_CREAT) {
    va_list ap;
    va_start(ap, flags);
    mode = va_arg(ap, mode_t);
    va_end(ap);
  }
  return open(path, flags | O_LARGEFILE, mode);
}
#undef creat
// Interceptor for creat(): expressed through the intercepted open() using
// the flags O_CREAT | O_WRONLY | O_TRUNC.
int creat(__const char *name, mode_t mode) {
  const int creat_flags = O_CREAT | O_WRONLY | O_TRUNC;
  return open(name, creat_flags, mode);
}
int close(int fd) {
initialize_passthrough_if_necessary();
xprintf(" close(%d)\n", fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_close(fd);
} else {
xprintf(" close passthrough(%d)\n", fd);
return ((funcptr_close)libc_close)(fd);
}
}
// failed attempt to intercept libc-internal close calls
// Alias for the intercepted close() under the name _close.
int _close(int fd) {
  xprintf(" _close(%d)\n", fd);
  const int result = close(fd);
  return result;
}
// Alias for the intercepted close() under the name __close.
int __close(int fd) {
  xprintf(" __close(%d)\n", fd);
  const int result = close(fd);
  return result;
}
// TODO: do we need this?
/*
int fclose(FILE *fp) {
initialize_passthrough_if_necessary();
xprintf(" fclose(%d)\n", fd);
if (is_xtreemfs_fd(fd)) {
return xtreemfs_fclose(fd);
} else {
xprintf(" fclose passthrough(%d)\n", fd);
return ((funcptr_close)libc_fclose)(fd);
}
}
*/
ssize_t pread(int fd, void* buf, size_t nbyte, off_t offset) {
initialize_passthrough_if_necessary();
xprintf(" pread(%d)\n", fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_pread(fd, buf, nbyte, offset);
} else {
return ((funcptr_pread)libc_pread)(fd, buf, nbyte, offset);
}
}
ssize_t pread64(int fd, void* buf, size_t nbyte, __off64_t offset) {
initialize_passthrough_if_necessary();
xprintf(" pread64(%d)\n", fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_pread(fd, buf, nbyte, offset);
} else {
return ((funcptr_pread)libc_pread)(fd, buf, nbyte, offset);
}
}
ssize_t read(int fd, void* buf, size_t nbyte) {
initialize_passthrough_if_necessary();
xprintf(" read(%d)\n", fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_read(fd, buf, nbyte);
} else {
return ((funcptr_read)libc_read)(fd, buf, nbyte);
}
}
ssize_t write(int fd, const void* buf, size_t nbyte) {
initialize_passthrough_if_necessary();
xprintf(" write(%d)\n", fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
xprintf(" write(%d) xtreemfs\n", fd);
return xtreemfs_write(fd, buf, nbyte);
} else {
xprintf(" write(%d) passthrough\n", fd);
return ((funcptr_write)libc_write)(fd, buf, nbyte);
}
}
int dup(int oldfd) {
initialize_passthrough_if_necessary();
xprintf(" dup(%d)\n", oldfd);
if (overlay_initialized() && is_xtreemfs_fd(oldfd)) {
//xprintf("calling xtreemfs dup(%d)\n", oldfd);
//return xtreemfs_dup(oldfd);
xprintf(" NOT IMPLEMENTED: dup for xtreemfs fd(%d)\n", oldfd);
return -1;
} else {
xprintf(" calling pasthrought dup(%d)\n", oldfd);
return ((funcptr_dup)libc_dup)(oldfd);
}
}
int dup2(int oldfd, int newfd) {
initialize_passthrough_if_necessary();
xprintf(" dup2(%d, %d)\n", oldfd, newfd);
if (overlay_initialized() && (is_xtreemfs_fd(newfd) || is_xtreemfs_fd(oldfd))) {
xprintf(" NOT IMPLEMENTED: dup2 for xtreemfs fd(%d, %d)\n", oldfd, newfd);
return -1;
} else {
return ((funcptr_dup2)libc_dup2)(oldfd, newfd);
}
// if (overlay_initialized() && is_xtreemfs_fd(newfd)) {
// xprintf(" dest is xtreemfs fd\n");
// xtreemfs_close(newfd);
// } else {
// xprintf(" dest is system fd\n");
// ((funcptr_close)libc_close)(newfd);
// }
//
// if (overlay_initialized() && is_xtreemfs_fd(oldfd)) {
// return xtreemfs_dup2(oldfd, newfd);
// } else {
// xprintf(" dup2 passthrough\n");
// return ((funcptr_dup2)libc_dup2)(oldfd, newfd);
// }
}
off_t lseek(int fd, off_t offset, int mode) {
initialize_passthrough_if_necessary();
xprintf(" lseek(%d, %ld, %d)\n", fd, offset, mode);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_lseek(fd, offset, mode);
} else {
return ((funcptr_lseek)libc_lseek)(fd, offset, mode);
}
}
int stat(const char *path, struct stat *buf) {
initialize_passthrough_if_necessary();
xprintf(" stat(%s, ...)\n", path);
if (overlay_initialized() && is_xtreemfs_path(path)) {
return xtreemfs_stat(path, buf);
} else {
return ((funcptr_stat)libc_stat)(path, buf);
}
}
int fstat(int fd, struct stat *buf) {
initialize_passthrough_if_necessary();
xprintf(" fstat(%d, ...)\n", fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_fstat(fd, buf);
} else {
return ((funcptr_fstat)libc_fstat)(fd, buf);
}
}
int __xstat(int ver, const char *path, struct stat *buf) {
initialize_passthrough_if_necessary();
xprintf(" __xstat(%d, %s, ...)\n", ver, path);
if (overlay_initialized() && is_xtreemfs_path(path)) {
return xtreemfs_stat(path, buf);
} else {
return ((funcptr___xstat)libc___xstat)(ver, path, buf);
}
}
// Interceptor for __xstat64(): served by xtreemfs_stat64() for XtreemFS
// paths (the version argument is not used there) and by libc otherwise.
//
// A leftover debugging shortcut used to return -1 unconditionally, which
// made every __xstat64() call of the process fail and left the dispatch
// below unreachable. It has been removed together with its debug output.
int __xstat64(int ver, const char *path, struct stat64 *buf) {
  initialize_passthrough_if_necessary();
  xprintf(" __xstat64(%d, %s, ...)\n", ver, path);

  if (overlay_initialized() && is_xtreemfs_path(path)) {
    return xtreemfs_stat64(path, buf);
  } else {
    return ((funcptr___xstat64)libc___xstat64)(ver, path, buf);
  }
}
int __fxstat(int ver, int fd, struct stat *buf) {
initialize_passthrough_if_necessary();
xprintf(" __fxstat(%d, %d, ...)\n", ver, fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_fstat(fd, buf);
} else {
return ((funcptr___fxstat)libc___fxstat)(ver, fd, buf);
}
}
int __fxstat64(int ver, int fd, struct stat64 *buf) {
initialize_passthrough_if_necessary();
xprintf(" __fxstat64(%d, %d, ...)\n", ver, fd);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
return xtreemfs_fstat64(fd, buf);
} else {
return ((funcptr___fxstat64)libc___fxstat64)(ver, fd, buf);
}
}
extern int __lxstat(int ver, const char *path, struct stat *buf) {
initialize_passthrough_if_necessary();
xprintf(" __lxstat(%d, %s, ...)\n", ver, path);
if (overlay_initialized() && is_xtreemfs_path(path)) {
xprintf(" NOT IMPLEMENTED: lstat for xtreemfs (%s)\n", path);
return -1;
} else {
return ((funcptr___lxstat)libc___lxstat)(ver, path, buf);
}
}
extern int __lxstat64(int ver, const char *path, struct stat64 *buf) {
initialize_passthrough_if_necessary();
xprintf(" __lxstat64(%d, %s, ...)\n", ver, path);
if (overlay_initialized() && is_xtreemfs_path(path)) {
xprintf(" NOT IMPLEMENTED: lstat64 for xtreemfs (%s)\n", path);
return -1;
} else {
return ((funcptr___lxstat64)libc___lxstat64)(ver, path, buf);
}
}
/*
extern int __fxstatat(int __ver, int __fildes, const char *__filename, struct stat *__stat_buf, int __flag) {
initialize_passthrough_if_necessary();
xprintf(" __fxstatat\n");
return -1;
}
extern int __fxstatat64(int __ver, int __fildes, const char *__filename, struct stat64 *__stat_buf, int __flag) {
initialize_passthrough_if_necessary();
xprintf(" __fxstatat64\n");
return -1;
}
*/
FILE *fopen(const char *path, const char *mode) {
initialize_passthrough_if_necessary();
if (overlay_initialized() && is_xtreemfs_path(path)) {
xprintf(" WARNING: fopen(%s, %s) called for xtreemfs\n", path, mode);
}
return ((funcptr_fopen)libc_fopen)(path, mode);
}
int truncate(const char *path, off_t length) {
initialize_passthrough_if_necessary();
xprintf(" truncate(%s, %ld)\n", path, length);
if (overlay_initialized() && is_xtreemfs_path(path)) {
xprintf(" NOT IMPLEMENTED: truncate for xtreemfs (%s)\n", path);
return -1;
} else {
return ((funcptr_truncate)libc_truncate)(path, length);
}
}
int ftruncate(int fd, off_t length) {
initialize_passthrough_if_necessary();
xprintf(" ftruncate(%d, %ld)\n", fd, length);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
xprintf(" NOT IMPLEMENTED: ftruncate for xtreemfs fd(%d)", fd);
return -1;
} else {
return ((funcptr_ftruncate)libc_ftruncate)(fd, length);
}
}
int setxattr(const char *path, const char *name, const void *value, size_t size, int flags) {
initialize_passthrough_if_necessary();
xprintf(" setxattr(%s, %s, ...)\n", path, name);
if (overlay_initialized() && is_xtreemfs_path(path)) {
xprintf(" xtreemfs_setxattr(%s, %s, ...)", path, name);
return xtreemfs_setxattr(path, name, value, size, flags);
} else {
return ((funcptr_setxattr)libattr_setxattr)(path, name, value, size, flags);
}
}
//int lsetxattr (const char *path, const char *name,
// const void *value, size_t size, int flags);
int fsetxattr(int fd, const char *name, const void *value, size_t size, int flags) {
initialize_passthrough_if_necessary();
xprintf(" fsetxattr(%d, %s, ...)\n", fd, name);
if (overlay_initialized() && is_xtreemfs_fd(fd)) {
xprintf(" NOT IMPLEMENTED: fsetxattr for xtreemfs fd(%d)", fd);
return -1;
} else {
return ((funcptr_fsetxattr)libattr_fsetxattr)(fd, name, value, size, flags);
}
}
} // extern "C"

View File

@@ -0,0 +1,97 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/misc.h"
#include <pthread.h>
#include <cstdlib>
#include <cstring>
#include <stdio.h>
#include <fcntl.h>
#include <list>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/system_user_mapping_unix.h"
#include "libxtreemfs/volume_implementation.h"
// Translates the open() flags of the local system into the corresponding
// XtreemFS SYSTEM_V_FCNTL flags. Each known local flag whose bits are set in
// "flags" contributes its XtreemFS counterpart to the result.
xtreemfs::pbrpc::SYSTEM_V_FCNTL ConvertFlagsUnixToXtreemFS(int flags) {
  int xtreemfs_flags = 0;

// ORs "proto_flag" into "target" if any bit of "unix_flag" is set in "source".
#define CHECK(target, source, unix_flag, proto_flag) { \
  if ((source & unix_flag) != 0) target |= proto_flag; \
}
  CHECK(xtreemfs_flags, flags, O_RDONLY,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_RDONLY);
  CHECK(xtreemfs_flags, flags, O_WRONLY,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_WRONLY);
  CHECK(xtreemfs_flags, flags, O_RDWR,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_RDWR);
  CHECK(xtreemfs_flags, flags, O_APPEND,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_APPEND);
  CHECK(xtreemfs_flags, flags, O_CREAT,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_CREAT);
  CHECK(xtreemfs_flags, flags, O_TRUNC,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_TRUNC);
  CHECK(xtreemfs_flags, flags, O_EXCL,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_EXCL);
  CHECK(xtreemfs_flags, flags, O_SYNC,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_SYNC);
#ifdef __linux
  // O_DSYNC is mapped to the XtreemFS O_SYNC flag as well.
  CHECK(xtreemfs_flags, flags, O_DSYNC,
        xtreemfs::pbrpc::SYSTEM_V_FCNTL_H_O_SYNC);
#endif

  xprintf("flags: %o\nresult: %o\n", flags, xtreemfs_flags);
  return xtreemfs::pbrpc::SYSTEM_V_FCNTL(xtreemfs_flags);
}
// Translates an fopen() style mode string into the matching open() flags.
//
// Supported are "r", "r+", "w", "w+", "a" and "a+". The binary marker 'b'
// ("rb", "wb+", "a+b", ...) is ignored, as it does not change which flags
// are needed. Returns 0 for NULL and for every mode string which is not
// recognized.
int TranslateMode(const char* mode) {
  if (mode == NULL) {
    return 0;
  }

  // Copy the mode without any 'b'. All supported modes consist of at most
  // two characters afterwards.
  char normalized[3] = { '\0', '\0', '\0' };
  size_t length = 0;
  for (const char* c = mode; *c != '\0'; ++c) {
    if (*c == 'b') {
      continue;
    }
    if (length == 2) {
      // More than two significant characters: unknown mode.
      return 0;
    }
    normalized[length++] = *c;
  }

  if (strcmp(normalized, "r") == 0) {
    return O_RDONLY;
  } else if (strcmp(normalized, "r+") == 0) {
    return O_RDWR;
  } else if (strcmp(normalized, "w") == 0) {
    return O_WRONLY | O_CREAT | O_TRUNC;
  } else if (strcmp(normalized, "w+") == 0) {
    return O_RDWR | O_CREAT | O_TRUNC;
  } else if (strcmp(normalized, "a") == 0) {
    return O_APPEND | O_WRONLY | O_CREAT;
  } else if (strcmp(normalized, "a+") == 0) {
    return O_APPEND | O_RDWR | O_CREAT;
  }
  return 0;
}
// Maps an XtreemFS POSIX error code to the errno value of the local system.
// Codes without an explicit mapping are returned with their numeric value
// unchanged.
int ConvertXtreemFSErrnoToUnix(xtreemfs::pbrpc::POSIXErrno xtreemfs_errno) {
  // Default: pass the numeric value through.
  int local_errno = xtreemfs_errno;

  switch (xtreemfs_errno) {
    case xtreemfs::pbrpc::POSIX_ERROR_EPERM:
      local_errno = EPERM;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_ENOENT:
      local_errno = ENOENT;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EINTR:
      local_errno = EINTR;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EIO:
      local_errno = EIO;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EAGAIN:
      local_errno = EAGAIN;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EACCES:
      local_errno = EACCES;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EEXIST:
      local_errno = EEXIST;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EXDEV:
      local_errno = EXDEV;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_ENODEV:
      local_errno = ENODEV;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_ENOTDIR:
      local_errno = ENOTDIR;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EISDIR:
      local_errno = EISDIR;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_EINVAL:
      local_errno = EINVAL;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_ENOTEMPTY:
      local_errno = ENOTEMPTY;
      break;
    case xtreemfs::pbrpc::POSIX_ERROR_ENODATA:
      local_errno = ENODATA;
      break;
    default:
      break;
  }
  return local_errno;
}

View File

@@ -0,0 +1,101 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/open_file_table.h"
#include <boost/thread/lock_guard.hpp>
#include "ld_preload/passthrough.h"
// Creates an entry for the given file handle with offset 0 and without a
// temporary file; the temporary file is only opened by Initialise().
OpenFile::OpenFile(xtreemfs::FileHandle* fh)
    : fh_(fh),
      offset_(0),
      tmp_file_(NULL),
      tmp_file_fd_(-1) {}
// Opens a temporary file and remembers its file descriptor. If no temporary
// file can be created, the stored descriptor is -1.
void OpenFile::Initialise() {
  tmp_file_ = tmpfile();
  // tmpfile() returns NULL on failure; fileno() must not be called then.
  tmp_file_fd_ = (tmp_file_ != NULL) ? fileno(tmp_file_) : -1;
}
// Closes the temporary file, if one is open, and resets the stored stream
// and descriptor.
void OpenFile::Deinitialise() {
  if (tmp_file_ == NULL) {
    return;
  }
  fclose(tmp_file_);
  tmp_file_fd_ = -1;
  tmp_file_ = NULL;
}
int OpenFile::GetFileDescriptor() {
return tmp_file_fd_;
}
// Opens the table's temporary file and initializes the descriptor counter.
OpenFileTable::OpenFileTable() {
  // open a temporary file as base for returning valid file descriptors that do no interfere with the passthrough values
  tmp_file_ = tmpfile();
  // tmpfile() returns NULL on failure; fileno() must not be called then.
  tmp_file_fd_ = (tmp_file_ != NULL) ? fileno(tmp_file_) : -1;
  next_fd_ = 10000;
}
// Closes the temporary file opened by the constructor.
OpenFileTable::~OpenFileTable() {
  // The stream is NULL if tmpfile() failed; fclose(NULL) is not allowed.
  if (tmp_file_ != NULL) {
    fclose(tmp_file_);
  }
}
// Registers a file handle and returns the file descriptor under which it is
// stored. The descriptor is the one of a temporary file opened for this
// entry. Returns -1, without adding an entry, if no valid descriptor could
// be obtained.
int OpenFileTable::Register(xtreemfs::FileHandle* handle) {
  boost::lock_guard<boost::mutex> guard(mutex_);
  //const int fd = ((funcptr_dup)libc_dup)(tmp_file_fd_); // NOTE: calling dup(tmp_file_fd_) would lead to a deadlock here
  OpenFile open_file(handle);
  open_file.Initialise();
  const int fd = open_file.GetFileDescriptor();
  // const int fd = next_fd_++;
  if (fd < 0) {
    // Do not store an entry under an invalid descriptor.
    xprintf(" +fd(%d)\n", fd);
    return -1;
  }

  open_files_.insert(std::make_pair(fd, open_file));
  xprintf(" +fd(%d)\n", fd);
  return fd;
}
void OpenFileTable::Unregister(int fd) {
boost::lock_guard<boost::mutex> guard(mutex_);
FileTable::iterator i = open_files_.find(fd);
if (i == open_files_.end()) {
assert(false);
} else {
i->second.Deinitialise();
}
open_files_.erase(fd);
//((funcptr_close)libc_close)(fd); // close(fd);
xprintf(" -fd(%d)\n", fd);
}
// Returns a copy of the entry stored for the given descriptor, or an entry
// with a NULL file handle if the descriptor is unknown.
OpenFile OpenFileTable::Get(int fd) {
  boost::lock_guard<boost::mutex> guard(mutex_);

  FileTable::iterator entry = open_files_.find(fd);
  if (entry != open_files_.end()) {
    return entry->second;
  }
  return OpenFile(NULL);
}
int OpenFileTable::Set(int fd, xtreemfs::FileHandle* handle) {
xprintf(" +fd(%d)\n", fd);
boost::lock_guard<boost::mutex> guard(mutex_);
// TODO: fix, see Register
open_files_.insert(std::make_pair(fd, OpenFile(handle)));
return fd;
}
void OpenFileTable::SetOffset(int fd, uint64_t offset) {
boost::lock_guard<boost::mutex> guard(mutex_);
FileTable::iterator i = open_files_.find(fd);
if (i == open_files_.end()) {
return;
} else {
i->second.offset_ = offset;
}
}
// Returns true if an entry exists for the given descriptor.
bool OpenFileTable::Has(int fd) {
  boost::lock_guard<boost::mutex> guard(mutex_);
  const bool is_registered = (open_files_.find(fd) != open_files_.end());
  return is_registered;
}

View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/passthrough.h"
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
// Handle of the C library as returned by dlopen() in initialize_passthrough().
void* libc;
// Addresses of the original C library functions, resolved with dlsym() in
// initialize_passthrough(). Each libc_<name> holds the symbol "<name>".
void* libc_open;
void* libc_close;
void* libc___close;
void* libc_read;
void* libc_write;
void* libc_pread;
void* libc_pwrite;
void* libc_dup;
void* libc_dup2;
void* libc_lseek;
void* libc_stat;
void* libc_fstat;
void* libc___xstat;
void* libc___xstat64;
void* libc___fxstat;
void* libc___fxstat64;
void* libc___lxstat;
void* libc___lxstat64;
void* libc_fopen;
void* libc_truncate;
void* libc_ftruncate;
// Handle of libattr and the extended attribute functions resolved from it in
// initialize_passthrough().
void* libattr;
void* libattr_setxattr;
void* libattr_fsetxattr;
// Private duplicate of stdout: the application might close stdout or reuse
// the first file descriptors for other purposes. The stream is assigned in
// initialize_passthrough() and stays NULL until then.
static FILE* fdout = NULL;

/**
 * Returns the private stdout duplicate (NULL while it has not been set up).
 */
FILE* xtreemfs_stdout() {
  FILE* const stream = fdout;
  return stream;
}
/**
 * Resolves the addresses of the original library functions used for
 * pass-through and creates the private duplicate of stdout.
 *
 * Called once through initialize_passthrough_if_necessary().
 */
static void initialize_passthrough() {
  xprintf("initialize_passthrough(): Setting up pass-through\n");

  // TODO: link with correct libc, version vs. 32 bit vs. 64 bit
  libc = dlopen("libc.so.6", RTLD_LAZY);
  libc_open = dlsym(libc, "open");
  libc_close = dlsym(libc, "close");
  libc___close = dlsym(libc, "__close");
  libc_read = dlsym(libc, "read");
  libc_write = dlsym(libc, "write");
  libc_pread = dlsym(libc, "pread");
  libc_pwrite = dlsym(libc, "pwrite");
  libc_dup = dlsym(libc, "dup");
  libc_dup2 = dlsym(libc, "dup2");
  libc_lseek = dlsym(libc, "lseek");
  libc_stat = dlsym(libc, "stat");
  libc_fstat = dlsym(libc, "fstat");
  libc___xstat = dlsym(libc, "__xstat");
  libc___xstat64 = dlsym(libc, "__xstat64");
  libc___fxstat = dlsym(libc, "__fxstat");
  libc___fxstat64 = dlsym(libc, "__fxstat64");
  libc___lxstat = dlsym(libc, "__lxstat");
  libc___lxstat64 = dlsym(libc, "__lxstat64");
  libc_fopen = dlsym(libc, "fopen");
  libc_truncate = dlsym(libc, "truncate");
  libc_ftruncate = dlsym(libc, "ftruncate");

  libattr = dlopen("libattr.so.1", RTLD_LAZY);
  // When libattr cannot be loaded, look the symbols up in libc rather than
  // handing a NULL handle to dlsym() (glibc exports both functions, too).
  void* const xattr_lib = (libattr != NULL) ? libattr : libc;
  libattr_setxattr = dlsym(xattr_lib, "setxattr");
  // Was resolved from "setxattr" before, so fsetxattr() pass-through ended up
  // calling the path-based function with a file descriptor.
  libattr_fsetxattr = dlsym(xattr_lib, "fsetxattr");

  // Duplicate stdout through the original dup() and wrap it in a stream.
  int stdout2 = ((funcptr_dup)libc_dup)(1);
  if (stdout2 != -1) {
    fdout = fdopen(stdout2, "a");
  }
  xprintf("initialize_passthrough(): New stdout %d\n", stdout2);
}
// Guard handed to pthread_once() for initialize_passthrough().
static pthread_once_t passthrough_initialized = PTHREAD_ONCE_INIT;

/**
 * Sets up the pass-through function table via pthread_once().
 */
void initialize_passthrough_if_necessary() {
  pthread_once_t* const guard = &passthrough_initialized;
  pthread_once(guard, initialize_passthrough);
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/path.h"
#include <cstring>
#include "ld_preload/passthrough.h"
/**
 * Remembers pathname; the XtreemFS-relative path stays NULL until Parse()
 * has been called.
 */
Path::Path(const char* pathname)
    : pathname_(pathname),
      xtreemfs_path_(NULL) {}
/**
 * Replaces the shared path prefix returned by GetXtreemFSPrefix().
 */
void Path::SetXtreemFSPrefix(const std::string& prefix) {
  std::string& shared_prefix = GetXtreemFSPrefix();
  shared_prefix = prefix;
}
/**
 * Tells whether the wrapped path name starts with the XtreemFS prefix.
 *
 * An empty prefix matches every path.
 */
bool Path::IsXtreemFS() {
  xprintf("Debug: IsXtreemFS: pathname=%s, prefix=%s\n", pathname_, GetXtreemFSPrefix().c_str());
  const std::string& prefix = GetXtreemFSPrefix();
  // Comparing the first prefix.size() characters is the same test as checking
  // that the first occurrence of the prefix is at the very beginning.
  return strncmp(pathname_, prefix.c_str(), prefix.size()) == 0;
}
/**
 * Gives access to the prefix string shared by all Path objects.
 *
 * The string is a function-local static and starts out empty.
 */
std::string& Path::GetXtreemFSPrefix() {
  static std::string shared_prefix;
  return shared_prefix;
}
/**
 * Computes the XtreemFS-relative path by skipping as many characters of the
 * path name as the prefix is long.
 *
 * No check is made that the path name really starts with the prefix or is at
 * least that long.
 */
void Path::Parse() {
  const std::string::size_type prefix_length = GetXtreemFSPrefix().size();
  xtreemfs_path_ = pathname_ + prefix_length;
  xprintf("Info: Path::Parse(): path=%s\n", xtreemfs_path_);
}
/**
 * Returns the path computed by Parse() (NULL if Parse() was never called).
 */
const char* Path::GetXtreemFSPath() {
  const char* const relative_path = xtreemfs_path_;
  return relative_path;
}

View File

@@ -0,0 +1,300 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/preload.h"
#include <pthread.h>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <stdio.h>
#include <fcntl.h>
#include <list>
#include <string>
#include <boost/atomic.hpp>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/system_user_mapping_unix.h"
#include "libxtreemfs/volume_implementation.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
#include "ld_preload/environment.h"
#include "ld_preload/misc.h"
#include "ld_preload/passthrough.h"
// The single Environment of the preload library: created by initialize_env(),
// destroyed by deinitialize_env(), handed out by get_env().
static Environment* env = NULL;
/**
* The environment should be initialised exactly once during shared library initialisation.
* Prior to that, all operations work in pass-through mode.
* To overcome undefined global initialisation order, we use a local static variable.
* This variable is accesd by a single writer (lib (de)initialisation), and multiple readers (exported operations).
* Writer: toggle == true (toggles is_initialised)
* Reader: toggle == false (returns the current value of is_initialised)
*
* NOTE: pthread_once initialisation for the overlay leads to a deadlock due to a re-entry situation:
* - environment ctor is called the first time via pthread_once
* - which leads a write somewhere (probably to a socket during client initialisation)
* - which then enters the pthread_once initialisation mechanism again and blocks deadlocks
*/
bool overlay_initialized(bool toggle /* defaults to: false */) {
static boost::atomic<bool> is_initilized(false);
// writer
if(toggle) {
return is_initilized.exchange(!is_initilized.load());
}
// reader
return is_initilized.load();
}
void initialize_env() {
env = new Environment();
xprintf("env initialised\n");
}
/**
 * Destroys the library's Environment.
 *
 * env is reset to NULL afterwards so that get_env() does not hand out a
 * pointer to the destroyed object and a repeated call is harmless.
 */
void deinitialize_env() {
  xprintf("deinitialize_env()\n");
  delete env;
  env = NULL;
  xprintf("env deinitialised\n");
}
/**
 * Returns the library's Environment (NULL before initialize_env() ran).
 */
Environment* get_env() {
  Environment* const current = env;
  return current;
}
/**
 * Tells whether fd is registered in the environment's open file table.
 * Requires the environment to exist.
 */
bool is_xtreemfs_fd(int fd) {
  const bool registered = env->open_file_table_.Has(fd);
  return registered;
}
/**
 * Tells whether path lies below the configured XtreemFS prefix.
 */
bool is_xtreemfs_path(const char *path) {
  return Path(path).IsXtreemFS();
}
/*
 * Library constructor: resolves the pass-through functions, creates the
 * environment and finally switches the overlay file system on.
 */
__attribute__((constructor))
void init_preload(void) {
  initialize_passthrough_if_necessary();
  xprintf("library initialisation 1\n");
  xprintf("library initialisation 2\n");

  initialize_env();
  xprintf("library initialisation 3\n");

  // activate overlay filesystem; the call returns the previous state
  const bool previous_state = overlay_initialized(true);
  xprintf("library initialised! %d\n", previous_state);
}
/*
 * Library destructor: switches the overlay file system off first and then
 * destroys the environment.
 */
__attribute__((destructor))
void deinit_preload(void) {
  xprintf("library deinitialisation started.\n");

  // deactivate overlay filesystem; the call returns the previous state
  const bool previous_state = overlay_initialized(true);
  deinitialize_env();

  xprintf("library deinitialised! %d\n", previous_state);
}
/* XtreemFS equivalents to POSIX file system calls */
int xtreemfs_open(const char* pathname, int flags, int mode) {
xprintf(" open on xtreemfs(%s)\n", pathname);
Path path(pathname);
path.Parse();
xtreemfs::Volume* volume = env->GetVolume();
const xtreemfs::pbrpc::SYSTEM_V_FCNTL xtreem_flags = ConvertFlagsUnixToXtreemFS(flags);
xtreemfs::FileHandle* handle = volume->OpenFile(
env->user_creds_,
path.GetXtreemFSPath(),
xtreem_flags,
mode);
int fd = env->open_file_table_.Register(handle);
xprintf(" open on xtreemfs(%s) -> %d\n", pathname, fd);
return fd;
}
/**
 * Removes fd from the open file table and closes its XtreemFS file handle.
 *
 * @return 0 on success; -1 with errno set to EBADF when fd is not in the
 *         table (the table then yields an entry without a file handle,
 *         which must not be dereferenced).
 */
int xtreemfs_close(int fd) {
  xprintf(" close xtreemfs(%d)\n", fd);
  OpenFile handle = env->open_file_table_.Get(fd);
  if (handle.fh_ == NULL) {
    errno = EBADF;
    return -1;
  }
  env->open_file_table_.Unregister(fd);
  // An explicit Flush() is not needed, Close() does it implicitly.
  handle.fh_->Close();  // TODO: error code
  return 0;
}
uint64_t xtreemfs_pread(int fd, void* buf, uint64_t nbyte, uint64_t offset) {
xprintf(" read xtreemfs(%d)\n", fd);
OpenFile handle = env->open_file_table_.Get(fd);
return handle.fh_->Read((char*)buf, nbyte, offset);
}
uint64_t xtreemfs_read(int fd, void* buf, uint64_t nbyte) {
xprintf(" read xtreemfs(%d)\n", fd);
OpenFile handle = env->open_file_table_.Get(fd);
int read = handle.fh_->Read((char*)buf, nbyte, handle.offset_);
env->open_file_table_.SetOffset(fd, handle.offset_ + read);
return read;
}
uint64_t xtreemfs_write(int fd, const void* buf, uint64_t nbyte) {
xprintf(" write xtreemfs(%d)\n", fd);
OpenFile handle = env->open_file_table_.Get(fd);
int written = handle.fh_->Write((char*)buf, nbyte, handle.offset_);
env->open_file_table_.SetOffset(fd, handle.offset_ + written);
return written;
}
/**
 * dup2() for XtreemFS file descriptors.
 *
 * Duplicating a file handle is not implemented yet
 * (TODO: implement FileHandle::Duplicate()). The previous code stored an
 * uninitialised FileHandle pointer under newfd, so any later use of newfd
 * dereferenced garbage. Until duplication exists the call fails cleanly.
 *
 * @return -1 with errno set to EBADF when oldfd is not an open XtreemFS file
 *         descriptor, otherwise -1 with errno set to ENOSYS.
 */
int xtreemfs_dup2(int oldfd, int newfd) {
  xprintf(" dup2 xtreemfs(%d, %d)\n", oldfd, newfd);
  OpenFile handle = env->open_file_table_.Get(oldfd);
  if (handle.fh_ == NULL) {
    xprintf(" dup2 error(%d, %d)\n", oldfd, newfd);
    errno = EBADF;
    return -1;
  }

  // Once Duplicate() exists:
  //   xtreemfs::FileHandle* new_handle = handle.fh_->Duplicate();
  //   env->open_file_table_.Set(newfd, new_handle);
  //   return newfd;
  xprintf(" dup2 not supported(%d, %d)\n", oldfd, newfd);
  errno = ENOSYS;
  return -1;
}
int xtreemfs_dup(int fd) {
xprintf(" dup xtreemfs(%d)\n", fd);
OpenFile handle = env->open_file_table_.Get(fd);
xtreemfs::FileHandle* new_handle; // = handle.fh_->Duplicate(); // TODO: implement Duplicate
return env->open_file_table_.Register(new_handle);
}
/**
 * Repositions the offset stored for fd.
 *
 * @param mode  SEEK_SET, SEEK_CUR or SEEK_END.
 * @return The new offset, or -1 with errno set to EINVAL when mode is not one
 *         of the three values or the resulting offset would be negative.
 *         (The error code used to be returned as if it were an offset.)
 */
off_t xtreemfs_lseek(int fd, off_t offset, int mode) {
  xprintf(" lseek xtreemfs(%d)\n", fd);
  OpenFile handle = env->open_file_table_.Get(fd);

  off_t new_offset;
  switch (mode) {
    case SEEK_SET:
      new_offset = offset;
      break;
    case SEEK_CUR:
      new_offset = static_cast<off_t>(handle.offset_ + offset);
      break;
    case SEEK_END:
      // TODO: should be relative to the file size; currently treated like
      // SEEK_SET.
      new_offset = offset;
      break;
    default:
      errno = EINVAL;
      return -1;
  }

  if (new_offset < 0) {
    // A negative position must not end up in the unsigned stored offset.
    errno = EINVAL;
    return -1;
  }

  env->open_file_table_.SetOffset(fd, new_offset);
  return new_offset;
}
/**
 * Fetches the attributes of pathname from the volume and converts them into
 * *buf.
 *
 * T should be "struct stat" or "struct stat64".
 *
 * @return 0 on success, -1 on failure with errno set to the converted POSIX
 *         error of the exception, or to EIO for any other exception.
 */
template<typename T>
static int xtreemfs_stat_impl(const char *pathname, T *buf) {
  Path path(pathname);
  path.Parse();

  xtreemfs::pbrpc::Stat stat;
  bool failed = false;
  int error_code = 0;
  try {
    env->GetVolume()->GetAttr(env->user_creds_, path.GetXtreemFSPath(), &stat);
  } catch(const xtreemfs::PosixErrorException& e) {
    failed = true;
    error_code = ConvertXtreemFSErrnoToUnix(e.posix_errno());
  } catch(const xtreemfs::XtreemFSException& e) {
    failed = true;
    error_code = EIO;
  } catch(const std::exception& e) {
    xprintf("A non-XtreemFS exception occurred: %s", std::string(e.what()).c_str());
    failed = true;
    error_code = EIO;
  }

  if (failed) {
    // Log the code that is really reported (the old code printed errno before
    // assigning it) and set errno last so that logging cannot overwrite it.
    xprintf(" xtreemfs_stat_impl(%s), returning -1, errno(%d)\n", pathname, error_code);
    errno = error_code;
    return -1;
  }

  ConvertXtreemFSStatToUnix(stat, buf, env->GetSystemUserMapping());
  xprintf(" xtreemfs_stat_impl(%s), returning 0\n", pathname);
  return 0;
}
/**
 * Fetches the attributes of the file open under fd and converts them into
 * *buf.
 *
 * T should be "struct stat" or "struct stat64".
 *
 * @return 0 on success, -1 on failure with errno set to EBADF when fd is not
 *         in the open file table, to the converted POSIX error of the
 *         exception, or to EIO for any other exception.
 */
template<typename T>
static int xtreemfs_fstat_impl(int fd, T *buf) {
  OpenFile handle = env->open_file_table_.Get(fd);
  if (handle.fh_ == NULL) {
    // Unknown descriptors yield an entry without a file handle.
    errno = EBADF;
    return -1;
  }

  xtreemfs::pbrpc::Stat stat;
  try {
    handle.fh_->GetAttr(env->user_creds_, &stat);
  } catch(const xtreemfs::PosixErrorException& e) {
    errno = ConvertXtreemFSErrnoToUnix(e.posix_errno());
    return -1;
  } catch(const xtreemfs::XtreemFSException& e) {
    errno = EIO;
    return -1;
  } catch(const std::exception& e) {
    xprintf("A non-XtreemFS exception occurred: %s", std::string(e.what()).c_str());
    errno = EIO;
    return -1;
  }

  ConvertXtreemFSStatToUnix(stat, buf, env->GetSystemUserMapping());
  return 0;
}
/** stat() on an XtreemFS path; see xtreemfs_stat_impl() for the result. */
int xtreemfs_stat(const char *pathname, struct stat *buf) {
  xprintf(" xtreemfs_stat(%s)\n", pathname);
  const int result = xtreemfs_stat_impl(pathname, buf);
  return result;
}
/** stat64() on an XtreemFS path; see xtreemfs_stat_impl() for the result. */
int xtreemfs_stat64(const char *pathname, struct stat64 *buf) {
  xprintf(" xtreemfs_stat64(%s)\n", pathname);
  const int result = xtreemfs_stat_impl(pathname, buf);
  return result;
}
/** fstat() on an XtreemFS descriptor; see xtreemfs_fstat_impl(). */
int xtreemfs_fstat(int fd, struct stat *buf) {
  xprintf(" xtreemfs_fstat(%d)\n", fd);
  const int result = xtreemfs_fstat_impl(fd, buf);
  return result;
}
/** fstat64() on an XtreemFS descriptor; see xtreemfs_fstat_impl(). */
int xtreemfs_fstat64(int fd, struct stat64 *buf) {
  xprintf(" xtreemfs_fstat64(%d)\n", fd);
  const int result = xtreemfs_fstat_impl(fd, buf);
  return result;
}
int xtreemfs_setxattr(const char *pathname, const char *name, const void *value, size_t size, int flags) {
Path path(pathname);
path.Parse();
try {
env->GetVolume()->SetXAttr(env->user_creds_, path.GetXtreemFSPath(), std::string(name),
std::string(static_cast<const char*>(value), size), static_cast<xtreemfs::pbrpc::XATTR_FLAGS>(flags));
} catch(const xtreemfs::PosixErrorException& e) {
errno = ConvertXtreemFSErrnoToUnix(e.posix_errno());
return -1;
} catch(const xtreemfs::XtreemFSException& e) {
errno = EIO;
return -1;
} catch(const std::exception& e) {
xprintf("A non-XtreemFS exception occurred: %s", std::string(e.what()).c_str());
errno = EIO;
return -1;
}
return 0;
}
/**
 * Placeholder for fsetxattr() on XtreemFS descriptors.
 *
 * Nothing is stored yet: all arguments are ignored and success is reported.
 * An implementation would look up the OpenFile for fd and set the attribute
 * through the volume, as xtreemfs_setxattr() does for paths.
 *
 * @return Always 0.
 */
int xtreemfs_fsetxattr(int fd, const char *name, const void *value, size_t size, int flags) {
  (void)fd;
  (void)name;
  (void)value;
  (void)size;
  (void)flags;
  return 0;
}

View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2014 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "ld_preload/preload_options.h"
#include <csignal>
#include <boost/lexical_cast.hpp>
#include <boost/program_options/cmdline.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <sstream>
#include "libxtreemfs/helper.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "ld_preload/passthrough.h"
using namespace std;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Creates the option set; the "Preload Options" group does not contain any
 * option yet.
 */
PreloadOptions::PreloadOptions()
    : Options(),
      preload_descriptions_("Preload Options") {
  // TODO: register options via preload_descriptions_.add_options()
}
void PreloadOptions::ParseCommandLine(int argc, char** argv) {
// Parse general options and retrieve unregistered options for own parsing.
vector<string> options = Options::ParseCommandLine(argc, argv);
// Read Volume URL and mount point from command line.
po::positional_options_description p;
p.add("xtreemfs_url", 1);
p.add("mount_point", 1);
po::options_description mount("Mount options");
mount.add_options()
("xtreemfs_url", po::value(&xtreemfs_url), "DIR to use (without volume)")
("mount_point", po::value(&mount_point), "where to virtually mount the volume")
;
// Parse command line.
po::options_description all_descriptions_;
all_descriptions_.add(mount).add(preload_descriptions_);
po::variables_map vm;
try {
po::store(po::command_line_parser(options)
.options(all_descriptions_)
.positional(p)
.style(style::default_style & ~style::allow_guessing)
.run(), vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
// Do not check parameters if the help shall be shown.
if (show_help || empty_arguments_list || show_version) {
return;
}
// Extract information from command line.
try {
Options::ParseURL(kDIR);
} catch(const xtreemfs::XtreemFSException& e) {
xprintf("excpetion\n");
cout << "ParseURL, error: " << e.what() << endl << endl;
}
// Check for required parameters.
if (service_addresses.empty()) {
throw InvalidCommandLineParametersException("missing DIR host.");
}
if (volume_name.empty()) {
throw InvalidCommandLineParametersException("missing volume name.");
}
if (mount_point.empty()) {
throw InvalidCommandLineParametersException("missing virtual mount point.");
}
}
std::string PreloadOptions::ShowCommandLineHelp() {
ostringstream stream;
stream << preload_descriptions_
<< Options::ShowCommandLineHelp();
return stream.str();
}
} // namespace xtreemfs

View File

@@ -0,0 +1,58 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/async_write_buffer.h"
#include <cassert>
#include <cstring>
#include "xtreemfs/OSD.pb.h"
namespace xtreemfs {
/**
 * Creates a pending write whose target OSD is taken from the UUID iterator
 * (use_uuid_iterator is true).
 *
 * The data_length bytes at data are copied into a buffer owned by this
 * object; write_request is deleted by the destructor.
 */
AsyncWriteBuffer::AsyncWriteBuffer(xtreemfs::pbrpc::writeRequest* write_request,
                                   const char* data,
                                   size_t data_length,
                                   FileHandleImplementation* file_handle,
                                   XCapHandler* xcap_handler)
    : write_request(write_request),
      data_length(data_length),
      file_handle(file_handle),
      xcap_handler_(xcap_handler),
      use_uuid_iterator(true),
      state_(PENDING),
      retry_count_(0) {
  assert(write_request && data && file_handle);

  // Private copy of the payload.
  char* const payload = new char[data_length];
  memcpy(payload, data, data_length);
  this->data = payload;
}
/**
 * Creates a pending write which is sent to the OSD with the UUID osd_uuid
 * (use_uuid_iterator is false).
 *
 * The data_length bytes at data are copied into a buffer owned by this
 * object; write_request is deleted by the destructor.
 */
AsyncWriteBuffer::AsyncWriteBuffer(xtreemfs::pbrpc::writeRequest* write_request,
                                   const char* data,
                                   size_t data_length,
                                   FileHandleImplementation* file_handle,
                                   XCapHandler* xcap_handler,
                                   const std::string& osd_uuid)
    : write_request(write_request),
      data_length(data_length),
      file_handle(file_handle),
      xcap_handler_(xcap_handler),
      use_uuid_iterator(false),
      osd_uuid(osd_uuid),
      state_(PENDING),
      retry_count_(0) {
  assert(write_request && data && file_handle);

  // Private copy of the payload.
  char* const payload = new char[data_length];
  memcpy(payload, data, data_length);
  this->data = payload;
}
/**
 * Releases the write request and the payload copy made by the constructor.
 */
AsyncWriteBuffer::~AsyncWriteBuffer() {
  delete this->write_request;
  delete[] this->data;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,663 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/async_write_handler.h"
#include <cassert>
#include <boost/lexical_cast.hpp>
#include <google/protobuf/descriptor.h>
#include <string>
#include "libxtreemfs/async_write_buffer.h"
#include "libxtreemfs/file_handle_implementation.h"
#include "libxtreemfs/file_info.h"
#include "libxtreemfs/interrupt.h"
#include "libxtreemfs/uuid_iterator.h"
#include "libxtreemfs/uuid_resolver.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "pbrpc/RPC.pb.h"
#include "util/error_log.h"
#include "util/logging.h"
#include "util/synchronized_queue.h"
#include "xtreemfs/OSDServiceClient.h"
using namespace std;
using namespace xtreemfs::util;
using namespace xtreemfs::pbrpc;
namespace xtreemfs {
/**
 * Creates an idle handler without pending writes.
 *
 * file_info, uuid_iterator, uuid_resolver and osd_service_client must not be
 * NULL (asserted). The pointers are only stored here; nothing in this file
 * section deletes them.
 * callback_queue is the queue CallFinished() enqueues received responses to.
 */
AsyncWriteHandler::AsyncWriteHandler(
FileInfo* file_info,
UUIDIterator* uuid_iterator,
UUIDResolver* uuid_resolver,
xtreemfs::pbrpc::OSDServiceClient* osd_service_client,
const xtreemfs::pbrpc::Auth& auth_bogus,
const xtreemfs::pbrpc::UserCredentials& user_credentials_bogus,
const Options& volume_options,
util::SynchronizedQueue<CallbackEntry>& callback_queue)
: state_(IDLE),
pending_bytes_(0),
pending_writes_(0),
writing_paused_(false),
waiting_blocking_threads_count_(0),
file_info_(file_info),
uuid_iterator_(uuid_iterator),
uuid_resolver_(uuid_resolver),
// Options used when resolving an OSD UUID to an address in WriteCommon().
uuid_resolver_options_(volume_options.max_write_tries,
volume_options.retry_delay_s,
false,
NULL),
osd_service_client_(osd_service_client),
auth_bogus_(auth_bogus),
user_credentials_bogus_(user_credentials_bogus),
volume_options_(volume_options),
// Upper bound for the sum of the data lengths of all writes in flight:
// maximum number of requests times the maximum request size (kB * 1024).
max_writeahead_(volume_options.async_writes_max_requests *
volume_options.async_writes_max_request_size_kb * 1024),
max_requests_(volume_options.async_writes_max_requests),
max_write_tries_(volume_options.max_write_tries),
redirected_(false),
fast_redirect_(false),
worst_write_buffer_(0),
callback_queue_(callback_queue) {
assert(file_info && uuid_iterator && uuid_resolver && osd_service_client);
}
AsyncWriteHandler::~AsyncWriteHandler() {
if (pending_writes_ > 0) {
string path;
file_info_->GetPath(&path);
string error = "The AsyncWriteHandler for the file with the path: " + path
+ " has pending writes left. This must NOT happen.";
Logging::log->getLog(LEVEL_ERROR) << error << endl;
ErrorLog::error_log->AppendError(error);
assert(pending_writes_ == 0);
}
if (waiting_blocking_threads_count_ > 0) {
string path;
file_info_->GetPath(&path);
string error = "The AsyncWriteHandler for the file"
" with the path: " + path + " has remaining blocked threads waiting"
" for the completion of pending writes left. This must NOT happen.";
Logging::log->getLog(LEVEL_ERROR) << error << endl;
ErrorLog::error_log->AppendError(error);
assert(waiting_blocking_threads_count_ == 0);
}
if (waiting_observers_.size() > 0) {
string path;
file_info_->GetPath(&path);
string error = "The AsyncWriteHandler for the file"
" with the path: " + path + " has remaining observers (calls waiting"
" for the completion of pending writes) left. This must NOT happen.";
Logging::log->getLog(LEVEL_ERROR) << error << endl;
ErrorLog::error_log->AppendError(error);
assert(waiting_observers_.size() == 0);
}
for (list<WaitForCompletionObserver*>::iterator it
= waiting_observers_.begin();
it != waiting_observers_.end();
++it) {
delete *it;
}
}
/**
 * Queues write_buffer as a write in flight and sends it to the OSD.
 *
 * Blocks while writing is paused or while the buffer does not fit into the
 * write-ahead window (maximum number of bytes or of requests in flight).
 *
 * @throws XtreemFSException if the buffer is larger than the maximum
 *         write-ahead size.
 * @throws PosixErrorException (POSIX_ERROR_EIO) if the handler has finally
 *         failed.
 */
void AsyncWriteHandler::Write(AsyncWriteBuffer* write_buffer) {
  assert(write_buffer);

  const bool exceeds_window =
      write_buffer->data_length > static_cast<size_t>(max_writeahead_);
  if (exceeds_window) {
    throw XtreemFSException("The maximum allowed writeahead size: "
        + boost::lexical_cast<string>(max_writeahead_)
        + " is smaller than the size of this write request: "
        + boost::lexical_cast<string>(write_buffer->data_length));
  }

  // Append to list of writes in flight.
  {
    boost::mutex::scoped_lock lock(mutex_);

    for (;;) {
      if (state_ == FINALLY_FAILED) {
        break;
      }
      const bool must_wait =
          writing_paused_ ||
          (pending_bytes_ + write_buffer->data_length) >
              static_cast<size_t>(max_writeahead_) ||
          writes_in_flight_.size() == max_requests_;
      if (!must_wait) {
        break;
      }
      // TODO(mberlin): Allow interruption and set the write status of the
      //                FileHandle of the interrupted write to an error state.
      pending_bytes_were_decreased_.wait(lock);
    }
    assert(writes_in_flight_.size() <= static_cast<size_t>(max_requests_));

    // NOTE: the check is done here (after the wait) to reach all threads
    //       that started waiting before the final failure.
    if (state_ == FINALLY_FAILED) {
      string failure =
          "Tried to asynchronously write to a finally failed write handler.";
      Logging::log->getLog(LEVEL_ERROR) << failure << endl;
      throw PosixErrorException(POSIX_ERROR_EIO, failure);
    }

    ++pending_writes_;
    IncreasePendingBytesHelper(write_buffer, &lock);
  }

  WriteCommon(write_buffer, NULL, false);
}
/**
 * Sends a failed write again: increments its retry count, marks it as
 * pending and hands it to WriteCommon().
 *
 * Must be called with the handler's lock held and while the handler is in
 * the state HAS_FAILED_WRITES (asserted).
 */
void AsyncWriteHandler::ReWrite(AsyncWriteBuffer* write_buffer,
                                boost::mutex::scoped_lock* lock) {
  assert(write_buffer && lock && lock->owns_lock() &&
         (state_ == HAS_FAILED_WRITES));

  write_buffer->retry_count_++;
  write_buffer->state_ = AsyncWriteBuffer::PENDING;
  ++pending_writes_;

  const bool is_rewrite = true;
  WriteCommon(write_buffer, lock, is_rewrite);
}
/**
 * Sends write_buffer to an OSD; shared by Write() and ReWrite().
 *
 * @param write_buffer  The write to send.
 * @param lock          Held lock of the caller for a rewrite, NULL otherwise
 *                      (the combination with is_rewrite is asserted).
 * @param is_rewrite    True when called from ReWrite().
 *
 * If the OSD UUID cannot be resolved to an address, the bookkeeping done by
 * the caller is reverted and the XtreemFSException is rethrown. For a first
 * write this removes and deletes write_buffer.
 */
void AsyncWriteHandler::WriteCommon(AsyncWriteBuffer* write_buffer,
boost::mutex::scoped_lock* lock,
bool is_rewrite) {
assert(write_buffer && ((lock && is_rewrite && lock->owns_lock())
|| (!lock && !is_rewrite)));
// Retrieve address for UUID.
string osd_uuid, osd_address;
if (write_buffer->use_uuid_iterator) {
uuid_iterator_->GetUUID(&osd_uuid);
// Store used OSD in write_buffer for the callback.
write_buffer->osd_uuid = osd_uuid;
} else {
osd_uuid = write_buffer->osd_uuid;
}
try {
uuid_resolver_->UUIDToAddressWithOptions(osd_uuid,
&osd_address,
uuid_resolver_options_);
} catch (const XtreemFSException&) {
if (is_rewrite) {
// In case of errors, throw exception.
--pending_writes_;
} else {
// In case of errors, remove write again and throw exception.
// This local lock shadows the (NULL) parameter of the same name.
boost::mutex::scoped_lock lock(mutex_);
DecreasePendingBytesHelper(write_buffer, &lock, true);
--pending_writes_;
}
throw;
}
// save the resolved uuid in the buffer (used for logging)
write_buffer->service_address = osd_address;
// make sure to use the potentially renewed XCap
write_buffer->xcap_handler_->GetXCap(write_buffer->write_request
->mutable_file_credentials()->mutable_xcap());
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "AsyncWriteHandler::(Re)Write for file_id: "
<< write_buffer->write_request->mutable_file_credentials()
->xcap().file_id()
<< ", XCap Expiration in: " << (write_buffer->write_request
->mutable_file_credentials()->xcap().expire_time_s() - time(NULL))
<< endl;
}
// Send out request.
// The send time is used by HandleCallback() to order errors and to compute
// the remaining retry delay.
write_buffer->request_sent_time =
boost::posix_time::microsec_clock::local_time();
// This handler is passed as callback and write_buffer as its context.
osd_service_client_->write(osd_address,
auth_bogus_,
user_credentials_bogus_,
write_buffer->write_request,
write_buffer->data,
write_buffer->data_length,
this,
reinterpret_cast<void*>(write_buffer));
}
/**
 * Blocks the calling thread until there are no pending writes left.
 *
 * While waiting, new writes are paused and the thread is counted in
 * waiting_blocking_threads_count_.
 */
void AsyncWriteHandler::WaitForPendingWrites() {
  boost::mutex::scoped_lock lock(mutex_);

  if (!(pending_writes_ > 0)) {
    return;
  }

  writing_paused_ = true;
  ++waiting_blocking_threads_count_;
  do {
    all_pending_writes_did_complete_.wait(lock);
  } while (pending_writes_ > 0);
  --waiting_blocking_threads_count_;
}
/*
 * Non-blocking variant of WaitForPendingWrites(): registers an observer
 * instead of blocking if there are pending writes.
 *
 * @param condition_variable    notify_one() is called on it if there are no
 *                              more pending writes.
 * @param wait_completed        Set to true if the pending async writes did
 *                              finish. Needed for observers who wait on
 *                              condition_variable and may be subject to
 *                              spurious wake ups.
 * @param wait_completed_mutex  Mutex of observer which guards wait_completed.
 *
 * @return True if the wait would have blocked and an observer was
 *         registered, otherwise false (i. e. the observer was not
 *         registered and *wait_completed was set to true).
 *
 * @remark Ownership is not transferred to the caller.
 */
bool AsyncWriteHandler::WaitForPendingWritesNonBlocking(
    boost::condition* condition_variable,
    bool* wait_completed,
    boost::mutex* wait_completed_mutex) {
  assert(condition_variable && wait_completed && wait_completed_mutex);

  boost::mutex::scoped_lock lock(mutex_);

  if (!(pending_writes_ > 0)) {
    // Nothing to wait for.
    *wait_completed = true;
    return false;
  }

  writing_paused_ = true;
  WaitForCompletionObserver* const observer =
      new WaitForCompletionObserver(condition_variable,
                                    wait_completed,
                                    wait_completed_mutex);
  waiting_observers_.push_back(observer);
  return true;
}
/**
 * Takes entries from callback_queue and passes each one to HandleCallback()
 * of the handler stored in the entry.
 *
 * The loop ends once an interruption of the current thread has been
 * requested while interruption is enabled. If a handler throws an exception
 * derived from std::exception, it is logged and the handler is failed
 * finally.
 */
void AsyncWriteHandler::ProcessCallbacks(util::SynchronizedQueue<CallbackEntry>& callback_queue) {
  for (;;) {
    if (boost::this_thread::interruption_requested() &&
        boost::this_thread::interruption_enabled()) {
      break;
    }

    const CallbackEntry& entry = callback_queue.Dequeue();
    try {
      entry.handler_->HandleCallback(entry.response_message_,
                                     entry.data_,
                                     entry.data_length_,
                                     entry.error_,
                                     entry.context_);
    } catch (const exception& e) {
      if (Logging::log->loggingActive(LEVEL_DEBUG)) {
        Logging::log->getLog(LEVEL_DEBUG)
            << "AsyncWriteHandler::ProcessCallbacks(): caught unhandled "
               "exception: " << e.what() << ". Invalidating file handle." << endl;
      }
      entry.handler_->FailFinallyHelper();
    }
  }
}
/**
 * Callback for a finished write RPC: the response is not processed here but
 * packed into a CallbackEntry together with this handler and enqueued to the
 * callback queue.
 */
void AsyncWriteHandler::CallFinished(
    xtreemfs::pbrpc::OSDWriteResponse* response_message,
    char* data,
    uint32_t data_length,
    xtreemfs::pbrpc::RPCHeader::ErrorResponse* error,
    void* context) {
  callback_queue_.Enqueue(
      CallbackEntry(this, response_message, data, data_length, error, context));
}
/**
 * Processes the answer to one asynchronous write.
 *
 * @param response_message  Response of the OSD; deleted at the end unless the
 *                          FileInfo took it over.
 * @param data              Deleted with delete[] at the end.
 * @param data_length       Not used in this function.
 * @param error             Non-NULL if the write failed; deleted at the end.
 * @param context           The AsyncWriteBuffer that was sent.
 *
 * Runs with mutex_ locked. A successful write is marked as SUCCEEDED and
 * DeleteBufferHelper() is called. A failed write puts the handler into the
 * state HAS_FAILED_WRITES (recoverable error types, retries left) or
 * FINALLY_FAILED. Once the last outstanding answer has arrived, all writes
 * still in flight are sent again (HAS_FAILED_WRITES) or CleanUp() is called
 * (FINALLY_FAILED).
 *
 * A boost::thread_interrupted raised while waiting for the retry delay is
 * rethrown after the three arguments have been freed.
 */
void AsyncWriteHandler::HandleCallback(
xtreemfs::pbrpc::OSDWriteResponse* response_message,
char* data,
uint32_t data_length,
xtreemfs::pbrpc::RPCHeader::ErrorResponse* error,
void* context) {
boost::mutex::scoped_lock lock(mutex_);
bool delete_response_message = true;
--pending_writes_; // we received some answer we were waiting for
// do nothing in case a write has finally failed
if (state_ != FINALLY_FAILED) {
AsyncWriteBuffer* write_buffer = reinterpret_cast<AsyncWriteBuffer*>(context);
if (error) {
write_buffer->state_ = AsyncWriteBuffer::FAILED;
writing_paused_ = true; // forbid new writes
bool first_fail = state_ != HAS_FAILED_WRITES;
if (first_fail) {
state_ = HAS_FAILED_WRITES;
worst_error_.MergeFrom(*error);
worst_write_buffer_ = write_buffer;
}
// Resolve UUID first.
const std::string& service_uuid = write_buffer->osd_uuid;;
const std::string& service_address = write_buffer->service_address;
// Retry if the retry limit has not been exceeded (0 = no limit) ...
if (((write_buffer->retry_count_ < max_write_tries_ || max_write_tries_ == 0) ||
// or this last retry should be delayed
(write_buffer->retry_count_ == max_write_tries_)) &&
// AND it is an recoverable error.
(error->error_type() == xtreemfs::pbrpc::IO_ERROR ||
error->error_type() == xtreemfs::pbrpc::INTERNAL_SERVER_ERROR ||
error->error_type() == xtreemfs::pbrpc::REDIRECT)) {
std::string error_str;
xtreemfs::util::LogLevel level = xtreemfs::util::LEVEL_ERROR;
// Special handling of REDIRECT "errors".
if (error->error_type() == xtreemfs::pbrpc::REDIRECT) {
assert(error->has_redirect_to_server_uuid());
// Log only the first redirect in a row of redirect errors
// since redirect precedes all other errors, the following condition
// identifies the first redirect (and we log before setting
// worst_error_)
if (worst_error_.error_type() != xtreemfs::pbrpc::REDIRECT) {
level = xtreemfs::util::LEVEL_INFO;
error_str = "The server with the UUID: " + service_uuid
+ " redirected to the current master with the UUID: "
+ error->redirect_to_server_uuid()
+ " after attempt: "
+ boost::lexical_cast<std::string>(write_buffer->retry_count_);
if (xtreemfs::util::Logging::log->loggingActive(level)) {
xtreemfs::util::Logging::log->getLog(level) << error_str << std::endl;
}
}
// set the current error as new worst error if it is worse:
// REDIRECT is worse than other error types and worse than a
// previous REDIRECT error if it belongs to a more recent request
if ((worst_error_.error_type() != xtreemfs::pbrpc::REDIRECT) ||
(worst_write_buffer_->request_sent_time <
write_buffer->request_sent_time)) {
worst_error_.CopyFrom(*error);
worst_write_buffer_ = write_buffer;
}
} else {
// Communication error or Internal Server Error.
// set the current error as new worst error if it is worse:
// a non-REDIRECT error is worse than another non-REDIRECT error
// from a request with an earlier time stamp
// NOTE(review): unlike in the REDIRECT branch, worst_write_buffer_ is not
// updated together with worst_error_ here - confirm that this is intended.
if ((worst_error_.error_type() != xtreemfs::pbrpc::REDIRECT) &&
(worst_write_buffer_->request_sent_time <
write_buffer->request_sent_time)) {
worst_error_.CopyFrom(*error);
}
// Log only the first retry in a series of write requests
if (first_fail) {
//if (write_buffer->retry_count_ == 1 && max_write_tries_ != 1) {
std::string retries_left = max_write_tries_ == 0 ? "infinite"
: boost::lexical_cast<std::string>(max_write_tries_
- write_buffer->retry_count_);
error_str = "Got no response from server "
+ service_address + " (" + service_uuid + ")"
+ ", retrying ("
+ boost::lexical_cast<std::string>(retries_left)
+ " attempts left) (Possible reason: The server is using SSL,"
+ " and the client is not.)";
if (xtreemfs::util::Logging::log->loggingActive(level)) {
xtreemfs::util::Logging::log->getLog(level) << error_str << std::endl;
}
}
}
} else { // if (recoverable error and retries left)
// FAIL finally after too many retries, or unrecoverable errors
state_ = FINALLY_FAILED;
// final cleanup is done when the last expected callback arrives
// Log error.
// Use the name of the error type if the enum descriptor knows the value,
// otherwise its number.
string error_type_name = boost::lexical_cast<string>(error->error_type());
const ::google::protobuf::EnumValueDescriptor* enum_desc =
ErrorType_descriptor()->FindValueByNumber(error->error_type());
if (enum_desc) {
error_type_name = enum_desc->name();
}
string error_message = "An async write sent to the server "
+ write_buffer->osd_uuid + " failed finally."
+ " Error type: " + error_type_name
+ " Error message: " + error->error_message()
+ " Complete error header: " + error->DebugString();
Logging::log->getLog(LEVEL_ERROR) << error_message << endl;
ErrorLog::error_log->AppendError(error_message);
// Cleanup is done at the end...
}
} else { // if (error)
// Write was successful.
if (state_ != HAS_FAILED_WRITES) {
// Tell FileInfo about the OSDWriteResponse.
if (response_message->has_size_in_bytes()) {
XCap xcap;
write_buffer->file_handle->GetXCap(&xcap);
if (file_info_->TryToUpdateOSDWriteResponse(response_message, xcap)) {
// Ownership of response_message was transferred, do not delete it.
delete_response_message = false;
}
}
}
write_buffer->state_ = AsyncWriteBuffer::SUCCEEDED;
DeleteBufferHelper(&lock); // do all deletes
}
// start retrying when this is the callback of the last response
// all handling of fails is done here
if ((state_ == HAS_FAILED_WRITES) && (pending_writes_ == 0)) {
// handle all errors according to the most relevant one
// NOTE: only handle-able errors with enough retries can make it
// until here
if (worst_error_.error_type() == xtreemfs::pbrpc::REDIRECT) {
uuid_iterator_->SetCurrentUUID(worst_error_.redirect_to_server_uuid());
// first fast reconnect
if (!redirected_) {
redirected_ = true;
fast_redirect_ = true;
}
} else {
// Mark the current UUID as failed and get the next one.
uuid_iterator_->MarkUUIDAsFailed(worst_write_buffer_->osd_uuid);
}
// delay retries to avoid flooding.
// delay = retry_delay - (current_time - request_sent_time)
boost::posix_time::time_duration delay_time_left =
boost::posix_time::seconds(volume_options_.retry_delay_s) - // delay
(boost::posix_time::microsec_clock::local_time() - // current time
worst_write_buffer_->request_sent_time);
if (!(fast_redirect_ || delay_time_left.is_negative())) {
try {
// Log time left
if (xtreemfs::util::Logging::log->loggingActive(
xtreemfs::util::LEVEL_INFO)) {
// Inside this branch the condition of the conditional expression below is
// always false, i.e. the remaining seconds are logged.
xtreemfs::util::Logging::log->getLog(xtreemfs::util::LEVEL_INFO)
<< "Retrying. Waiting " << boost::lexical_cast<std::string>(
(delay_time_left.is_negative() || fast_redirect_) ? 0 :
delay_time_left.total_seconds())
<< " more seconds till next retry."
<< std::endl;
}
// boost::thread interruption point
// NOTE(review): nothing visible here releases `lock` before sleeping, so
// mutex_ appears to stay locked for the whole delay - confirm.
Interruptibilizer::SleepInterruptible(
delay_time_left.total_milliseconds(),
NULL);
} catch (const boost::thread_interrupted&) {
// Cleanup.
if (delete_response_message) {
delete response_message;
}
delete [] data;
delete error;
throw;
}
} else {
fast_redirect_ = false;
}
// rewrite all in list (leading successfully sent entries have been
// deleted by the DeleteBufferHelper() call above)
try {
std::list<AsyncWriteBuffer*>::iterator it;
for (it = writes_in_flight_.begin();
it != writes_in_flight_.end();
++it) {
ReWrite(*it, &lock);
}
// reset states
state_ = WRITES_PENDING;
worst_error_.Clear();
worst_write_buffer_ = 0;
} catch (const XtreemFSException& e) {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "AsyncWriteHandler::HandleCallback(): caught exception: "
<< e.what() << ". Invalidating file handle." << endl;
}
state_ = FINALLY_FAILED; // Cleanup follow below
}
} // if ((state_ == HAS_FAILED_WRITES) && (pending_writes_ == 0))
} // if (state_ != FINALLY_FAILED)
// check current again and clean up if writing has finally failed and this
// is the last expected cleanup
if ((state_ == FINALLY_FAILED) && (pending_writes_ == 0)) {
CleanUp(&lock);
}
// Cleanup.
if (delete_response_message) {
delete response_message;
}
delete [] data;
delete error;
}
// Registers "write_buffer" as an in-flight write: adds its data_length to
// pending_bytes_, appends the buffer to writes_in_flight_ and sets state_ to
// WRITES_PENDING.
//
// "lock" must be a scoped_lock that currently owns its mutex (asserted). The
// lock itself is not modified here.
void AsyncWriteHandler::IncreasePendingBytesHelper(
AsyncWriteBuffer* write_buffer,
boost::mutex::scoped_lock* lock) {
assert(write_buffer && lock && lock->owns_lock());
pending_bytes_ += write_buffer->data_length;
writes_in_flight_.push_back(write_buffer);
// The list must never hold more entries than max_requests_.
assert(writes_in_flight_.size() <= static_cast<size_t>(max_requests_));
state_ = WRITES_PENDING;
}
// Subtracts the size of "write_buffer" from pending_bytes_ and wakes up
// threads which wait for that.
//
// If "delete_buffer" is true, the buffer is also removed from
// writes_in_flight_ and deleted. With false the caller keeps responsibility
// for both (DeleteBufferHelper() uses it that way).
//
// When pending_bytes_ drops to 0, the handler becomes IDLE, the redirect
// flags are reset, a pending pause is lifted (notifying all registered
// observers) and threads blocked on all_pending_writes_did_complete_ are
// notified.
//
// "lock" must be a scoped_lock that currently owns its mutex (asserted).
void AsyncWriteHandler::DecreasePendingBytesHelper(
AsyncWriteBuffer* write_buffer,
boost::mutex::scoped_lock* lock,
bool delete_buffer) {
assert(write_buffer && lock && lock->owns_lock());
pending_bytes_ -= write_buffer->data_length;
if (delete_buffer) {
// The buffer is unlinked from the list and freed.
writes_in_flight_.remove(write_buffer);
delete write_buffer;
}
if (pending_bytes_ == 0) {
state_ = IDLE;
redirected_ = false;
fast_redirect_ = false;
if (writing_paused_) {
writing_paused_ = false;
NotifyWaitingObserversAndClearAll(lock);
}
// Issue notify_all as long as there are remaining blocked threads.
//
// Please note the following here: After the two notify_all()s on the
// condition variables all_pending_writes_did_complete_ and
// pending_bytes_were_decreased_, two different thread types
// (waiting blocked ones AND further waiting writes) race for
// re-acquiring the lock on mutex_.
// Example:
// T1: write1 state_ = PENDING
// T2: getattr writing_paused_ = true => blocked as state_ != IDLE
// T1: write2 => blocked as writing_paused_ = true
// Tx: write1 callback: state = IDLE, writing_paused_ = false
// T1: write2 succeeds to obtain lock on mutex_ *before* getattr
// => state = IDLE (writing_paused_ remains false)
// Tx: write2 callback: state = IDLE, writing_paused_ remains false
// - however, it is necessary to notify the blocked getattr.
// As you can see, the order of concurrent writes and reads/getattrs
// is undefined and we don't enforce any order as it's up to the user to
// synchronize their threads themselves when working on the same file.
if (waiting_blocking_threads_count_ > 0) {
all_pending_writes_did_complete_.notify_all();
}
}
// Tell blocked writers that there may be enough space again, unless writing
// is currently paused.
if (!writing_paused_) {
pending_bytes_were_decreased_.notify_all();
}
}
// Releases the leading run of successfully written buffers.
//
// Starting at the head of writes_in_flight_, every buffer in state SUCCEEDED
// is accounted for (pending bytes are decreased), deleted and unlinked. The
// walk stops at the first buffer which has not succeeded yet, i.e. succeeded
// buffers behind it stay in the list.
//
// "lock" must be a scoped_lock that currently owns its mutex (asserted).
void AsyncWriteHandler::DeleteBufferHelper(
    boost::mutex::scoped_lock* lock) {
  assert(lock && lock->owns_lock());

  while (!writes_in_flight_.empty()) {
    AsyncWriteBuffer* const head = writes_in_flight_.front();
    if (head->state_ != AsyncWriteBuffer::SUCCEEDED) {
      // First entry which was not sent successfully yet: keep it and
      // everything behind it.
      break;
    }

    // Only do the accounting here (delete_buffer = false); the buffer is
    // freed and unlinked right below.
    DecreasePendingBytesHelper(head, lock, false);
    delete head;
    writes_in_flight_.pop_front();
  }

  assert(!writes_in_flight_.empty() || (pending_bytes_ == 0));
}
// Acquires mutex_ and marks the handler as FINALLY_FAILED. No buffers are
// freed and no waiting threads are notified here.
// NOTE(review): this locks mutex_ itself, so it presumably must not be called
// by a thread which already holds mutex_ - confirm against the callers.
void AsyncWriteHandler::FailFinallyHelper() {
boost::mutex::scoped_lock lock(mutex_);
state_ = FINALLY_FAILED;
}
// Tears down all in-flight writes after writing has finally failed.
//
// For every buffer in writes_in_flight_ the owning file handle is marked as
// having failed async writes, then the buffer is deleted and unlinked.
// Afterwards all waiting observers and blocked threads are woken up.
//
// Requires state_ == FINALLY_FAILED and a "lock" that currently owns its
// mutex (both asserted).
void AsyncWriteHandler::CleanUp(boost::mutex::scoped_lock* lock) {
  assert(lock && lock->owns_lock() && (state_ == FINALLY_FAILED));

  // Drain the list from the front until nothing is left.
  while (!writes_in_flight_.empty()) {
    AsyncWriteBuffer* const failed_write = writes_in_flight_.front();
    failed_write->file_handle->MarkAsyncWritesAsFailed();
    delete failed_write;
    writes_in_flight_.pop_front();
  }

  // Nobody may keep waiting for writes which will never complete.
  NotifyWaitingObserversAndClearAll(lock);
  if (waiting_blocking_threads_count_ > 0) {
    all_pending_writes_did_complete_.notify_all();
  }
  pending_bytes_were_decreased_.notify_all();
}
// Signals completion to every registered WaitForCompletionObserver and
// empties waiting_observers_.
//
// For each observer its completion flag is set and its condition variable is
// notified while the observer's own mutex is held; the observer object is
// deleted afterwards.
//
// "lock" must be a scoped_lock that currently owns its mutex (asserted).
void AsyncWriteHandler::NotifyWaitingObserversAndClearAll(
    boost::mutex::scoped_lock* lock) {
  assert(lock && lock->owns_lock());

  list<WaitForCompletionObserver*>::iterator cursor =
      waiting_observers_.begin();
  while (cursor != waiting_observers_.end()) {
    WaitForCompletionObserver* const observer = *cursor;
    {
      // Distinct name on purpose: this guards the observer's mutex, not the
      // handler lock passed in as "lock".
      boost::mutex::scoped_lock observer_lock(
          *(observer->wait_completed_mutex));
      *(observer->wait_completed) = true;
      observer->condition_variable->notify_one();
      delete observer;
    }
    ++cursor;
  }

  waiting_observers_.clear();
}
} // namespace xtreemfs

View File

@@ -0,0 +1,69 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/client.h"
#include <string>
#include "libxtreemfs/client_implementation.h"
#include "xtreemfs/GlobalTypes.pb.h"
namespace xtreemfs {
// Creates a client of the default implementation type (kDefaultClient) by
// forwarding all arguments unchanged to the five-argument CreateClient()
// overload and returning its result.
Client* Client::CreateClient(
const ServiceAddresses& dir_service_addresses,
const xtreemfs::pbrpc::UserCredentials& user_credentials,
const xtreemfs::rpc::SSLOptions* ssl_options,
const Options& options) {
return CreateClient(dir_service_addresses,
user_credentials,
ssl_options,
options,
kDefaultClient);
}
// Factory for Client objects.
//
// Returns a newly allocated ClientImplementation for kDefaultClient and NULL
// for every other value of "type". The object is created with "new"; no owner
// is recorded here.
Client* Client::CreateClient(
    const ServiceAddresses& dir_service_addresses,
    const xtreemfs::pbrpc::UserCredentials& user_credentials,
    const xtreemfs::rpc::SSLOptions* ssl_options,
    const Options& options,
    ClientImplementationType type) {
  // Only one implementation exists; anything else is unsupported.
  if (type != kDefaultClient) {
    return NULL;
  }

  return new ClientImplementation(dir_service_addresses,
                                  user_credentials,
                                  ssl_options,
                                  options);
}
// Convenience overload: creates the volume "volume_name" with fixed default
// settings by delegating to the CreateVolume() overload which takes all
// volume parameters explicitly.
//
// NOTE(review): the meaning given for each literal below follows the
// parameter order of ClientImplementation::CreateVolume (mode, owner user,
// owner group, access policy, quota, striping policy type, stripe size,
// stripe width, attributes) - confirm against the declaration in client.h.
void Client::CreateVolume(
const ServiceAddresses& mrc_address,
const xtreemfs::pbrpc::Auth& auth,
const xtreemfs::pbrpc::UserCredentials& user_credentials,
const std::string& volume_name) {
std::list<xtreemfs::pbrpc::KeyValuePair*> volume_attributes; // Empty.
CreateVolume(mrc_address,
auth,
user_credentials,
volume_name,
// mode: decimal 511 equals octal 0777.
511,
// owner user name: empty.
"",
// owner group name: empty.
"",
// access control policy.
xtreemfs::pbrpc::ACCESS_CONTROL_POLICY_POSIX,
// volume quota: 0 (presumably "no quota" - TODO confirm).
0,
// default striping policy type.
xtreemfs::pbrpc::STRIPING_POLICY_RAID0,
// default stripe size (unit not visible here, presumably kB - TODO confirm).
128,
// default stripe width.
1,
volume_attributes);
}
} // namespace xtreemfs

View File

@@ -0,0 +1,567 @@
/*
* Copyright (c) 2011-2014 by Michael Berlin, Zuse Institute Berlin
* 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
*
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/client_implementation.h"
#include <cstdlib>
#include <boost/bind.hpp>
#include <boost/thread/thread.hpp>
#include "libxtreemfs/async_write_handler.h"
#include "libxtreemfs/execute_sync_request.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/uuid_iterator.h"
#include "libxtreemfs/vivaldi.h"
#include "libxtreemfs/volume_implementation.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
#include "util/error_log.h"
#include "xtreemfs/DIRServiceClient.h"
#include "xtreemfs/MRCServiceClient.h"
#include "xtreemfs/OSDServiceClient.h"
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace xtreemfs {
static void AddAddresses(const ServiceAddresses& service_addresses,
SimpleUUIDIterator* uuid_iterator) {
ServiceAddresses::Addresses as_list = service_addresses.GetAddresses();
for (ServiceAddresses::Addresses::const_iterator iter = as_list.begin();
iter != as_list.end(); ++iter) {
uuid_iterator->AddUUID(*iter);
}
}
// Sets up a resolver which asks the DIR service(s) listed in "dir_addresses".
//
// "user_credentials" and "options" initialize the members
// dir_service_user_credentials_ and options_. All DIR addresses are added to
// dir_service_addresses_. No network client is attached yet; that happens in
// Initialize().
DIRUUIDResolver::DIRUUIDResolver(
const ServiceAddresses& dir_addresses,
const pbrpc::UserCredentials& user_credentials,
const Options& options)
: dir_service_user_credentials_(user_credentials),
options_(options) {
AddAddresses(dir_addresses, &dir_service_addresses_);
// Currently no AUTH is needed to access the DIR.
dir_service_auth_.set_auth_type(AUTH_NONE);
}
// Creates the DIRServiceClient used for all DIR requests on top of
// "network_client", replacing any previously created one.
// NOTE(review): only the pointer is handed to DIRServiceClient, so
// "network_client" presumably has to outlive this resolver - confirm.
void DIRUUIDResolver::Initialize(xtreemfs::rpc::Client* network_client) {
dir_service_client_.reset(new DIRServiceClient(network_client));
}
// Resolves "uuid" and stores the result in "*address". Same as
// UUIDToAddressWithOptions(), using RPC options derived from options_ via
// RPCOptionsFromOptions().
void DIRUUIDResolver::UUIDToAddress(const std::string& uuid,
std::string* address) {
UUIDToAddressWithOptions(uuid, address, RPCOptionsFromOptions(options_));
}
void DIRUUIDResolver::UUIDToAddressWithOptions(const std::string& uuid,
std::string* address,
const RPCOptions& options) {
// The UUID must never be empty.
assert(!uuid.empty());
// Try to search in cache.
*address = uuid_cache_.get(uuid);
if (!address->empty()) {
return; // Cache-Hit.
}
addressMappingGetRequest rq = addressMappingGetRequest();
rq.set_uuid(uuid);
boost::scoped_ptr<rpc::SyncCallbackBase> response(
ExecuteSyncRequest(
boost::bind(
&xtreemfs::pbrpc::DIRServiceClient::
xtreemfs_address_mappings_get_sync,
dir_service_client_.get(),
_1,
boost::cref(dir_service_auth_),
boost::cref(dir_service_user_credentials_),
&rq),
&dir_service_addresses_,
NULL,
options,
true));
boost::unordered_set<string> local_networks = GetNetworks();
AddressMappingSet* set = static_cast<AddressMappingSet*>(
response->response());
AddressMapping found_address_mapping;
for (int i = 0; i < set->mappings_size(); i++) {
const AddressMapping& am = set->mappings(i);
if (am.protocol() != PBRPCURL::GetSchemePBRPC()
&& am.protocol() != PBRPCURL::GetSchemePBRPCS()
&& am.protocol() != PBRPCURL::GetSchemePBRPCG()
&& am.protocol() != PBRPCURL::GetSchemePBRPCU()) {
Logging::log->getLog(LEVEL_ERROR)
<< "Unknown scheme: " << am.protocol() << endl;
response->DeleteBuffers();
throw UnknownAddressSchemeException("Unknown scheme: " + am.protocol());
}
const string& network = am.match_network();
// Prefer the UUID for a matching network, use the default otherwise.
if (network == "*") {
found_address_mapping = am;
} else {
boost::unordered_set<string>::const_iterator local_network
= local_networks.find(network);
if (local_network != local_networks.end()) {
found_address_mapping = am;
break;
}
}
}
if (found_address_mapping.IsInitialized()) {
uuid_cache_.update(uuid,
found_address_mapping.address(),
found_address_mapping.port(),
found_address_mapping.ttl_s());
ostringstream s;
s << found_address_mapping.address() << ":" << found_address_mapping.port();
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Service found for UUID: " << s.str() << endl;
}
response->DeleteBuffers();
*address = s.str();
} else {
Logging::log->getLog(LEVEL_ERROR)
<< "Service not found for UUID: " << uuid << endl;
response->DeleteBuffers();
throw AddressToUUIDNotFoundException(uuid);
}
}
void DIRUUIDResolver::VolumeNameToMRCUUID(const std::string& volume_name,
std::string* mrc_uuid) {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "MRC: searching volume on MRC: " << volume_name << endl;
}
// Check if there is a @ in the volume_name.
// Everything behind the @ has to be removed as it identifies the snapshot.
string parsed_volume_name = volume_name;
size_t at_pos = volume_name.find("@");
if (at_pos != string::npos) {
parsed_volume_name = volume_name.substr(0, at_pos);
}
serviceGetByNameRequest rq = serviceGetByNameRequest();
rq.set_name(parsed_volume_name);
boost::scoped_ptr<rpc::SyncCallbackBase> response(
ExecuteSyncRequest(
boost::bind(
&xtreemfs::pbrpc::DIRServiceClient::
xtreemfs_service_get_by_name_sync,
dir_service_client_.get(),
_1,
boost::cref(dir_service_auth_),
boost::cref(dir_service_user_credentials_),
&rq),
&dir_service_addresses_,
NULL,
RPCOptionsFromOptions(options_),
true));
ServiceSet* service_set = static_cast<ServiceSet*>(response->response());
*mrc_uuid = "";
for (int i = 0; i < service_set->services_size(); i++) {
Service service = service_set->services(i);
if ((service.type() == SERVICE_TYPE_VOLUME)
&& (service.name() == parsed_volume_name)) {
const ServiceDataMap& data = service.data();
for (int j = 0; j < data.data_size(); j++) {
if (data.data(j).key() == "mrc") {
*mrc_uuid = data.data(j).value();
break;
}
}
}
}
response->DeleteBuffers();
if (mrc_uuid->empty()) {
Logging::log->getLog(LEVEL_ERROR) << "No MRC found for volume: "
<< volume_name << std::endl;
throw VolumeNotFoundException(volume_name);
}
}
void DIRUUIDResolver::VolumeNameToMRCUUID(const std::string& volume_name,
SimpleUUIDIterator* uuid_iterator) {
assert(uuid_iterator);
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "MRC: searching volume on MRC: " << volume_name << endl;
}
// Check if there is a @ in the volume_name.
// Everything behind the @ has to be removed as it identifies the snapshot.
string parsed_volume_name = volume_name;
size_t at_pos = volume_name.find("@");
if (at_pos != string::npos) {
parsed_volume_name = volume_name.substr(0, at_pos);
}
serviceGetByNameRequest rq = serviceGetByNameRequest();
rq.set_name(parsed_volume_name);
boost::scoped_ptr<rpc::SyncCallbackBase> response(
ExecuteSyncRequest(
boost::bind(
&xtreemfs::pbrpc::DIRServiceClient::
xtreemfs_service_get_by_name_sync,
dir_service_client_.get(),
_1,
boost::cref(dir_service_auth_),
boost::cref(dir_service_user_credentials_),
&rq),
&dir_service_addresses_,
NULL,
RPCOptionsFromOptions(options_),
true));
bool mrc_found = false;
ServiceSet* service_set = static_cast<ServiceSet*>(response->response());
for (int i = 0; i < service_set->services_size(); i++) {
Service service = service_set->services(i);
if ((service.type() == SERVICE_TYPE_VOLUME)
&& (service.name() == parsed_volume_name)) {
const ServiceDataMap& data = service.data();
for (int j = 0; j < data.data_size(); j++) {
if (data.data(j).key().substr(0, 3) == "mrc") {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "MRC with UUID: " << data.data(j).value()
<< " added (key: " << data.data(j).key() << ")." << std::endl;
}
uuid_iterator->AddUUID(data.data(j).value());
mrc_found = true;
}
}
}
}
response->DeleteBuffers();
if (!mrc_found) {
Logging::log->getLog(LEVEL_ERROR) << "No MRC found for volume: "
<< volume_name << std::endl;
throw VolumeNotFoundException(volume_name);
}
}
// Constructs the client object.
//
// Initializes options_ and dir_service_ssl_options_ from the arguments, builds
// the DIR UUID resolver, and initializes the logger and the error log. If
// options_.vivaldi_enable is set, the Vivaldi object is created as well. The
// RPC client and all threads are not created here but in Start().
ClientImplementation::ClientImplementation(
const ServiceAddresses& dir_service_addresses,
const pbrpc::UserCredentials& user_credentials,
const rpc::SSLOptions* ssl_options,
const Options& options)
: was_shutdown_(false),
options_(options),
dir_service_ssl_options_(ssl_options),
uuid_resolver_(dir_service_addresses,
user_credentials,
options) {
// Set bogus auth object.
auth_bogus_.set_auth_type(AUTH_NONE);
// NOTE(review): LEVEL_WARN is presumably the level used when
// options.log_level_string does not specify one - confirm in util/logging.h.
initialize_logger(options.log_level_string,
options.log_file_path,
LEVEL_WARN);
// NOTE(review): 20 is presumably the maximum number of kept error entries -
// confirm in util/error_log.h.
initialize_error_log(20);
// Vivaldi is optional; vivaldi_ stays unset if it is disabled.
if (options_.vivaldi_enable) {
vivaldi_.reset(new Vivaldi(dir_service_addresses,
GetUUIDResolver(),
options_));
}
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "Created a new libxtreemfs Client "
"object (version " << options.version_string << ")" << endl;
}
}
// Shuts the client down and releases its resources.
//
// Order of the teardown:
//   1. Shutdown() (no-op if it already ran).
//   2. Complain if there are still open volumes.
//   3. Stop the RPC client and wait for its thread.
//   4. Wait for the Vivaldi thread.
//   5. Shut down the logger and the error log.
//
// The RPC client and its thread are only created in Start(). They are
// therefore checked before use, so that this destructor does not dereference
// them when they were never created.
ClientImplementation::~ClientImplementation() {
  Shutdown();

  if (!list_open_volumes_.empty()) {
    string error = "Client::~Client(): Not all XtreemFS volumes were closed."
        " Did you forget to call Client::Shutdown()? Memory leaks are the"
        " consequence.";
    Logging::log->getLog(LEVEL_ERROR) << error << endl;
    ErrorLog::error_log->AppendError(error);
  }

  if (network_client_.get()) {
    network_client_->shutdown();
  }
  if (network_client_thread_.get() && network_client_thread_->joinable()) {
    network_client_thread_->join();
  }

  // Since we wait for outstanding requests, the RPC client (network_client_)
  // has to shutdown first and then we can wait for the Vivaldi thread.
  // The other way around a deadlock might occur.
  if (vivaldi_thread_.get() && vivaldi_thread_->joinable()) {
    vivaldi_thread_->join();
  }

  // Have protobuf release its global data when the process exits.
  atexit(google::protobuf::ShutdownProtobufLibrary);

  shutdown_logger();
  shutdown_error_log();
}
void ClientImplementation::Start() {
// start network (rpc) client
network_client_.reset(new xtreemfs::rpc::Client(
options_.connect_timeout_s,
options_.request_timeout_s,
options_.linger_timeout_s,
dir_service_ssl_options_));
network_client_thread_.reset(
new boost::thread(boost::bind(&xtreemfs::rpc::Client::run,
network_client_.get())));
GenerateVersion4UUID(&client_uuid_);
assert(!client_uuid_.empty());
uuid_resolver_.Initialize(network_client_.get());
// Start vivaldi thread if configured
if (options_.vivaldi_enable) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO)
<< "Starting vivaldi..." << endl;
}
vivaldi_->Initialize(network_client_.get());
vivaldi_thread_.reset(new boost::thread(boost::bind(&xtreemfs::Vivaldi::Run,
vivaldi_.get())));
}
async_write_callback_thread_.reset(
new boost::thread(&xtreemfs::AsyncWriteHandler::ProcessCallbacks,
boost::ref(async_write_callback_queue_)));
}
void ClientImplementation::Shutdown() {
if (!was_shutdown_) {
was_shutdown_ = true;
boost::mutex::scoped_lock lock(list_open_volumes_mutex_);
// Issue Close() on every Volume and remove it's pointer.
list<VolumeImplementation*>::iterator it;
while (!list_open_volumes_.empty()) {
it = list_open_volumes_.begin();
(*it)->CloseInternal();
delete *it;
it = list_open_volumes_.erase(it);
}
if (async_write_callback_thread_->joinable()) {
async_write_callback_thread_->interrupt();
async_write_callback_thread_->join();
}
// Stop vivaldi thread if running
if (vivaldi_thread_.get() && vivaldi_thread_->joinable()) {
vivaldi_thread_->interrupt();
}
}
}
// Opens the volume "volume_name" and returns the started Volume object.
//
// The MRC UUIDs of the volume are resolved through the DIR service first.
// The new VolumeImplementation is registered in list_open_volumes_ and
// started before it is returned.
//
// @throws VolumeNotFoundException  (from the UUID resolver) if the volume is
//                                  unknown. Other exceptions thrown while
//                                  resolving are passed through as well.
Volume* ClientImplementation::OpenVolume(
    const std::string& volume_name,
    const xtreemfs::rpc::SSLOptions* ssl_options,
    const Options& options) {
  SimpleUUIDIterator* mrc_uuid_iterator = new SimpleUUIDIterator;
  try {
    uuid_resolver_.VolumeNameToMRCUUID(volume_name, mrc_uuid_iterator);
  } catch (...) {
    // Nobody else knows the iterator yet: free it, then let the caller see
    // the original exception.
    delete mrc_uuid_iterator;
    throw;
  }

  // TODO(mberlin): The iterator still leaks if the construction below throws.
  //                Fix through the use of scoped_ptr and swap().
  // NOTE(review): the volume presumably takes ownership of mrc_uuid_iterator -
  // confirm in VolumeImplementation.
  VolumeImplementation* volume = new VolumeImplementation(
      this,
      client_uuid_,
      mrc_uuid_iterator,
      volume_name,
      ssl_options,
      options);
  {
    boost::mutex::scoped_lock lock(list_open_volumes_mutex_);
    list_open_volumes_.push_back(volume);
  }

  volume->Start();

  return volume;
}
// Removes "volume" from the list of open volumes and deletes it.
//
// Nothing happens if "volume" is not contained in list_open_volumes_.
void ClientImplementation::CloseVolume(xtreemfs::Volume* volume) {
  boost::mutex::scoped_lock lock(list_open_volumes_mutex_);

  // Find given volume pointer address in list of open volumes and erase it.
  list<VolumeImplementation*>::iterator it;
  for (it = list_open_volumes_.begin(); it != list_open_volumes_.end(); ++it) {
    if (*it == volume) {
      // Free Volume object.
      delete *it;
      list_open_volumes_.erase(it);
      // Stop here: "it" is invalid after erase() and must not be incremented
      // by the loop. A freed pointer must not be processed a second time
      // either.
      break;
    }
  }
}
// Creates a new volume on the MRC reachable under "mrc_address".
//
// The volume description is assembled from the arguments (the id is left
// empty) and sent with an xtreemfs_mkvol request, using RPC options derived
// from options_. The entries of "volume_attributes" are copied into the
// request; the list itself is not modified.
//
// Exceptions thrown by ExecuteSyncRequest() are passed through.
void ClientImplementation::CreateVolume(
    const ServiceAddresses& mrc_address,
    const xtreemfs::pbrpc::Auth& auth,
    const xtreemfs::pbrpc::UserCredentials& user_credentials,
    const std::string& volume_name,
    int mode,
    const std::string& owner_username,
    const std::string& owner_groupname,
    const xtreemfs::pbrpc::AccessControlPolicyType& access_policy,
    long volume_quota,
    const xtreemfs::pbrpc::StripingPolicyType& default_striping_policy_type,
    int default_stripe_size,
    int default_stripe_width,
    const std::list<xtreemfs::pbrpc::KeyValuePair*>& volume_attributes) {
  MRCServiceClient mrc_client(network_client_.get());

  // Describe the volume to be created.
  xtreemfs::pbrpc::Volume volume_spec;
  volume_spec.set_id("");
  volume_spec.set_mode(mode);
  volume_spec.set_name(volume_name);
  volume_spec.set_owner_user_id(owner_username);
  volume_spec.set_owner_group_id(owner_groupname);
  volume_spec.set_access_control_policy(access_policy);
  volume_spec.set_quota(volume_quota);
  volume_spec.mutable_default_striping_policy()
      ->set_type(default_striping_policy_type);
  volume_spec.mutable_default_striping_policy()
      ->set_stripe_size(default_stripe_size);
  volume_spec.mutable_default_striping_policy()
      ->set_width(default_stripe_width);

  // Copy the custom attributes, one new entry per key/value pair.
  list<KeyValuePair*>::const_iterator attribute = volume_attributes.begin();
  while (attribute != volume_attributes.end()) {
    KeyValuePair* const copied = volume_spec.add_attrs();
    copied->set_key((*attribute)->key());
    copied->set_value((*attribute)->value());
    ++attribute;
  }

  // The iterator holds plain addresses (no UUIDs), hence no resolver below.
  SimpleUUIDIterator mrc_address_iterator;
  AddAddresses(mrc_address, &mrc_address_iterator);

  boost::scoped_ptr<rpc::SyncCallbackBase> response(
      ExecuteSyncRequest(
          boost::bind(
              &xtreemfs::pbrpc::MRCServiceClient::xtreemfs_mkvol_sync,
              &mrc_client,
              _1,
              boost::cref(auth),
              boost::cref(user_credentials),
              &volume_spec),
          &mrc_address_iterator,
          NULL,
          RPCOptionsFromOptions(options_),
          true));
  response->DeleteBuffers();
}
// Deletes the volume "volume_name" on the MRC reachable under "mrc_address"
// by sending an xtreemfs_rmvol request, using RPC options derived from
// options_.
//
// Exceptions thrown by ExecuteSyncRequest() are passed through.
void ClientImplementation::DeleteVolume(
    const ServiceAddresses& mrc_address,
    const xtreemfs::pbrpc::Auth& auth,
    const xtreemfs::pbrpc::UserCredentials& user_credentials,
    const std::string& volume_name) {
  MRCServiceClient mrc_client(network_client_.get());

  xtreemfs_rmvolRequest request;
  request.set_volume_name(volume_name);

  // The iterator holds plain addresses (no UUIDs), hence no resolver below.
  SimpleUUIDIterator mrc_address_iterator;
  AddAddresses(mrc_address, &mrc_address_iterator);

  boost::scoped_ptr<rpc::SyncCallbackBase> response(
      ExecuteSyncRequest(
          boost::bind(
              &xtreemfs::pbrpc::MRCServiceClient::xtreemfs_rmvol_sync,
              &mrc_client,
              _1,
              boost::cref(auth),
              boost::cref(user_credentials),
              &request),
          &mrc_address_iterator,
          NULL,
          RPCOptionsFromOptions(options_),
          true));
  response->DeleteBuffers();
}
// Retrieves the list of volumes from the MRC reachable under "mrc_addresses".
//
// The request is sent with placeholder user credentials (user name
// "xtreemfs"). Of the RPC result, only the data buffer and the error object
// are freed here; the response message is returned as-is.
// NOTE(review): the caller presumably has to delete the returned object -
// confirm against the declaration in client.h.
//
// Exceptions thrown by ExecuteSyncRequest() are passed through.
xtreemfs::pbrpc::Volumes* ClientImplementation::ListVolumes(
    const ServiceAddresses& mrc_addresses,
    const xtreemfs::pbrpc::Auth& auth) {
  MRCServiceClient mrc_client(network_client_.get());

  // Use bogus user_credentials;
  UserCredentials placeholder_credentials;
  placeholder_credentials.set_username("xtreemfs");

  // The iterator holds plain addresses (no UUIDs), hence no resolver below.
  SimpleUUIDIterator mrc_address_iterator;
  AddAddresses(mrc_addresses, &mrc_address_iterator);

  boost::scoped_ptr<rpc::SyncCallbackBase> response(
      ExecuteSyncRequest(
          boost::bind(
              &xtreemfs::pbrpc::MRCServiceClient::xtreemfs_lsvol_sync,
              &mrc_client,
              _1,
              boost::cref(auth),
              boost::cref(placeholder_credentials)),
          &mrc_address_iterator,
          NULL,
          RPCOptionsFromOptions(options_),
          true));

  // Keep the response message, drop everything else.
  xtreemfs::pbrpc::Volumes* const volumes =
      static_cast<xtreemfs::pbrpc::Volumes*>(response->response());
  delete[] response->data();
  delete response->error();

  return volumes;
}
/** Returns the UUIDResolver of this client, i.e. a pointer to the member
* uuid_resolver_. The object stays owned by the client and must not be
* deleted by the caller. */
UUIDResolver* ClientImplementation::GetUUIDResolver() {
return &uuid_resolver_;
}
// Resolves "uuid" through uuid_resolver_ and returns the result by value.
// Exceptions thrown by the resolver are passed through.
std::string ClientImplementation::UUIDToAddress(const std::string& uuid) {
std::string result;
uuid_resolver_.UUIDToAddress(uuid, &result);
return result;
}
// Returns the coordinates held by the Vivaldi object.
// NOTE(review): vivaldi_ is only created by the constructor if
// options_.vivaldi_enable is set and is dereferenced here without a check,
// i.e. this must only be called when Vivaldi is enabled.
const VivaldiCoordinates& ClientImplementation::GetVivaldiCoordinates() const {
return vivaldi_->GetVivaldiCoordinates();
}
// Returns a reference to the queue of async write callbacks. It is the same
// queue which Start() hands to the AsyncWriteHandler::ProcessCallbacks thread.
util::SynchronizedQueue<AsyncWriteHandler::CallbackEntry>& ClientImplementation::GetAsyncWriteCallbackQueue() {
return async_write_callback_queue_;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/container_uuid_iterator.h"
#include "libxtreemfs/uuid_container.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
// Removes all entries from the list of UUIDs while holding mutex_.
// Only the pointers are dropped; the UUIDItem objects are not deleted here.
void ContainerUUIDIterator::Clear() {
boost::mutex::scoped_lock lock(mutex_);
uuids_.clear();
// Empty list, i.e. current UUID is set to the past-the-end element.
current_uuid_ = uuids_.end();
}
// NOTE: like simple iterator, BUT without implicit adding
void ContainerUUIDIterator::SetCurrentUUID(const string& uuid) {
boost::mutex::scoped_lock lock(mutex_);
// Search "uuid" in "uuids_" and set it to the current UUID.
for (list<UUIDItem*>::iterator it = uuids_.begin();
it != uuids_.end();
++it) {
if ((*it)->uuid == uuid) {
current_uuid_ = it;
// Reset its current state.
(*current_uuid_)->Reset();
return;
}
}
// UUID was not found, fail.
Logging::log->getLog(LEVEL_ERROR)
<< "ContainerUUIDIterator::SetCurrentUUID: uuid not found. " << endl;
}
// Appends the pointer "uuid" to the list of UUIDs while holding mutex_.
// The item is stored as a pointer; it is not copied.
void ContainerUUIDIterator::AddUUIDItem(UUIDItem* uuid) {
boost::mutex::scoped_lock lock(mutex_);
uuids_.push_back(uuid);
// If it's the first element, set the current UUID to the first element.
if (uuids_.size() == 1) {
current_uuid_ = uuids_.begin();
}
}
} // namespace xtreemfs

View File

@@ -0,0 +1,478 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/execute_sync_request.h"
#include <stdint.h>
#include <algorithm>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/format.hpp>
#include <boost/function.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/thread/thread.hpp>
#include <ctime>
#include <google/protobuf/descriptor.h>
#include <iostream>
#include <string>
#include "libxtreemfs/interrupt.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/uuid_iterator.h"
#include "libxtreemfs/uuid_resolver.h"
#include "libxtreemfs/xcap_handler.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "pbrpc/RPC.pb.h"
#include "rpc/sync_callback.h"
#include "util/error_log.h"
#include "util/logging.h"
using std::endl;
using std::string;
using namespace xtreemfs::util;
using namespace xtreemfs::pbrpc;
namespace xtreemfs {
/** Logs "delay_error" (if not empty) and sleeps for the rest of the retry
 *  delay.
 *
 * The time to wait is options.retry_delay_s() minus the time which has
 * already passed since "request_sent_time". This spacing of attempts ensures
 * that the server won't be flooded. If the delay has already passed, the
 * function returns without sleeping. While waiting, the remaining time is
 * appended to the logged message.
 *
 * @throws boost::thread_interrupted if interrupted while sleeping.
 *
 * @remarks If the function throws, "response" (if not NULL) has been freed,
 *          i.e. the caller must not touch it anymore.
 */
void DelayNextRetry(const RPCOptions& options,
                    const boost::posix_time::ptime& request_sent_time,
                    const std::string& delay_error,
                    const xtreemfs::util::LogLevel level,
                    rpc::SyncCallbackBase* response) {
  // delay = retry_delay - (current_time - request_sent_time)
  const boost::posix_time::time_duration elapsed =
      boost::posix_time::microsec_clock::local_time() - request_sent_time;
  const boost::posix_time::time_duration delay_time_left =
      boost::posix_time::seconds(options.retry_delay_s()) - elapsed;
  const bool must_wait = !delay_time_left.is_negative();

  string msg = delay_error;
  if (must_wait && !msg.empty()) {
    // Append time left to error message.
    const double seconds_left = std::max(
        0.0,
        static_cast<double>(delay_time_left.total_milliseconds()) / 1000);
    msg += ", waiting "
        + boost::str(boost::format("%.1f") % seconds_left)
        + " more seconds till next attempt.";
  }

  if (!msg.empty()) {
    if (Logging::log->loggingActive(level)) {
      Logging::log->getLog(level) << msg << endl;
    }
    ErrorLog::error_log->AppendError(msg);
  }

  if (!must_wait) {
    return;  // The delay is already over.
  }

  try {
    Interruptibilizer::SleepInterruptible(
        static_cast<int>(delay_time_left.total_milliseconds()),
        options.was_interrupted_cb());
  } catch (const boost::thread_interrupted&) {
    // The caller will not get the chance to clean up: free the response.
    if (response != NULL) {
      response->DeleteBuffers();
      delete response;
    }
    throw;
  }
}
/** Retries to execute the synchronous request "sync_function" up to
* "options.max_retries()" times and may get interrupted. The "uuid_iterator"
* object is used to retrieve UUIDs or mark them as failed.
* If uuid_iterator_has_addresses=true, the resolving of the UUID is skipped
* and the string retrieved by uuid_iterator->GetUUID() is used as address.
* (in this case uuid_resolver may be NULL).
*
* The parameter delay_last_attempt should be set true, if this method is
* called with options.max_retries() = 1 and one does the looping over the
* retries on its own (for instance in FileHandleImplementation::AcquireLock).
* If set to false this method would return immediately after the _last_ try
* and the caller would have to ensure the delay of options.retry_delay_s on
* its own.
*
* Ownership of arguments is NOT transferred.
*
*/
rpc::SyncCallbackBase* ExecuteSyncRequest(
boost::function<rpc::SyncCallbackBase* (const std::string&)> sync_function,
UUIDIterator* uuid_iterator,
UUIDResolver* uuid_resolver,
const RPCOptions& options,
bool uuid_iterator_has_addresses,
XCapHandler* xcap_handler,
xtreemfs::pbrpc::XCap* xcap_in_req) {
assert(uuid_iterator_has_addresses || uuid_resolver);
assert((!xcap_handler && !xcap_in_req) || (xcap_handler && xcap_in_req));
const int kMaxRedirectsInARow = 5;
int attempt = 0;
int redirects_in_a_row = 0;
bool max_redirects_in_a_row_exceeded = false;
rpc::SyncCallbackBase* response = NULL;
string service_uuid = "";
string service_address;
// Retry unless maximum tries reached or interrupted.
while ((++attempt <= options.max_retries() || options.max_retries() == 0) &&
!Interruptibilizer::WasInterrupted(options.was_interrupted_cb())) {
// Delete any previous response;
if (response != NULL) {
response->DeleteBuffers();
delete response;
}
// Resolve UUID first.
if (uuid_iterator_has_addresses) {
uuid_iterator->GetUUID(&service_address);
} else {
uuid_iterator->GetUUID(&service_uuid);
uuid_resolver->UUIDToAddressWithOptions(service_uuid,
&service_address,
options);
}
// Execute request.
// Send out request.
boost::posix_time::ptime request_sent_time =
boost::posix_time::microsec_clock::local_time();
if (attempt > 1 && xcap_handler && xcap_in_req) {
xcap_handler->GetXCap(xcap_in_req);
}
response = sync_function(service_address);
bool has_failed;
try {
has_failed = response->HasFailed();
} catch (const boost::thread_interrupted&) {
if (response != NULL) {
// Wait until request was processed - otherwise leaks and accesses
// to deleted memory may occur.
response->HasFailed();
// Free response.
response->DeleteBuffers();
delete response;
}
throw;
}
// Check response.
if (has_failed) {
// Retry only if it is a recoverable error (REDIRECT, IO_ERROR, INTERNAL_SERVER_ERROR). // NOLINT
bool retry = false;
// Message to be logged and respective log level if retry occurs.
string delay_error;
LogLevel level = LEVEL_ERROR;
const RPCHeader::ErrorResponse err = *(response->error());
if (err.error_type() == REDIRECT) {
retry = true;
redirects_in_a_row++;
assert(err.has_redirect_to_server_uuid());
uuid_iterator->SetCurrentUUID(err.redirect_to_server_uuid());
level = LEVEL_INFO;
if (uuid_iterator_has_addresses) {
delay_error = "The server: " + service_address
+ " redirected to the current master: "
+ err.redirect_to_server_uuid()
+ " at attempt: " + boost::lexical_cast<string>(attempt);
} else {
delay_error = "The server with the UUID: " + service_uuid
+ " redirected to the current master with the UUID: "
+ err.redirect_to_server_uuid()
+ " at attempt: " + boost::lexical_cast<string>(attempt);
}
// Ignore the number of attempts if kMaxRedirectsInARow is not reached.
if (redirects_in_a_row <= kMaxRedirectsInARow) {
--attempt;
} else {
max_redirects_in_a_row_exceeded = true;
level = LEVEL_ERROR;
}
// If it's the first redirect, do a fast retry and do not delay.
if (redirects_in_a_row == 1) {
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << delay_error << endl;
}
ErrorLog::error_log->AppendError(delay_error);
continue;
}
} else {
redirects_in_a_row = 0;
}
if (err.error_type() == IO_ERROR ||
err.error_type() == INTERNAL_SERVER_ERROR) {
// Log only the first retry.
if (attempt == 1 && options.max_retries() != 1) {
string retries_left = options.max_retries() == 0 ? "infinite"
: boost::lexical_cast<string>(options.max_retries() - attempt);
delay_error = "Got no response from server "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ ", retrying ("
+ boost::lexical_cast<string>(retries_left)
+ " attempts left) (Possible reason: The server is using SSL,"
+ " and the client is not.)";
}
retry = true;
// Mark the current UUID as failed and get the next one.
if (uuid_iterator_has_addresses) {
uuid_iterator->MarkUUIDAsFailed(service_address);
uuid_iterator->GetUUID(&service_address);
} else {
uuid_iterator->MarkUUIDAsFailed(service_uuid);
uuid_iterator->GetUUID(&service_uuid);
}
}
// Retry (and delay)?
if (retry &&
// Attempts left
(attempt < options.max_retries() || options.max_retries() == 0 ||
// or this last retry should be delayed.
(attempt == options.max_retries() && options.delay_last_attempt()))) { // NOLINT
DelayNextRetry(options, request_sent_time, delay_error, level, response); // NOLINT
} else {
break; // Do not retry if error occurred - throw exception below.
}
} else {
// No error happened, check for possible interruption.
} // if (response->HasFailed())
// Have we been interrupted?
if (Interruptibilizer::WasInterrupted(options.was_interrupted_cb())) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
string error = "Caught interrupt, aborting sync request.";
Logging::log->getLog(LEVEL_INFO) << error << endl;
ErrorLog::error_log->AppendError(error);
}
// Clear the current response.
if (response != NULL) {
response->DeleteBuffers();
}
delete response;
response = NULL;
break; // Do not retry if interrupted.
}
// Do not retry if request was successful.
if (response != NULL && !response->HasFailed()) {
break;
}
} // while("attempts left" || "not interrupted")
// Request was successful.
if (response && !response->HasFailed()) {
if (attempt > 1 || max_redirects_in_a_row_exceeded) {
string msg = "After retrying the client succeeded to receive a response"
" at attempt " + boost::lexical_cast<string>(attempt)
+ " from server: "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"));
Logging::log->getLog(LEVEL_INFO) << msg << endl;
ErrorLog::error_log->AppendError(msg);
}
return response;
}
// Output number of retries if not failed at the first retry.
string retry_count_msg;
if (attempt > 1) {
retry_count_msg = ". Request finally failed after: "
+ boost::lexical_cast<string>(attempt) + " attempts.";
} else {
retry_count_msg = "";
}
// Max attempts reached or non-IO error seen. Throw an exception.
if (response != NULL) {
// Copy error information in order to delete buffers before the throw.
const RPCHeader::ErrorResponse& error_resp = *(response->error());
const ErrorType error_type = error_resp.error_type();
string error_message = error_resp.error_message();
if (error_message.empty()) {
error_message = "none given";
}
const POSIXErrno posix_errno = error_resp.posix_errno();
string redirect_target = "";
if (error_resp.has_redirect_to_server_uuid()) {
redirect_target = error_resp.redirect_to_server_uuid();
}
// Free buffers.
response->DeleteBuffers();
delete response;
// By default all errors are logged as errors.
LogLevel level = LEVEL_ERROR;
// String for complete error text which will be logged.
string error;
// Throw an exception.
switch (error_type) {
case ERRNO: {
// Posix errors are usually not logged as errors.
level = LEVEL_INFO;
if (posix_errno == POSIX_ERROR_ENOENT) {
level = LEVEL_DEBUG;
}
if (posix_errno == POSIX_ERROR_EIO) {
level = LEVEL_ERROR;
}
string posix_errno_string = boost::lexical_cast<string>(posix_errno);
const ::google::protobuf::EnumValueDescriptor* enum_desc =
POSIXErrno_descriptor()->FindValueByNumber(posix_errno);
if (enum_desc) {
posix_errno_string = enum_desc->name();
}
error = "The server "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ " denied the requested operation."
" Error Value: " + posix_errno_string
+ " Error message: " + error_message
+ retry_count_msg;
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << error << endl;
ErrorLog::error_log->AppendError(error);
}
throw PosixErrorException(posix_errno, error);
}
case IO_ERROR: {
error = "The client encountered a communication error sending a request"
" to the server: "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ ". Error: " + error_message + retry_count_msg;
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << error << endl;
}
ErrorLog::error_log->AppendError(error);
throw IOException(error_message);
}
case INTERNAL_SERVER_ERROR: {
error = "The server "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ " returned an internal server error: " + error_message
+ retry_count_msg;
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << error << endl;
}
ErrorLog::error_log->AppendError(error);
throw InternalServerErrorException(error_message);
}
case REDIRECT: {
error = "Too many redirections occurred. There is probably something"
" wrong with the replication. The last redirect seen came from the"
" server: "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ " and pointed to: " + redirect_target
+ retry_count_msg;
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << error << endl;
}
ErrorLog::error_log->AppendError(error);
throw XtreemFSException(error);
}
case INVALID_VIEW: {
error = "The server "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ " denied the requested operation because the clients view is " +
+ "outdated. The request will be retried once the view is renewed.";
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << error << endl;
}
ErrorLog::error_log->AppendError(error);
throw InvalidViewException(error);
}
default: {
string error_type_name
= boost::lexical_cast<string>(error_type);
const ::google::protobuf::EnumValueDescriptor* enum_desc =
ErrorType_descriptor()->FindValueByNumber(error_type);
if (enum_desc) {
error_type_name = enum_desc->name();
}
error = "The server "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ " returned an error: " + error_type_name
+ " Error: " + error_message + retry_count_msg;
if (Logging::log->loggingActive(level)) {
Logging::log->getLog(level) << error << endl;
}
ErrorLog::error_log->AppendError(error);
throw XtreemFSException(error);
}
}
} else {
// No Response given, probably interrupted.
throw PosixErrorException(
POSIX_ERROR_EINTR,
"The operation (sending a request to the server "
+ (uuid_iterator_has_addresses ? service_address
: ( service_address + " (" + service_uuid + ")"))
+ ") was aborted by the user at attempt: "
// attempt + 1 because the interrupt is only possible after the
// request came back.
+ boost::lexical_cast<string>(attempt + 1) + ".");
} // if (response != NULL)
}
/**
 * Executes the request without delaying the last try and no xcap handler.
 *
 * Convenience overload: forwards to the seven-argument ExecuteSyncRequest(),
 * passing "false" for uuid_iterator_has_addresses and NULL for both trailing
 * arguments.
 */
rpc::SyncCallbackBase* ExecuteSyncRequest(
    boost::function<rpc::SyncCallbackBase* (const std::string&)> sync_function,
    UUIDIterator* uuid_iterator,
    UUIDResolver* uuid_resolver,
    const RPCOptions& options) {
  // The iterator is treated as a list of UUIDs, not of addresses.
  const bool uuid_iterator_has_addresses = false;
  return ExecuteSyncRequest(sync_function, uuid_iterator, uuid_resolver,
                            options, uuid_iterator_has_addresses, NULL, NULL);
}
/**
 * Executes the request without a xcap handler.
 *
 * Convenience overload: forwards all given arguments to the seven-argument
 * ExecuteSyncRequest() and passes NULL for both trailing arguments.
 */
rpc::SyncCallbackBase* ExecuteSyncRequest(
    boost::function<rpc::SyncCallbackBase* (const std::string&)> sync_function,
    UUIDIterator* uuid_iterator,
    UUIDResolver* uuid_resolver,
    const RPCOptions& options,
    bool uuid_iterator_has_addresses) {
  return ExecuteSyncRequest(
      sync_function, uuid_iterator, uuid_resolver, options,
      uuid_iterator_has_addresses,
      NULL,   // Sixth argument of the full overload left unset.
      NULL);  // Seventh argument of the full overload left unset.
}
} // namespace xtreemfs

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,505 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/file_info.h"
#include <boost/make_shared.hpp>
#include "libxtreemfs/file_handle_implementation.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/volume_implementation.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
#include "xtreemfs/MRC.pb.h"
#include "xtreemfs/OSD.pb.h"
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
using namespace std;
namespace xtreemfs {
/**
 * Creates the per-file state for the file "file_id" at "path".
 *
 * Stores the given client and volume pointers, copies the XLocSet, derives
 * the OSD UUID iterator and UUID container from it and wires up the
 * AsyncWriteHandler with services obtained from "volume". The reference count
 * starts at 0 and the OSDWriteResponse state starts as kClean.
 *
 * If the volume option object_cache_size is > 0, an ObjectCache is created
 * whose object size is the stripe size (given in kB) of the first replica.
 */
FileInfo::FileInfo(
ClientImplementation* client,
VolumeImplementation* volume,
uint64_t file_id,
const std::string& path,
bool replicate_on_close,
const xtreemfs::pbrpc::XLocSet& xlocset,
const std::string& client_uuid)
: client_(client),
volume_(volume),
file_id_(file_id),
path_(path),
replicate_on_close_(replicate_on_close),
reference_count_(0),
xlocset_(xlocset),
osd_uuid_iterator_(xlocset),
client_uuid_(client_uuid),
osd_write_response_(NULL),
osd_write_response_status_(kClean),
#ifdef _MSC_VER
// Disable "warning C4355: 'this' : used in base member initializer list".
// We can ignore that warning because we know that AsyncWriteHandler's
// constructor doesn't dereference the pointer passed to it.
#pragma warning(push)
#pragma warning(disable:4355)
#endif // _MSC_VER
async_write_handler_(this,
&osd_uuid_iterator_,
volume->uuid_resolver(),
volume->osd_service_client(),
volume->auth_bogus(),
volume->user_credentials_bogus(),
volume->volume_options(),
client->GetAsyncWriteCallbackQueue()) {
#ifdef _MSC_VER
#pragma warning(pop)
#endif // _MSC_VER
// The object cache is optional and only set up for a positive cache size.
if (volume->volume_options().object_cache_size > 0) {
// stripe_size() is multiplied by 1024 to obtain the object size.
// NOTE(review): replicas(0) is accessed without checking that the XLocSet
// contains at least one replica - confirm callers guarantee this.
const int object_size =
xlocset_.replicas(0).striping_policy().stripe_size() * 1024;
object_cache_.reset(
new ObjectCache(volume->volume_options().object_cache_size,
object_size));
}
// Make an UUID container managed by a smart pointer.
osd_uuid_container_ = boost::make_shared<UUIDContainer>(xlocset);
}
/** Expects that every lock has been released before the object is destroyed. */
FileInfo::~FileInfo() {
  assert(active_locks_.empty());
}
/**
 * Creates a regular file handle, i.e. one which is not used for a pending
 * file size update (used_for_pending_filesize_update = false).
 */
FileHandleImplementation* FileInfo::CreateFileHandle(
    const xtreemfs::pbrpc::XCap& xcap,
    bool async_writes_enabled) {
  const bool used_for_pending_filesize_update = false;
  return CreateFileHandle(xcap,
                          async_writes_enabled,
                          used_for_pending_filesize_update);
}
/**
 * Allocates a new FileHandleImplementation for this file and returns it.
 *
 * @param xcap                  XCap handed to the new file handle.
 * @param async_writes_enabled  Passed through to the new file handle.
 * @param used_for_pending_filesize_update
 *     If true, the handle is neither added to open_file_handles_ nor counted
 *     in reference_count_.
 *
 * @return Pointer to the newly allocated (via "new") file handle.
 */
FileHandleImplementation* FileInfo::CreateFileHandle(
const xtreemfs::pbrpc::XCap& xcap,
bool async_writes_enabled,
bool used_for_pending_filesize_update) {
FileHandleImplementation* file_handle = new FileHandleImplementation(
client_,
volume_->client_uuid(),
this,
xcap,
volume_->mrc_uuid_iterator(),
&osd_uuid_iterator_,
volume_->uuid_resolver(),
volume_->mrc_service_client(),
volume_->osd_service_client(),
volume_->stripe_translators(),
async_writes_enabled,
object_cache_.get(),
volume_->volume_options(),
volume_->auth_bogus(),
volume_->user_credentials_bogus());
// Add file_handle to list.
if (!used_for_pending_filesize_update) {
// Both mutexes are held while registering: mutex_ guards reference_count_,
// open_file_handles_mutex_ guards the list. mutex_ is acquired first.
boost::mutex::scoped_lock lock_refcount(mutex_);
boost::mutex::scoped_lock lock_fhlist(open_file_handles_mutex_);
++reference_count_;
open_file_handles_.push_back(file_handle);
}
return file_handle;
}
/**
 * Unregisters "file_handle" from the list of open file handles, waits for its
 * asynchronous operations and then hands it over to the volume's CloseFile().
 *
 * This method does not delete file_handle and does not decrease the
 * reference count itself (see the comments below).
 */
void FileInfo::CloseFileHandle(FileHandleImplementation* file_handle) {
// Pending async writes and file size updates have already been flushed
// by file_handle.
// Remove file handle.
{
// The list lock is scoped so that it is released before waiting below.
boost::mutex::scoped_lock lock_fhlist(open_file_handles_mutex_);
open_file_handles_.remove(file_handle);
}
// Waiting does not require a lock on the open_file_handles_.
file_handle->WaitForAsyncOperations();
// Defer the deletion of file_handle as it might be needed by
// VolumeImplementation::CloseFile() to release all locks.
// At this point the file_handle is already removed from the list of open file
// handles, but the reference_count is not decreased yet. This has to happen
// after locking the open_file_table_ in Volume.
volume_->CloseFile(file_id_, this, file_handle);
}
/**
 * Decrements the reference count while holding mutex_.
 *
 * @return The reference count after the decrement.
 */
int FileInfo::DecreaseReferenceCount() {
  boost::mutex::scoped_lock guard(mutex_);
  reference_count_ -= 1;
  // The counter must never drop below zero.
  assert(reference_count_ >= 0);
  return reference_count_;
}
/**
 * Overwrites size and truncate epoch in "stat" with the values of the stored
 * OSDWriteResponse if the stored response is more recent, i.e. it has a
 * higher truncate epoch, or the same truncate epoch and a bigger size.
 */
void FileInfo::MergeStatAndOSDWriteResponse(xtreemfs::pbrpc::Stat* stat) {
  boost::mutex::scoped_lock guard(osd_write_response_mutex_);

  if (!osd_write_response_.get()) {
    return;  // No OSDWriteResponse stored, nothing to merge.
  }

  const bool stat_has_older_epoch =
      stat->truncate_epoch() < osd_write_response_->truncate_epoch();
  const bool same_epoch_but_smaller_size =
      stat->truncate_epoch() == osd_write_response_->truncate_epoch() &&
      stat->size() < osd_write_response_->size_in_bytes();
  if (!stat_has_older_epoch && !same_epoch_but_smaller_size) {
    return;  // The information in Stat is already up to date.
  }

  // The stored OSDWriteResponse is newer: take over its values.
  stat->set_size(osd_write_response_->size_in_bytes());
  stat->set_truncate_epoch(osd_write_response_->truncate_epoch());
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "getattr: merged infos from osd_write_response, size: "
        << stat->size() << endl;
  }
}
/**
 * Stores "response" as the new OSDWriteResponse if it compares greater than
 * the currently stored one (see CompareOSDWriteResponses()).
 *
 * @return true if "response" was taken over (the pointer is then managed by
 *         osd_write_response_ and the state becomes kDirty), false otherwise.
 */
bool FileInfo::TryToUpdateOSDWriteResponse(
    xtreemfs::pbrpc::OSDWriteResponse* response,
    const xtreemfs::pbrpc::XCap& xcap) {
  assert(response);
  boost::mutex::scoped_lock guard(osd_write_response_mutex_);

  // Keep the stored response unless the new one is the new maximum.
  if (CompareOSDWriteResponses(response, osd_write_response_.get()) != 1) {
    return false;
  }

  // Take over pointer.
  osd_write_response_.reset(response);
  osd_write_response_xcap_.CopyFrom(xcap);
  osd_write_response_status_ = kDirty;
  return true;
}
/**
 * Starts an asynchronous write back of the stored OSDWriteResponse if there
 * is one and its state is kDirty; otherwise does nothing.
 *
 * A temporary file handle is created for the write back and tracked in
 * pending_filesize_updates_; the state changes to kDirtyAndAsyncPending.
 */
void FileInfo::WriteBackFileSizeAsync(const RPCOptions& options) {
boost::mutex::scoped_lock lock(osd_write_response_mutex_);
// Only update pending file size updates.
if (osd_write_response_.get() && osd_write_response_status_ == kDirty) {
// "true" marks the handle as used for a pending file size update, i.e. it is
// not added to the list of open file handles.
FileHandleImplementation* file_handle =
CreateFileHandle(osd_write_response_xcap_, false, true);
pending_filesize_updates_.push_back(file_handle);
osd_write_response_status_ = kDirtyAndAsyncPending;
// The handle is given the current OSDWriteResponse before the write back
// is triggered.
file_handle->set_osd_write_response_for_async_write_back(
*(osd_write_response_.get()));
file_handle->WriteBackFileSizeAsync(options);
}
}
void FileInfo::RenewXCapsAsync(const RPCOptions& options) {
boost::mutex::scoped_lock lock(open_file_handles_mutex_);
for (list<FileHandleImplementation*>::iterator it =
open_file_handles_.begin();
it != open_file_handles_.end();
++it) {
(*it)->ExecutePeriodTasks(options);
}
}
/**
 * Copies the stored OSDWriteResponse into "response". If none is stored,
 * "response" is left untouched.
 */
void FileInfo::GetOSDWriteResponse(
    xtreemfs::pbrpc::OSDWriteResponse* response) {
  boost::mutex::scoped_lock guard(osd_write_response_mutex_);
  if (!osd_write_response_) {
    return;
  }
  response->CopyFrom(*(osd_write_response_.get()));
}
/** Copies the stored path into "*path" while holding mutex_. */
void FileInfo::GetPath(std::string* path) {
  boost::mutex::scoped_lock guard(mutex_);
  path->assign(path_);
}
/**
 * Replaces the stored path by "new_path", but only if the stored path is
 * currently equal to "path".
 */
void FileInfo::RenamePath(const std::string& path,
                          const std::string& new_path) {
  boost::mutex::scoped_lock guard(mutex_);
  if (path_ != path) {
    return;  // Stored path differs, nothing to rename.
  }
  path_ = new_path;
}
void FileInfo::WaitForPendingFileSizeUpdates() {
boost::mutex::scoped_lock lock(osd_write_response_mutex_);
WaitForPendingFileSizeUpdatesHelper(&lock);
}
/**
 * Waits on osd_write_response_cond_ until pending_filesize_updates_ is empty.
 *
 * @param lock  Lock which has to be owned by the caller; it is passed to the
 *              condition variable's wait().
 */
void FileInfo::WaitForPendingFileSizeUpdatesHelper(
    boost::mutex::scoped_lock* lock) {
  assert(lock->owns_lock());
  // Re-check after every wake up.
  while (!pending_filesize_updates_.empty()) {
    osd_write_response_cond_.wait(*lock);
  }
}
/**
 * Completion handler of an asynchronous file size update.
 *
 * If "owr" still equals the stored OSDWriteResponse, the state becomes kClean
 * on success and kDirty otherwise. In any case the temporary "file_handle" is
 * removed from pending_filesize_updates_ and deleted; waiting threads are
 * notified once no update is pending anymore.
 */
void FileInfo::AsyncFileSizeUpdateResponseHandler(
    const xtreemfs::pbrpc::OSDWriteResponse& owr,
    FileHandleImplementation* file_handle,
    bool success) {
  boost::mutex::scoped_lock guard(osd_write_response_mutex_);

  // Only change the status if the OSDWriteResponse has not changed meanwhile.
  const bool response_unchanged =
      CompareOSDWriteResponses(&owr, osd_write_response_.get()) == 0;
  if (response_unchanged) {
    // The status must not have changed.
    assert(osd_write_response_status_ == kDirtyAndAsyncPending);
    // A failed update leaves the response dirty.
    osd_write_response_status_ = success ? kClean : kDirty;
  }

  // Always remove the temporary FileHandle.
  pending_filesize_updates_.remove(file_handle);
  delete file_handle;

  if (pending_filesize_updates_.empty()) {
    osd_write_response_cond_.notify_all();
  }
}
/**
 * Retrieves the attributes of this file by delegating to the volume's
 * GetAttr(), using the currently stored path.
 */
void FileInfo::GetAttr(const xtreemfs::pbrpc::UserCredentials& user_credentials,
                       xtreemfs::pbrpc::Stat* stat) {
  // Fetch a copy of the path via GetPath().
  string current_path;
  GetPath(&current_path);

  volume_->GetAttr(user_credentials, current_path, false, stat, this);
}
/** Flushes without closing the file (close_file = false). */
void FileInfo::Flush(FileHandleImplementation* file_handle) {
  const bool close_file = false;
  Flush(file_handle, close_file);
}
/**
 * Waits for all pending asynchronous writes of this file and afterwards
 * flushes a pending file size update through "file_handle".
 */
void FileInfo::Flush(FileHandleImplementation* file_handle, bool close_file) {
  // All writes of this file are awaited, not only those of file_handle.
  WaitForPendingAsyncWrites();

  FlushPendingFileSizeUpdate(file_handle, close_file);
}
/** Flushes a pending file size update without closing the file. */
void FileInfo::FlushPendingFileSizeUpdate(
    FileHandleImplementation* file_handle) {
  const bool close_file = false;
  FlushPendingFileSizeUpdate(file_handle, close_file);
}
/**
 * Synchronously writes back a dirty OSDWriteResponse (file size update)
 * through "file_handle".
 *
 * If an OSDWriteResponse is stored, pending asynchronous updates are awaited
 * first. If the state is kDirty afterwards, a copy of the response is written
 * back without holding osd_write_response_mutex_; on success the state
 * becomes kClean unless the stored response changed meanwhile.
 *
 * If nothing was written back, but the file is closed and replicate_on_close_
 * is set, an empty OSDWriteResponse is sent to signal the close.
 *
 * @param file_handle  File handle used to execute WriteBackFileSize().
 * @param close_file   Passed through to WriteBackFileSize().
 *
 * @throws XtreemFSException  Rethrown if the write back failed; the state is
 *                            reset to kDirty in that case.
 */
void FileInfo::FlushPendingFileSizeUpdate(FileHandleImplementation* file_handle,
                                          bool close_file) {
  // File size write back.
  boost::mutex::scoped_lock lock(osd_write_response_mutex_);

  bool no_response_sent = true;
  if (osd_write_response_.get()) {
    WaitForPendingFileSizeUpdatesHelper(&lock);
    if (osd_write_response_status_ == kDirty) {
      osd_write_response_status_ = kDirtyAndSyncPending;
      // Create a copy of OSDWriteResponse to pass to FileHandle.
      OSDWriteResponse response_copy(*(osd_write_response_.get()));
      // Do not hold the mutex during the (blocking) write back.
      lock.unlock();

      try {
        file_handle->WriteBackFileSize(response_copy, close_file);
      } catch (const XtreemFSException&) {
        // osd_write_response_status_ is guarded by osd_write_response_mutex_,
        // so the lock has to be re-acquired before touching it. The
        // scoped_lock releases it again during stack unwinding.
        lock.lock();
        // Only revert our own state change; another thread may have changed
        // the state while the mutex was released.
        if (osd_write_response_status_ == kDirtyAndSyncPending) {
          osd_write_response_status_ = kDirty;
        }
        throw;  // Rethrow error.
      }

      lock.lock();
      no_response_sent = false;
      // Only update the status if the response object has not changed
      // meanwhile.
      if (CompareOSDWriteResponses(osd_write_response_.get(),
                                   &response_copy) == 0) {
        osd_write_response_status_ = kClean;
      }
    }
  }

  if (no_response_sent && close_file && replicate_on_close_) {
    // Send an explicit close only if the on-close-replication should be
    // triggered. Use an empty OSDWriteResponse object therefore.
    OSDWriteResponse empty_osd_write_response;
    file_handle->WriteBackFileSize(empty_osd_write_response, close_file);
  }
}
/**
 * Checks "lock" against the locally cached locks of this file.
 *
 * @param lock                       Lock to check; its client UUID has to be
 *                                   the one of this client.
 * @param conflicting_lock           [out] Copy of the first conflicting lock
 *                                   found (only set if *conflict_found).
 * @param lock_for_pid_cached        [out] True if a lock of the same PID was
 *                                   seen before the scan stopped.
 * @param cached_lock_for_pid_equal  [out] True if that cached lock equals
 *                                   "lock" (see CheckIfLocksAreEqual()).
 * @param conflict_found             [out] True if a lock of another PID
 *                                   conflicts with "lock".
 *
 * The scan stops at the first conflict, so the two "cached" flags only
 * reflect the entries visited up to that point.
 */
void FileInfo::CheckLock(const xtreemfs::pbrpc::Lock& lock,
                         xtreemfs::pbrpc::Lock* conflicting_lock,
                         bool* lock_for_pid_cached,
                         bool* cached_lock_for_pid_equal,
                         bool* conflict_found) {
  assert(conflicting_lock);
  assert(lock_for_pid_cached);
  assert(cached_lock_for_pid_equal);
  // conflict_found is written unconditionally below, so it must be valid too.
  assert(conflict_found);
  assert(lock.client_uuid() == client_uuid_);

  boost::mutex::scoped_lock mutex_lock(active_locks_mutex_);

  *cached_lock_for_pid_equal = false;
  *conflict_found = false;
  *lock_for_pid_cached = false;

  for (map<unsigned int, Lock*>::iterator it = active_locks_.begin();
       it != active_locks_.end();
       ++it) {
    if (it->first == lock.client_pid()) {
      // Locks of the same process never conflict with each other.
      *lock_for_pid_cached = true;
      if (CheckIfLocksAreEqual(lock, *(it->second))) {
        *cached_lock_for_pid_equal = true;
      }
      continue;
    }

    if (CheckIfLocksDoConflict(lock, *(it->second))) {
      *conflict_found = true;
      conflicting_lock->CopyFrom(*(it->second));
      // A conflicting lock has a higher priority than a cached lock with the
      // same PID.
      break;
    }
  }
}
/** Returns true if a lock of the process "process_id" is cached. */
bool FileInfo::CheckIfProcessHasLocks(int process_id) {
  boost::mutex::scoped_lock guard(active_locks_mutex_);
  // At most one lock is stored per process_id, a single lookup suffices.
  return active_locks_.find(process_id) != active_locks_.end();
}
/**
 * Caches a copy of "lock" under its client PID. A lock previously cached for
 * the same PID is deleted and replaced.
 */
void FileInfo::PutLock(const xtreemfs::pbrpc::Lock& lock) {
  assert(lock.client_uuid() == client_uuid_);

  boost::mutex::scoped_lock guard(active_locks_mutex_);

  // Drop the lock stored for this PID, if any.
  map<unsigned int, Lock*>::iterator existing =
      active_locks_.find(lock.client_pid());
  if (existing != active_locks_.end()) {
    delete existing->second;
    active_locks_.erase(existing);
  }

  // Store a heap allocated copy which is owned by active_locks_.
  active_locks_[lock.client_pid()] = new Lock(lock);
}
/**
 * Removes and deletes the cached lock of the PID of "lock", if there is one.
 */
void FileInfo::DelLock(const xtreemfs::pbrpc::Lock& lock) {
  assert(lock.client_uuid() == client_uuid_);

  boost::mutex::scoped_lock guard(active_locks_mutex_);

  map<unsigned int, Lock*>::iterator cached =
      active_locks_.find(lock.client_pid());
  if (cached == active_locks_.end()) {
    return;  // No lock cached for this PID.
  }

  // Only up to one lock per PID. If its unlocked, just delete it.
  delete cached->second;
  active_locks_.erase(cached);
}
/**
 * Releases the cached lock of the process "process_id" (if any) by calling
 * ReleaseLock() on "file_handle" with a copy of that lock.
 */
void FileInfo::ReleaseLockOfProcess(FileHandleImplementation* file_handle,
                                    int process_id) {
  boost::mutex::scoped_lock guard(active_locks_mutex_);

  // At most one lock is stored per process_id, a single lookup suffices.
  map<unsigned int, Lock*>::iterator cached = active_locks_.find(process_id);
  if (cached == active_locks_.end()) {
    return;
  }

  // Work on a copy so that the mutex can be released before the call.
  Lock lock_copy(*(cached->second));
  guard.unlock();
  file_handle->ReleaseLock(lock_copy);
}
/**
 * Calls ReleaseLock() on "file_handle" for every currently cached lock.
 *
 * The locks are copied by value first: this keeps the critical section short
 * and allows ReleaseLock() to remove entries from active_locks_ without
 * invalidating the iteration. Values instead of pointers are used so that the
 * copies are freed even if a ReleaseLock() call fails.
 */
void FileInfo::ReleaseAllLocks(FileHandleImplementation* file_handle) {
  list<Lock> snapshot;
  {
    boost::mutex::scoped_lock guard(active_locks_mutex_);
    map<unsigned int, Lock*>::iterator entry = active_locks_.begin();
    while (entry != active_locks_.end()) {
      snapshot.push_back(*(entry->second));
      ++entry;
    }
  }

  list<Lock>::const_iterator lock = snapshot.begin();
  while (lock != snapshot.end()) {
    // The lock itself will be deleted by ReleaseLock.
    file_handle->ReleaseLock(*lock);
    ++lock;
  }
}
/** Hands "write_buffer" over to the AsyncWriteHandler of this file. */
void FileInfo::AsyncWrite(AsyncWriteBuffer* write_buffer) {
  AsyncWriteBuffer* const buffer = write_buffer;
  async_write_handler_.Write(buffer);
}
/** Delegates to WaitForPendingWrites() of the AsyncWriteHandler. */
void FileInfo::WaitForPendingAsyncWrites() {
  async_write_handler_
      .WaitForPendingWrites();
}
/**
 * Delegates to WaitForPendingWritesNonBlocking() of the AsyncWriteHandler and
 * returns its result unchanged.
 */
bool FileInfo::WaitForPendingAsyncWritesNonBlocking(
    boost::condition* condition_variable,
    bool* wait_completed,
    boost::mutex* wait_completed_mutex) {
  const bool handler_result =
      async_write_handler_.WaitForPendingWritesNonBlocking(
          condition_variable, wait_completed, wait_completed_mutex);
  return handler_result;
}
/**
 * Replaces the stored XLocSet by "new_xlocset", rebuilds the OSD UUID
 * iterator and the UUID container from it and stores "replicate_on_close".
 * All updates happen while xlocset_mutex_ is held.
 */
void FileInfo::UpdateXLocSetAndRest(const xtreemfs::pbrpc::XLocSet& new_xlocset,
                                    bool replicate_on_close) {
  boost::mutex::scoped_lock guard(xlocset_mutex_);

  replicate_on_close_ = replicate_on_close;

  xlocset_.CopyFrom(new_xlocset);
  osd_uuid_iterator_.ClearAndGetOSDUUIDsFromXlocSet(new_xlocset);
  // A fresh container is created instead of modifying the shared one.
  osd_uuid_container_ = boost::make_shared<UUIDContainer>(new_xlocset);
}
/**
 * Replaces the stored XLocSet by "new_xlocset" and rebuilds the OSD UUID
 * iterator and the UUID container from it. replicate_on_close_ is not
 * touched. All updates happen while xlocset_mutex_ is held.
 */
void FileInfo::UpdateXLocSetAndRest(
    const xtreemfs::pbrpc::XLocSet& new_xlocset) {
  boost::mutex::scoped_lock guard(xlocset_mutex_);

  xlocset_.CopyFrom(new_xlocset);
  osd_uuid_iterator_.ClearAndGetOSDUUIDsFromXlocSet(new_xlocset);
  // A fresh container is created instead of modifying the shared one.
  osd_uuid_container_ = boost::make_shared<UUIDContainer>(new_xlocset);
}
/** Copies the stored XLocSet into "*new_xlocset" (must not be NULL). */
void FileInfo::GetXLocSet(xtreemfs::pbrpc::XLocSet* new_xlocset) {
  assert(new_xlocset);

  boost::mutex::scoped_lock guard(xlocset_mutex_);
  new_xlocset->CopyFrom(xlocset_);
}
/**
 * Copies the stored XLocSet into "*new_xlocset" (must not be NULL) and
 * returns the current UUID container. Both are read under the same lock.
 */
boost::shared_ptr<UUIDContainer> FileInfo::GetXLocSetAndUUIDContainer(
    xtreemfs::pbrpc::XLocSet* new_xlocset) {
  assert(new_xlocset);

  boost::mutex::scoped_lock guard(xlocset_mutex_);
  new_xlocset->CopyFrom(xlocset_);
  boost::shared_ptr<UUIDContainer> container = osd_uuid_container_;
  return container;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,583 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011-2014 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/helper.h"
#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <boost/lexical_cast.hpp>
#include <iostream>
#include <string>
#include "libxtreemfs/options.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include <boost/algorithm/string.hpp>
#include "rpc/sync_callback.h"
#include "util/logging.h"
#include "xtreemfs/GlobalTypes.pb.h"
#include "xtreemfs/MRC.pb.h"
#include "xtreemfs/OSD.pb.h"
#ifdef __APPLE__
#include <sys/utsname.h>
#endif // __APPLE__
#ifdef WIN32
#define NOMINMAX
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif // WIN32
#ifdef __linux__
#include <arpa/inet.h>
#include <ifaddrs.h>
#include <netdb.h>
#include <sys/socket.h>
#endif // __linux__
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Three-way comparison of two OSDWriteResponses.
 *
 * A NULL response is smaller than any non-NULL response. Otherwise the
 * truncate epoch is compared first and the size in bytes second.
 *
 * @return 1 if new_response > current_response, -1 if it is smaller and 0 if
 *         both are equal (or both are NULL).
 */
int CompareOSDWriteResponses(
    const xtreemfs::pbrpc::OSDWriteResponse* new_response,
    const xtreemfs::pbrpc::OSDWriteResponse* current_response) {
  // Handle missing responses first.
  if (new_response == NULL) {
    return current_response == NULL ? 0 : -1;
  }
  if (current_response == NULL) {
    return 1;
  }

  // The truncate epoch has precedence over the file size.
  if (new_response->truncate_epoch() != current_response->truncate_epoch()) {
    return new_response->truncate_epoch() > current_response->truncate_epoch()
        ? 1 : -1;
  }

  if (new_response->size_in_bytes() != current_response->size_in_bytes()) {
    return new_response->size_in_bytes() > current_response->size_in_bytes()
        ? 1 : -1;
  }

  // new_response == current_response.
  return 0;
}
/**
 * The XCap contains the Volume UUID and File ID concatenated by a ":".
 *
 * Returns the part of xcap.file_id() after the first ":" converted to a
 * number. If there is no ":", the whole string is converted.
 */
uint64_t ExtractFileIdFromXCap(const xtreemfs::pbrpc::XCap& xcap) {
  const std::string global_file_id = xcap.file_id();
  // find() + 1 also yields 0 if no ":" is present.
  const int id_begin = global_file_id.find(":") + 1;
  const int id_length = global_file_id.length() - id_begin;
  const std::string file_id_part = global_file_id.substr(id_begin, id_length);
  return boost::lexical_cast<uint64_t>(file_id_part);
}
/**
 * Returns the parent directory of "path".
 *
 * The result is "/" for the root directory and for entries directly below
 * it. A path without any "/" is returned unchanged.
 */
std::string ResolveParentDirectory(const std::string& path) {
  // Keep the unsigned size_type: narrowing the result (possibly npos) to int
  // is implementation-defined and truncates for very long paths.
  const std::string::size_type last_slash = path.find_last_of('/');
  if (path == "/" || last_slash == 0) {
    return "/";
  }
  // For last_slash == npos substr() returns the complete path.
  return path.substr(0, last_slash);
}
/**
 * Returns the last component of "path", i.e. everything after the last "/".
 *
 * "/" is returned for the root directory. A path without any "/" is returned
 * unchanged (this includes the empty path). Paths with a trailing "/" are not
 * allowed (checked by an assertion).
 */
std::string GetBasename(const std::string& path) {
  if (path == "/") {
    return "/";
  }

  const std::string::size_type last_slash = path.find_last_of('/');
  if (last_slash == std::string::npos) {
    // No directory part. Handling this case explicitly also prevents an empty
    // path from being mistaken for a path with a trailing "/".
    return path;
  }

  // We don't allow path to have a trailing "/".
  assert(last_slash != (path.length() - 1));
  return path.substr(last_slash + 1);
}
/**
 * Joins "directory" and "file" to a path.
 *
 * "." yields "directory" itself and ".." yields the parent of "directory"
 * ("/" for the root directory and for directories directly below it).
 */
std::string ConcatenatePath(const std::string& directory,
                            const std::string& file) {
  // handle .. and .
  if (file == ".") {
    return directory;
  }
  if (file == "..") {
    const std::string::size_type last_slash = directory.find_last_of('/');
    // The parent of "/" and of a first-level directory like "/a" is the root
    // directory. Cutting at position 0 would yield an empty path instead.
    if (directory == "/" || last_slash == 0) {
      return "/";
    }
    return directory.substr(0, last_slash);
  }

  // Avoid a double slash if the directory is the root directory.
  if (directory == "/") {
    return "/" + file;
  }
  return directory + "/" + file;
}
/**
 * Returns the UUID of the OSD at position "stripe_index" of the replica
 * "replica_index" in "xlocs".
 *
 * @return The OSD UUID, or an empty string (after logging an error) if the
 *         XLocSet has no replicas, the replica has no OSDs or one of the
 *         indices is out of range.
 */
std::string GetOSDUUIDFromXlocSet(const xtreemfs::pbrpc::XLocSet& xlocs,
                                  uint32_t replica_index,
                                  uint32_t stripe_index) {
  if (xlocs.replicas_size() == 0) {
    Logging::log->getLog(LEVEL_ERROR)
        << "GetOSDUUIDFromXlocSet: Empty replicas list in XlocSet: "
        << xlocs.DebugString() << endl;
    return "";
  }
  // Guard the element access below against an invalid index.
  if (replica_index >= static_cast<uint32_t>(xlocs.replicas_size())) {
    Logging::log->getLog(LEVEL_ERROR)
        << "GetOSDUUIDFromXlocSet: Replica index " << replica_index
        << " is out of range in XlocSet: "
        << xlocs.DebugString() << endl;
    return "";
  }

  const xtreemfs::pbrpc::Replica& replica = xlocs.replicas(replica_index);
  if (replica.osd_uuids_size() == 0) {
    Logging::log->getLog(LEVEL_ERROR)
        << "GetOSDUUIDFromXlocSet: No head OSD available in XlocSet:"
        << xlocs.DebugString() << endl;
    return "";
  }
  if (stripe_index >= static_cast<uint32_t>(replica.osd_uuids_size())) {
    Logging::log->getLog(LEVEL_ERROR)
        << "GetOSDUUIDFromXlocSet: Stripe index " << stripe_index
        << " is out of range in XlocSet: "
        << xlocs.DebugString() << endl;
    return "";
  }

  return replica.osd_uuids(stripe_index);
}
/**
 * Returns the UUID of the head OSD (stripe index 0) of the first replica
 * (replica index 0) of "xlocs".
 */
std::string GetOSDUUIDFromXlocSet(
    const xtreemfs::pbrpc::XLocSet& xlocs) {
  const uint32_t first_replica = 0;
  const uint32_t head_osd = 0;
  return GetOSDUUIDFromXlocSet(xlocs, first_replica, head_osd);
}
/**
 * Returns the name of the striping policy "policy", looked up in a table
 * indexed by the enum value.
 *
 * NOTE(review): the table has a single entry and the index is not range
 * checked - confirm that no other StripingPolicyType value can be passed.
 */
std::string StripePolicyTypeToString(
    xtreemfs::pbrpc::StripingPolicyType policy) {
  const std::string policy_names[] = { "STRIPING_POLICY_RAID0" };
  const std::string& name = policy_names[policy];
  return name;
}
/**
 * Generates a random identifier in the textual layout of a UUID (five blocks
 * of 8-4-4-4-12 characters separated by "-") and stores it in "*result".
 *
 * Note that the characters are taken from a Base62 alphabet and not only from
 * hexadecimal digits.
 *
 * By default this function does read random data from /dev/urandom and falls
 * back to using C's rand() if /dev/urandom is not available or cannot be
 * read.
 */
void GenerateVersion4UUID(std::string* result) {
  FILE *urandom = fopen("/dev/urandom", "r");
  if (!urandom) {
    // Use rand() instead if /dev/urandom not available.
    srand(static_cast<unsigned int>(time(NULL)));
  }

  // Base62 characters for UUID generation.
  const char set[] =
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
  const uint32_t block_length[] = {8, 4, 4, 4, 12};
  const uint32_t block_length_count = 5;
  // 32 characters + 4 separators + terminating null character.
  char uuid[37];

  int pos = 0;
  for (uint32_t j = 0; j < block_length_count; j++) {
    for (uint32_t i = 0; i < block_length[j]; i++) {
      uint64_t random_value = 0;
      // Read random number.
      if (urandom &&
          fread(&random_value, sizeof(random_value), 1, urandom) != 1) {
        // A failed or short read must not leave random_value undefined:
        // switch to the rand() fallback for the remaining characters.
        fclose(urandom);
        urandom = NULL;
        srand(static_cast<unsigned int>(time(NULL)));
      }
      if (!urandom) {
        // Use C's rand() if /dev/urandom not available.
        random_value = rand();  // NOLINT
      }

      uuid[pos] = set[random_value % 62];
      pos++;
    }
    // Separate the blocks; no separator after the last one.
    if (j + 1 < block_length_count) {
      uuid[pos++] = '-';
    }
  }
  uuid[pos] = '\0';

  *result = string(uuid);
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG) <<
        "Generated client UUID: " << uuid << endl;
  }

  if (urandom) {
    fclose(urandom);
  }
}
/**
 * Sets the fields of "stat" which are listed below to their initial values:
 * numbers to 0, strings to "" and nlink to 1.
 */
void InitializeStat(xtreemfs::pbrpc::Stat* stat) {
  // Identity and type.
  stat->set_dev(0);
  stat->set_ino(0);
  stat->set_mode(0);
  // If not set to 1, an assertion in the metadata cache will be triggered.
  stat->set_nlink(1);

  // Ownership.
  stat->set_user_id("");
  stat->set_group_id("");

  // Size related fields.
  stat->set_size(0);
  stat->set_blksize(0);
  stat->set_truncate_epoch(0);

  // Timestamps.
  stat->set_atime_ns(0);
  stat->set_mtime_ns(0);
  stat->set_ctime_ns(0);
}
/**
 * Returns true if both locks have the same client UUID, client PID, offset
 * and length. Other fields are not compared.
 */
bool CheckIfLocksAreEqual(const xtreemfs::pbrpc::Lock& lock1,
                          const xtreemfs::pbrpc::Lock& lock2) {
  if (!(lock1.client_uuid() == lock2.client_uuid())) {
    return false;
  }
  if (!(lock1.client_pid() == lock2.client_pid())) {
    return false;
  }
  if (!(lock1.offset() == lock2.offset())) {
    return false;
  }
  return lock1.length() == lock2.length();
}
/**
 * Returns true if "lock1" and "lock2" conflict with each other.
 *
 * A length of 0 means that the lock extends to the end of the file; such a
 * lock is represented by an end value of 0 below. For two bounded locks a
 * conflict is reported if their ranges overlap and at least one of them is
 * exclusive.
 */
bool CheckIfLocksDoConflict(const xtreemfs::pbrpc::Lock& lock1,
const xtreemfs::pbrpc::Lock& lock2) {
// 0 means to lock till the end of the file.
uint64_t lock1_end = lock1.length() == 0 ? 0 :
lock1.offset() + lock1.length();
uint64_t lock2_end = lock2.length() == 0 ? 0 :
lock2.offset() + lock2.length();
// Check for overlaps.
// NOTE(review): the two branches for locks reaching to the end of the file
// return true on overlap without consulting exclusive() - confirm that two
// shared locks are meant to conflict in this case.
if (lock1_end == 0) {
if (lock2_end >= lock1.offset() || lock2_end == 0) {
return true;
}
}
if (lock2_end == 0) {
if (lock1_end >= lock2.offset() || lock1_end == 0) {
return true;
}
}
// Overlapping?
// NOTE(review): the end values are offset + length, but are compared like
// inclusive bounds here, so directly adjacent ranges count as overlapping -
// confirm this is intended.
if (!(lock1_end < lock2.offset() || lock1.offset() > lock2_end)) {
// Does overlap, check for conflicting modes.
return lock1.exclusive() || lock2.exclusive();
}
return false;
}
/**
 * Returns true if "string" can be parsed as a non-negative integer.
 *
 * The value is parsed as a signed 64 bit integer: a cast to an unsigned type
 * would not throw bad_lexical_cast for negative input, so the sign has to be
 * checked explicitly. Empty strings and unparsable input yield false.
 */
bool CheckIfUnsignedInteger(const std::string& string) {
  if (string.empty()) {
    return false;
  }

  try {
    const int64_t parsed_value = boost::lexical_cast<int64_t>(string);
    // Negative values are valid integers, but not unsigned ones.
    return parsed_value >= 0;
  } catch(const boost::bad_lexical_cast&) {
    return false;
  }
}
/**
 * Builds RPCOptions from the fields max_tries, retry_delay_s and
 * was_interrupted_function of "options". The last attempt is not delayed.
 */
RPCOptions RPCOptionsFromOptions(const Options& options) {
  const bool delay_last_attempt = false;
  return RPCOptions(options.max_tries,
                    options.retry_delay_s,
                    delay_last_attempt,
                    options.was_interrupted_function);
}
#ifdef __APPLE__
int GetMacOSXKernelVersion() {
int darwin_kernel_version = -1;
struct utsname uname_result;
uname(&uname_result);
string darwin_release(uname_result.release);
size_t first_dot = darwin_release.find_first_of(".");
try {
darwin_kernel_version = boost::lexical_cast<int>(
darwin_release.substr(0, first_dot));
} catch(const boost::bad_lexical_cast& e) {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN) << "Failed to retrieve the kernel "
"version, got: " << darwin_kernel_version << endl;
}
}
return darwin_kernel_version;
}
#endif // __APPLE__
#ifdef WIN32
std::string ConvertWindowsToUTF8(const wchar_t* windows_string) {
string utf8;
ConvertWindowsToUTF8(windows_string, &utf8);
return utf8;
}
void ConvertWindowsToUTF8(const wchar_t* from,
std::string* utf8) {
// Assume that most strings will fit into a kDefaultBufferSize sized buffer.
// If not, the buffer will be increased.
const size_t kDefaultBufferSize = 1024;
// resize() does not count the null-terminating char, WideCharTo... does.
utf8->resize(kDefaultBufferSize - 1);
int r = WideCharToMultiByte(CP_UTF8,
0,
from,
-1,
&((*utf8)[0]),
kDefaultBufferSize,
0,
0);
if (r == 0) {
throw XtreemFSException("Failed to convert a UTF-16"
" (wide character) string to an UTF8 string."
" Error code: "
+ boost::lexical_cast<string>(::GetLastError()));
}
utf8->resize(r - 1);
if (r > kDefaultBufferSize) {
int r2 = WideCharToMultiByte(CP_UTF8, 0, from, -1, &((*utf8)[0]), r, 0, 0);
if (r != r2 || r2 == 0) {
throw XtreemFSException("Failed to convert a UTF-16"
" (wide character) string to an UTF8 string."
" Error code: "
+ boost::lexical_cast<string>(::GetLastError()));
}
}
}
/**
 * Converts the UTF-8 string "utf8" (including the terminating null character)
 * to wide characters and writes them to "buf", whose size "buffer_size" is
 * passed to MultiByteToWideChar().
 *
 * @throws XtreemFSException  If the conversion failed.
 */
void ConvertUTF8ToWindows(const std::string& utf8,
                          wchar_t* buf,
                          int buffer_size) {
  const int written =
      MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), -1, buf, buffer_size);
  if (written != 0) {
    return;  // Conversion succeeded.
  }

  throw XtreemFSException("Failed to convert this UTF8 string to a UTF-16"
      " (wide character) string: " + utf8
      + " Error code: "
      + boost::lexical_cast<string>(::GetLastError()));
}
std::wstring ConvertUTF8ToWindows(const std::string& utf8) {
wstring win;
ConvertUTF8ToWindows(utf8, &win);
return win;
}
void ConvertUTF8ToWindows(const std::string& utf8,
std::wstring* win) {
// Assume that most strings will fit into a kDefaultBufferSize sized buffer.
// If not, the buffer will be increased.
const size_t kDefaultBufferSize = 1024;
// resize() does not count the null-terminating char, MultiByteToWide... does.
win->resize(kDefaultBufferSize - 1);
int r = MultiByteToWideChar(CP_UTF8,
0,
utf8.c_str(),
-1,
&((*win)[0]),
kDefaultBufferSize);
if (r == 0) {
throw XtreemFSException("Failed to convert a UTF-8"
" string to an UTF16 string (wide character)."
" Error code: "
+ boost::lexical_cast<string>(::GetLastError()));
}
win->resize(r - 1);
if (r > kDefaultBufferSize) {
int r2 = MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), -1, &((*win)[0]), r);
if (r != r2 || r2 == 0) {
throw XtreemFSException("Failed to convert a UTF-8"
" string to an UTF16 string (wide character)."
" Error code: "
+ boost::lexical_cast<string>(::GetLastError()));
}
}
}
#endif // WIN32
/**
 * Counts the bits set in the netmask stored at "netmask" and returns the
 * count as network prefix length.
 *
 * @param netmask  Netmask, read in uint32_t sized words.
 * @param length   Size of the netmask in bytes. Only complete uint32_t words
 *                 are considered.
 */
int GetNetworkPrefixUnix(const uint32_t* netmask, size_t length) {
  const size_t word_count = length / sizeof(uint32_t);
  int ones = 0;
  for (const uint32_t* word = netmask; word != netmask + word_count; ++word) {
    uint32_t remaining_bits = *word;
    while (remaining_bits != 0) {
      // Clear the least significant bit set and count it.
      remaining_bits &= remaining_bits - 1;
      ++ones;
    }
  }
  return ones;
}
/**
 * Computes the bitwise AND of "address" and "netmask" byte by byte and writes
 * the result to "network_address".
 *
 * @param length  Number of bytes to process in each of the three buffers.
 */
void BitwiseAndOfAddressses(
    char* address, char* netmask, char* network_address, size_t length) {
  const size_t byte_count = length / sizeof(char);
  size_t pos = 0;
  while (pos < byte_count) {
    const char masked = address[pos] & netmask[pos];
    network_address[pos] = masked;
    ++pos;
  }
}
#ifdef __linux__
std::string GetNetworkStringUnix(const struct ifaddrs* ifaddr) {
assert(ifaddr->ifa_addr);
assert(ifaddr->ifa_netmask);
assert(ifaddr->ifa_addr->sa_family == ifaddr->ifa_netmask->sa_family);
ostringstream network;
// Network address.
char ip_printable[NI_MAXHOST];
int result = -1;
if (ifaddr->ifa_netmask->sa_family == AF_INET) {
struct sockaddr_in network_address = {};
network_address.sin_family = ifaddr->ifa_netmask->sa_family;
BitwiseAndOfAddressses(
reinterpret_cast<char*>(&reinterpret_cast<struct sockaddr_in*>(
ifaddr->ifa_addr)->sin_addr),
reinterpret_cast<char*>(&reinterpret_cast<struct sockaddr_in*>(
ifaddr->ifa_netmask)->sin_addr),
reinterpret_cast<char*>(&network_address.sin_addr),
sizeof(network_address.sin_addr));
result = getnameinfo(reinterpret_cast<struct sockaddr*>(&network_address),
sizeof(network_address),
ip_printable,
NI_MAXHOST,
NULL,
0,
NI_NUMERICHOST);
} else if (ifaddr->ifa_netmask->sa_family == AF_INET6) {
struct sockaddr_in6 network_address = {};
network_address.sin6_family = ifaddr->ifa_netmask->sa_family;
BitwiseAndOfAddressses(
reinterpret_cast<char*>(&reinterpret_cast<struct sockaddr_in6*>(
ifaddr->ifa_addr)->sin6_addr),
reinterpret_cast<char*>(&reinterpret_cast<struct sockaddr_in6*>(
ifaddr->ifa_netmask)->sin6_addr),
reinterpret_cast<char*>(&network_address.sin6_addr),
sizeof(network_address.sin6_addr));
result = getnameinfo(reinterpret_cast<struct sockaddr*>(&network_address),
sizeof(network_address),
ip_printable,
NI_MAXHOST,
NULL,
0,
NI_NUMERICHOST);
} else {
assert(ifaddr->ifa_netmask->sa_family == AF_INET ||
ifaddr->ifa_netmask->sa_family == AF_INET6);
}
if (result == 0) {
network << ip_printable;
} else {
throw XtreemFSException("Failed to convert an IP address from the internal"
" network order representation to the printable text presentation."
" Error: " + boost::lexical_cast<string>(result));
}
// Separator.
network << "/";
// Prefix.
if (ifaddr->ifa_netmask->sa_family == AF_INET) {
struct in_addr netmask =
reinterpret_cast<struct sockaddr_in*>(ifaddr->ifa_netmask)->sin_addr;
network << GetNetworkPrefixUnix(reinterpret_cast<uint32_t*>(&netmask),
sizeof(netmask));
} else if (ifaddr->ifa_netmask->sa_family == AF_INET6) {
struct in6_addr netmask =
reinterpret_cast<struct sockaddr_in6*>(ifaddr->ifa_netmask)->sin6_addr;
network << GetNetworkPrefixUnix(reinterpret_cast<uint32_t*>(&netmask),
sizeof(netmask));
} else {
assert(ifaddr->ifa_netmask->sa_family == AF_INET ||
ifaddr->ifa_netmask->sa_family == AF_INET6);
}
return network.str();
}
#endif // __linux__
/**
 * Returns the networks ("<network address>/<prefix length>") of all IPv4 and
 * IPv6 addresses assigned to the local network interfaces.
 *
 * Entries which cannot be converted are logged (level WARN) and ignored. The
 * result is always empty if not compiled for Linux.
 *
 * @throws XtreemFSException if the list of interfaces cannot be retrieved.
 */
boost::unordered_set<std::string> GetNetworks() {
  boost::unordered_set<std::string> result;
#ifdef __linux__
  struct ifaddrs* ifaddr = NULL;
  if (getifaddrs(&ifaddr) == -1) {
    // Remember the error before another library call can overwrite errno.
    const int getifaddrs_errno = errno;
    freeifaddrs(ifaddr);
    throw XtreemFSException("Failed to get the list of network interfaces."
        " Error: " + boost::lexical_cast<string>(getifaddrs_errno));
  }

  try {
    for (struct ifaddrs* ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
      // Both pointers are dereferenced by GetNetworkStringUnix(); skip
      // entries which do not provide an address or a netmask.
      if (ifa->ifa_addr == NULL || ifa->ifa_netmask == NULL) {
        continue;
      }

      if (ifa->ifa_addr->sa_family == AF_INET ||
          ifa->ifa_addr->sa_family == AF_INET6) {
        try {
          result.insert(GetNetworkStringUnix(ifa));
        } catch (const XtreemFSException& e) {
          if (Logging::log->loggingActive(LEVEL_WARN)) {
            Logging::log->getLog(LEVEL_WARN) << "Converting the information about"
                " the network interface: " << ifa->ifa_name << " with the"
                " family: " << ifa->ifa_addr->sa_family << " failed."
                " Error: " << e.what() << " The device was ignored." << endl;
          }
        }
      }
    }
  } catch (...) {
    // Do not leak the interface list if anything else fails (e.g. an
    // allocation while inserting into the result set).
    freeifaddrs(ifaddr);
    throw;
  }

  freeifaddrs(ifaddr);
#endif  // __linux__
  return result;
}
/**
 * Parses a human-readable byte number (e.g. "512", "10B", "4K", "16 MB") into
 * a byte count.
 *
 * The number may be followed by one of the units K, M, G, T, P or E (powers
 * of 1024), each optionally followed by "B", or by "B" alone. Units are
 * matched case-insensitively.
 *
 * @return The byte count or -1 if byte_number is not parsable (no leading
 *         number or unknown unit).
 */
long parseByteNumber(std::string byte_number) {
  std::stringstream ss;
  ss << byte_number;

  long long coeff = 0;
  if (!(ss >> coeff)) {
    // No leading number at all, e.g. "" or "abc".
    return -1;
  }

  std::string multiplier;
  ss >> multiplier;

  if (multiplier.empty() || multiplier == "B" || multiplier == "b") {
    return static_cast<long>(coeff);
  }

  if (multiplier.length() > 2 ||
      (multiplier.length() == 2 &&
       multiplier[1] != 'B' && multiplier[1] != 'b')) {
    return -1;
  }

  // Every unit is a power of 1024, i.e. a multiple of 10 bits.
  int shift = 0;
  switch (multiplier[0]) {
    case 'K': case 'k': shift = 10; break;
    case 'M': case 'm': shift = 20; break;
    case 'G': case 'g': shift = 30; break;
    case 'T': case 't': shift = 40; break;
    case 'P': case 'p': shift = 50; break;
    case 'E': case 'e': shift = 60; break;
    default: return -1;
  }

  // The factor is computed as long long to avoid overflowing the constant on
  // platforms where long has only 32 bits.
  const long long factor = 1LL << shift;
  return static_cast<long>(coeff * factor);
}
} // namespace xtreemfs

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/interrupt.h"
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/thread/thread.hpp>
#include "libxtreemfs/execute_sync_request.h"
namespace xtreemfs {
/**
 * Returns true if the callback "cb" is set and its invocation returns 1.
 * Without a callback (NULL) the result is always false.
 */
bool Interruptibilizer::WasInterrupted(InterruptedCallback cb) {
  if (cb == NULL) {
    return false;
  }
  return cb() == 1;
}
/**
 * Sleeps for "rel_time_ms" milliseconds in slices of at most 2000 ms.
 *
 * Before each slice the callback "cb" is checked via WasInterrupted(); the
 * sleep ends early as soon as an interruption is reported.
 */
void Interruptibilizer::SleepInterruptible(int64_t rel_time_ms,
                                           InterruptedCallback cb) {
  const int64_t kMaxSliceMs = 2000;

  int64_t remaining_ms = rel_time_ms;
  while (remaining_ms > 0) {
    if (Interruptibilizer::WasInterrupted(cb)) {
      break;
    }

    // Sleep for the rest of the time, but never longer than one slice.
    int64_t slice_ms = remaining_ms;
    if (slice_ms > kMaxSliceMs) {
      slice_ms = kMaxSliceMs;
    }
    remaining_ms -= slice_ms;
    boost::this_thread::sleep(boost::posix_time::millisec(slice_ms));
  }
}
} // namespace xtreemfs

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,22 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/metadata_cache_entry.h"
#include "xtreemfs/MRC.pb.h"
namespace xtreemfs {
/** Creates an empty entry: none of the three cached members is set. */
MetadataCacheEntry::MetadataCacheEntry()
: dir_entries(NULL), stat(NULL), xattrs(NULL) {}
/**
 * Deletes the objects referenced by dir_entries, stat and xattrs, i.e. the
 * entry owns whatever was assigned to these members.
 */
MetadataCacheEntry::~MetadataCacheEntry() {
// Members which were never set are still NULL, deleting them is a no-op.
delete dir_entries;
delete stat;
delete xattrs;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,271 @@
/*
* Copyright (c) 2013 by Felix Hupfeld.
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/object_cache.h"
#include <stdint.h>
#include <boost/scoped_array.hpp>
#include <boost/thread/condition.hpp>
#include <boost/thread/mutex.hpp>
#include <map>
#include "libxtreemfs/xtreemfs_exception.h"
namespace xtreemfs {
/** Returns the current value of time(NULL) as unsigned 64 bit integer. */
static uint64_t Now() {
  const uint64_t current_time = time(NULL);
  return current_time;
}
/**
 * Creates the cache entry for the object "object_no" whose buffer will have
 * "object_size" bytes.
 *
 * The entry starts without valid data: actual_size_ == -1 is the marker which
 * is checked by ReadInternal() and Truncate().
 */
CachedObject::CachedObject(int object_no, int object_size)
: object_no_(object_no), object_size_(object_size), actual_size_(-1),
is_dirty_(false), last_access_(Now()), read_has_failed_(false) {
}
/** Nothing is released explicitly here; the members clean up themselves. */
CachedObject::~CachedObject() {
}
// Flush data and free memory
void CachedObject::FlushAndErase(const ObjectWriterFunction& writer) {
boost::unique_lock<boost::mutex> lock(mutex_);
if (is_dirty_) {
WriteObjectToOSD(writer);
}
DropLocked();
}
void CachedObject::Drop() {
boost::unique_lock<boost::mutex> lock(mutex_);
DropLocked();
}
/**
 * Releases the buffer and resets the object to the "no valid data" state
 * (actual_size_ == -1, not dirty). Does not lock mutex_ itself; both callers
 * in this file hold it.
 */
void CachedObject::DropLocked() {
  data_.reset();
  actual_size_ = -1;
  is_dirty_ = false;
}
int CachedObject::Read(int offset_in_object,
char* buffer,
int bytes_to_read,
const ObjectReaderFunction& reader) {
boost::unique_lock<boost::mutex> lock(mutex_);
ReadInternal(lock, reader);
int actual_bytes = std::min(bytes_to_read, actual_size_ - offset_in_object);
memcpy(buffer, &data_[offset_in_object], actual_bytes);
last_access_ = Now();
return actual_bytes;
}
/**
 * Copies "bytes_to_write" bytes from "buffer" into the cached object at
 * "offset_in_object" and marks the object as dirty. The object is fetched via
 * "reader" first if it has no valid data yet.
 *
 * If the write starts behind the current end of the data, the gap in between
 * is filled with zeros.
 *
 * @throws IOException (from ReadInternal()) if reading the object failed.
 */
void CachedObject::Write(int offset_in_object,
                         const char* buffer,
                         int bytes_to_write,
                         const ObjectReaderFunction& reader) {
  boost::unique_lock<boost::mutex> lock(mutex_);
  // This can be optimized by not triggering a read for a full object write.
  ReadInternal(lock, reader);

  // Bytes behind actual_size_ are not guaranteed to be zero: Truncate() does
  // not zero-out data when shrinking and allocates an uninitialized buffer
  // for objects without data. Zero the gap so that it reads back as a hole.
  const int valid_bytes = std::max(actual_size_, 0);
  if (offset_in_object > valid_bytes) {
    memset(&data_[valid_bytes], 0, offset_in_object - valid_bytes);
  }

  memcpy(&data_[offset_in_object], buffer, bytes_to_write);
  actual_size_ = std::max(actual_size_, offset_in_object + bytes_to_write);
  is_dirty_ = true;
  last_access_ = Now();
}
void CachedObject::Flush(const ObjectWriterFunction& writer) {
boost::unique_lock<boost::mutex> lock(mutex_);
if (is_dirty_) {
// Write out a copy of the data. In case of error (i.e. an exception)
// we unwind the stack and do not mark the object as clean.
WriteObjectToOSD(writer);
is_dirty_ = false;
// Other threads can continue to work with the buffer.
// Another flush can happen in the meantime.
}
}
void CachedObject::Truncate(int new_object_size) {
boost::unique_lock<boost::mutex> lock(mutex_);
if (actual_size_ == new_object_size) {
return;
} else if (actual_size_ < new_object_size) {
if (actual_size_ == -1) {
data_.reset(new char[object_size_]);
actual_size_ = 0;
}
// Zero out extra data, because we might truncate-extend the file
// again and we do not zero-out data when shrinking.
memset(&data_[actual_size_], 0, new_object_size - actual_size_);
}
// Nothing to do if actual_size_ > new_object_size, because we won't
// read beyond the end of the data.
actual_size_ = new_object_size;
}
uint64_t CachedObject::last_access() {
boost::unique_lock<boost::mutex> lock(mutex_);
return last_access_;
}
bool CachedObject::is_dirty() {
boost::unique_lock<boost::mutex> lock(mutex_);
return is_dirty_;
}
bool CachedObject::has_data() {
boost::unique_lock<boost::mutex> lock(mutex_);
return actual_size_ > 0;
}
/**
 * Makes sure that the object's data is available before the caller accesses
 * the buffer.
 *
 * Must be called with "lock" held. If the object has no valid data
 * (actual_size_ == -1) the first caller allocates the buffer and fetches the
 * object via "reader"; "lock" is released while the reader runs. Callers
 * which find the buffer already allocated enqueue a condition variable in
 * read_queue_ and wait on it. Every caller wakes up the next waiter in the
 * queue before returning.
 *
 * @throws IOException if read_has_failed_ is set, i.e. the reader has thrown
 *         an XtreemFSException for this object.
 */
void CachedObject::ReadInternal(boost::unique_lock<boost::mutex>& lock,
const ObjectReaderFunction& reader) {
// We hold the lock here, so no other thread is modifying the current
// state. However, another thread might already be requesting the data
// from the OSD.
if (actual_size_ == -1) {
// We don't have valid data yet.
if (data_.get() == NULL) {
// Initial read. No other thread is retrieving the object.
data_.reset(new char[object_size_]);
// Zero the complete buffer; the reader may fill less than object_size_.
memset(data_.get(), 0, object_size_);
// No other thread will access the buffer concurrently.
char* buffer_ptr = data_.get();
int read_bytes = -1;
try {
// The lock is released for the duration of the read.
lock.unlock();
read_bytes = reader(object_no_, buffer_ptr);
lock.lock();
} catch(const XtreemFSException&) {
// Re-acquire the lock before touching the state again. read_bytes is
// still -1, i.e. the object stays without valid data.
lock.lock();
read_has_failed_ = true;
}
actual_size_ = read_bytes;
} else {
// Read already initiated by another thread. Enqueue us as waiting
// for the data. Our predecessor will dequeue us and wake us up.
// NOTE(review): wait() is used without a predicate loop. A spurious
// wakeup would let this thread continue and delete "v" while it may
// still be referenced by read_queue_ - please confirm.
// NOTE(review): once a read has failed, the buffer is allocated and
// actual_size_ is still -1, so a caller arriving later seems to end up
// here with nobody left to wake it up - please confirm.
boost::condition_variable* v = new boost::condition_variable();
read_queue_.push_back(v);
v->wait(lock); // unlocks mutex
delete v; // has been dequeued already by our predecessor
}
}
// We are done, next in line please.
if (read_queue_.size() > 0) {
// Wake up our successor in the queue. It will make progress when
// we release the lock. The variable still belongs to the thread that enqueued it.
boost::condition_variable* v = read_queue_.front();
read_queue_.pop_front();
v->notify_one();
}
// The failure is reported after the successor was notified, so every waiter
// is woken up and sees the error as well.
if (read_has_failed_) {
throw IOException("The object cache failed to read the object. "
"Please re-open the file and try again.");
}
}
/**
 * Passes the object number, the buffer and the current data size to
 * "writer". Does not touch is_dirty_ and does not lock mutex_ itself.
 */
void CachedObject::WriteObjectToOSD(const ObjectWriterFunction& writer) {
  char* const payload = data_.get();
  writer(object_no_, payload, actual_size_);
}
/**
 * Creates an empty cache.
 *
 * @param max_objects  Limit of objects with data which EvictObjects()
 *                     compares against.
 * @param object_size  Size in bytes which is used for every CachedObject.
 */
ObjectCache::ObjectCache(size_t max_objects, int object_size)
: max_objects_(max_objects), object_size_(object_size) {
}
/** Deletes all CachedObject instances which are referenced by the cache. */
ObjectCache::~ObjectCache() {
  Cache::iterator entry = cache_.begin();
  while (entry != cache_.end()) {
    delete entry->second;
    ++entry;
  }
}
int ObjectCache::Read(int object_no, int offset_in_object,
char* buffer, int bytes_to_read,
const ObjectReaderFunction& reader,
const ObjectWriterFunction& writer) {
assert(bytes_to_read + offset_in_object <= object_size_);
CachedObject* object = LookupObject(object_no, writer);
int read_bytes = object->Read(
offset_in_object, buffer, bytes_to_read, reader);
EvictObjects(writer);
return read_bytes;
}
void ObjectCache::Write(int object_no, int offset_in_object,
const char* buffer, int bytes_to_write,
const ObjectReaderFunction& reader,
const ObjectWriterFunction& writer) {
assert(bytes_to_write + offset_in_object <= object_size_);
CachedObject* object = LookupObject(object_no, writer);
object->Write(offset_in_object, buffer, bytes_to_write, reader);
EvictObjects(writer);
}
void ObjectCache::Flush(const ObjectWriterFunction& writer) {
boost::unique_lock<boost::mutex> lock(mutex_);
Cache::iterator i;
for (i = cache_.begin(); i != cache_.end(); ++i) {
CachedObject* object = i->second;
object->Flush(writer);
}
}
void ObjectCache::Truncate(int64_t new_size) {
boost::unique_lock<boost::mutex> lock(mutex_);
int object_to_cut = static_cast<int>(new_size / object_size_);
for (Cache::iterator i = cache_.begin(); i != cache_.end(); ++i) {
if (i->first == object_to_cut) {
i->second->Truncate(new_size % object_size_);
} else if (i->first < object_to_cut) {
// Extend the object to its full size, if it isn't already.
i->second->Truncate(object_size_);
} else if (i->first > object_to_cut) {
i->second->Drop();
}
}
}
/**
 * Returns the CachedObject for "object_no". If the cache has no entry for it
 * yet, a new (empty) one is created.
 *
 * The cache keeps the ownership of the returned object. "writer" is not used
 * by this method.
 */
CachedObject* ObjectCache::LookupObject(int object_no,
                                        const ObjectWriterFunction& writer) {
  boost::unique_lock<boost::mutex> lock(mutex_);
  // A single lookup: operator[] inserts a NULL pointer for unknown objects
  // which is replaced by the newly created object.
  CachedObject*& slot = cache_[object_no];
  if (slot == NULL) {
    slot = new CachedObject(object_no, object_size_);
  }
  return slot;
}
void ObjectCache::EvictObjects(const ObjectWriterFunction& writer) {
uint64_t minimum_atime = std::numeric_limits<uint64_t>::max();
Cache::iterator entry_with_minimum_atime;
{
boost::unique_lock<boost::mutex> lock(mutex_);
size_t objects_with_data = 0;
for (Cache::iterator i = cache_.begin(); i != cache_.end(); ++i) {
if (!i->second->has_data()) {
continue;
}
++objects_with_data;
if (minimum_atime > i->second->last_access()) {
entry_with_minimum_atime = i;
minimum_atime = i->second->last_access();
}
}
if (objects_with_data <= max_objects_) {
return;
}
assert(entry_with_minimum_atime != cache_.end());
}
// We only free the object's data, not its container in order to simplify
// concurrency handling.
entry_with_minimum_atime->second->FlushAndErase(writer);
}
/** Returns the object size this cache was created with. */
int ObjectCache::object_size() const {
return object_size_;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,730 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/options.h"
#include <algorithm> // std::find_if
#include <boost/algorithm/string/compare.hpp>
#include <boost/algorithm/string.hpp> // boost::algorithm::starts_with
#include <boost/bind.hpp>
#include <boost/program_options/cmdline.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <string>
#ifdef __APPLE__
// for getpwuid
#include <sys/types.h>
#include <pwd.h>
#else
// for getenv
#include <cstdlib>
#endif
#include "rpc/ssl_options.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/version_management.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
#include "xtreemfs/GlobalTypes.pb.h"
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace alg = boost::algorithm;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Sets every option to its default value and names the option groups.
 *
 * The boost::program_options descriptions themselves are not filled here,
 * this is done by GenerateProgramOptionsDescriptions()
 * (all_descriptions_initialized_ is set to false at the end).
 */
Options::Options()
: general_("General options"),
optimizations_("Optimizations"),
error_handling_("Error Handling options"),
#ifdef HAS_OPENSSL
ssl_options_("SSL options"),
#endif // HAS_OPENSSL
grid_options_("Grid Support options"),
vivaldi_options_("Vivaldi Options"),
xtreemfs_advanced_options_("XtreemFS Advanced options"),
alternative_options_("Alternative Specification of options") {
version_string = XTREEMFS_VERSION_STRING;
// XtreemFS URL Options.
xtreemfs_url = "";
volume_name = "";
protocol = "";
mount_point = "";
// General options.
log_level_string = "WARN";
log_file_path = "";
show_help = false;
empty_arguments_list = false;
show_version = false;
// Optimizations.
metadata_cache_size = 100000;
metadata_cache_ttl_s = 10;
enable_async_writes = false;
async_writes_max_request_size_kb = 128; // default object size in kB.
async_writes_max_requests = 10; // Only 10 pending requests allowed by default.
readdir_chunk_size = 1024;
enable_atime = false;
object_cache_size = 0;
// Error Handling options.
// A RPC call may be retried up to "max{_read|_write|}_tries" times. The
// different parameters are considered depending on the operation
// (read, write, rest). Different parameters were introduced because Fuse
// under Linux does not allow to interrupt read() requests if the disk cache
// is involved and therefore it's not wise to retry read() requests by
// default.
//
// A RPC call will block at least for the minimum out of
// ("retry_delay_s", "connect_timeout_s", "request_timeout_s") and at most
// for the maximum out of the three parameters.
//
// The parameter "retry_delay_s" exists to enforce a lower bound and avoid
// flooding the server. For example, an unsuccessful connect may return much
// earlier than "connect_timeout_s" (e.g. in most cases a connect with the
// error 'connection refused' returns immediately.).
max_tries = 40;
max_read_tries = 40;
max_write_tries = 40;
max_view_renewals = 5;
retry_delay_s = 15;
connect_timeout_s = 15;
request_timeout_s = 15;
linger_timeout_s = 600; // 10 Minutes.
#ifdef HAS_OPENSSL
// SSL options.
ssl_pem_cert_path = "";
ssl_pem_key_path = "";
ssl_pem_key_pass = "";
ssl_pem_trusted_certs_path = "";
ssl_pkcs12_path = "";
ssl_pkcs12_pass = "";
grid_ssl = false;
ssl_verify_certificates = false;
ssl_method_string = "ssltls";
#endif // HAS_OPENSSL
// Grid Support options.
grid_auth_mode_globus = false;
grid_auth_mode_unicore = false;
grid_gridmap_location = "";
grid_gridmap_location_default_globus = "/etc/grid-security/grid-mapfile";
grid_gridmap_location_default_unicore = "/etc/grid-security/d-grid_uudb";
grid_gridmap_reload_interval_m = 60; // 60 Minutes = 1 Hour.
// Vivaldi Options
vivaldi_enable = false;
vivaldi_enable_dir_updates = false;
// Default location of the coordinates file: inside the user's home directory
// if it can be determined for the platform, otherwise the bare file name.
#ifdef __linux__
// Linux: home directory from the environment variable HOME.
char* home_dir = getenv("HOME");
if (home_dir) {
vivaldi_filename = string(home_dir) + "/.xtreemfs_vivaldi_coordinates";
} else {
vivaldi_filename = ".xtreemfs_vivaldi_coordinates";
}
#elif defined __APPLE__
// Mac OS X: home directory from the passwd entry of the current user.
struct passwd* pwd = getpwuid(getuid());
if (pwd) {
vivaldi_filename = string(pwd->pw_dir) + "/.xtreemfs_vivaldi_coordinates";
} else {
vivaldi_filename = ".xtreemfs_vivaldi_coordinates";
}
#elif defined WIN32
// Windows: home directory from HOMEDRIVE and HOMEPATH, both must be set.
char* home_drive = getenv("HOMEDRIVE");
char* home_path = getenv("HOMEPATH");
if (home_drive && home_path) {
vivaldi_filename = string(home_drive) + string(home_path)
+ "/.xtreemfs_vivaldi_coordinates";
} else {
vivaldi_filename = ".xtreemfs_vivaldi_coordinates";
}
#else
vivaldi_filename = ".xtreemfs_vivaldi_coordinates";
#endif
vivaldi_recalculation_interval_s = 300;
vivaldi_recalculation_epsilon_s = 30;
vivaldi_max_iterations_before_updating = 12;
vivaldi_max_request_retries = 2;
// Advanced XtreemFS options.
periodic_file_size_updates_interval_s = 60; // Default: 1 Minute.
periodic_xcap_renewal_interval_s = 60; // Default: 1 Minute.
vivaldi_zipf_generator_skew = 0.5;
// Internal options, not available from the command line interface.
was_interrupted_function = NULL;
// NOTE: Deprecated options are no longer needed as members
// No additional user mapping is used by default.
additional_user_mapping_type = UserMapping::kNone;
// The option descriptions are generated on demand, see
// GenerateProgramOptionsDescriptions().
all_descriptions_initialized_ = false;
}
void Options::GenerateProgramOptionsDescriptions() {
if (all_descriptions_initialized_) {
return;
}
// Init boost::program_options specific things, define options.
general_.add_options()
("log-level,d",
po::value(&log_level_string)->default_value(log_level_string),
"EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG")
("log-file-path,l",
po::value(&log_file_path)->default_value(log_file_path),
"Path to log file.")
("help,h",
po::value(&show_help)->zero_tokens(),
"Display this text.")
("version,V",
po::value(&show_version)->zero_tokens(),
"Shows the version number.");
optimizations_.add_options()
("metadata-cache-size",
po::value(&metadata_cache_size)->default_value(metadata_cache_size),
"Number of entries which will be cached."
"\n(Set to 0 to disable the cache.)")
("metadata-cache-ttl-s",
po::value(&metadata_cache_ttl_s)->default_value(metadata_cache_ttl_s),
"Time to live after which cached entries will expire.")
("enable-async-writes",
po::value(&enable_async_writes)
->default_value(enable_async_writes)->zero_tokens(),
"Enables asynchronous writes.")
("async-writes-max-reqs",
po::value(&async_writes_max_requests)
->implicit_value(async_writes_max_requests),
"Maximum number of pending write requests per file. Asynchronous writes"
" will block if this limit is reached first.")
("readdir-chunk-size",
po::value(&readdir_chunk_size)->default_value(readdir_chunk_size),
"Number of entries requested per readdir.")
("object-cache-size",
po::value(&object_cache_size)->default_value(object_cache_size),
"Number of cached objects per file.");
error_handling_.add_options()
("max-tries",
po::value(&max_tries)->default_value(max_tries),
"Maximum number of attempts to send a request (0 means infinite).")
("max-read-tries",
po::value(&max_read_tries)->default_value(max_read_tries),
"Maximum number of attempts to execute a read command (0 means infinite"
")."
#ifdef __linux
"\n(If you use Fuse it's not possible to interrupt a read request, i.e."
" do not set this value too high or to infinite.)"
#endif // __linux
)
("max-write-tries",
po::value(&max_write_tries)->default_value(max_write_tries),
"Maximum number of attempts to execute a write command (0 means "
"infinite)."
#ifdef __linux
"\n(Unlike read requests, write requests can get interrupted in "
"Fuse.)"
#endif // __linux
)
("max-view-renewals",
po::value(&max_view_renewals)->default_value(max_view_renewals),
"Maximum number of attempts to retry a request with a renewed view "
"in case an outdated view error did occur.")
("retry-delay",
po::value(&retry_delay_s)->default_value(retry_delay_s),
"Wait time after a request failed until next attempt (in seconds).")
("connect-timeout",
po::value(&connect_timeout_s)->default_value(connect_timeout_s),
"Timeout after which a connection attempt will be retried "
"(in seconds).")
("request-timeout",
po::value(&request_timeout_s)->default_value(request_timeout_s),
"Timeout after which a request will be retried (in seconds).")
("linger-timeout",
po::value(&linger_timeout_s)->default_value(linger_timeout_s),
"Time after which idle connections will be closed (in seconds).");
#ifdef HAS_OPENSSL
ssl_options_.add_options()
("pem-certificate-file-path",
po::value(&ssl_pem_cert_path)->default_value(ssl_pem_cert_path),
"PEM certificate file path")
("pem-private-key-file-path",
po::value(&ssl_pem_key_path)->default_value(ssl_pem_key_path),
"PEM private key file path")
("pem-private-key-passphrase",
po::value(&ssl_pem_key_pass)->default_value(ssl_pem_key_pass),
"PEM private key passphrase (If the argument is set to '-', the user"
" will be prompted for the passphrase.)")
("pem-trusted-certificates-file-path",
po::value(&ssl_pem_trusted_certs_path)
->default_value(ssl_pem_trusted_certs_path),
"PEM trusted certificates path. Contains all trusted CAs in one PEM "
"encoded file.")
#ifndef WIN32
("pkcs12-file-path",
po::value(&ssl_pkcs12_path)->default_value(ssl_pkcs12_path),
"PKCS#12 file path")
("pkcs12-passphrase",
po::value(&ssl_pkcs12_pass)->default_value(ssl_pkcs12_pass),
"PKCS#12 passphrase (If the argument is set to '-', the user will be"
" prompted for the passphrase.)")
#endif
("grid-ssl",
po::value(&grid_ssl)->zero_tokens(),
"Explicitly use the XtreemFS Grid-SSL mode. Same as specifying "
"pbrpcg:// in the volume URL.")
("verify-certificates",
po::value(&ssl_verify_certificates)->default_value(ssl_verify_certificates)
->zero_tokens(),
"Enables X.509 certificate verification.")
("ignore-verify-errors",
po::value(&ssl_ignore_verify_errors)->multitoken(),
"List of error codes to ignore during certificate verification and "
"proceed and accept, see verify(1) for the list of error codes. Only "
"evaluated in conjunction with --verify-certificates. E.g.\n"
" '--ignore-verify-errors 20 27 21' to accept certificates with "
"unknown issuer certificates, untrusted certificates and one-element "
"certificate chains (typical setup for local testing).")
("min-ssl-method",
po::value(&ssl_method_string)->default_value(ssl_method_string),
"SSL method that this client will accept:\n"
" - sslv3 accepts SSLv3 only\n"
" - ssltls accepts SSLv3 and TLSv1.x\n"
" - tlsv1 accepts TLSv1 only"
#if (BOOST_VERSION > 105300)
"\n - tlsv11 accepts TLSv1.1 only\n"
" - tlsv12 accepts TLSv1.2 only"
#endif // BOOST_VERSION > 105300
);
#endif // HAS_OPENSSL
grid_options_.add_options()
("globus-gridmap",
po::value(&grid_auth_mode_globus)->zero_tokens(),
"Authorize using globus gridmap file.")
("unicore-gridmap",
po::value(&grid_auth_mode_unicore)->zero_tokens(),
"Authorize using unicore gridmap file.")
("gridmap-location",
po::value(&grid_gridmap_location)->default_value(grid_gridmap_location),
string("Location of the gridmap file.\n"
"unicore default: " + grid_gridmap_location_default_unicore + "\n"
"globus default: " + grid_gridmap_location_default_globus).c_str())
("gridmap-reload-interval-m",
po::value(&grid_gridmap_reload_interval_m)
->default_value(grid_gridmap_reload_interval_m),
"Interval (in minutes) after which the gridmap file will be checked for"
" changes and reloaded if necessary.");
vivaldi_options_.add_options()
("vivaldi-enable",
po::value(&vivaldi_enable)->default_value(vivaldi_enable)
->zero_tokens(),
"Enables the vivaldi coordinate calculation for the client.")
("vivaldi-enable-dir-updates",
po::value(&vivaldi_enable_dir_updates)
->default_value(vivaldi_enable_dir_updates)->zero_tokens(),
"Enables sending the coordinates to the DIR after each recalculation."
" This is only needed to add the clients to the vivaldi visualization"
" at the cost of some additional traffic between client and DIR.")
("vivaldi-filename",
po::value(&vivaldi_filename)->default_value(vivaldi_filename),
"The file where the vivaldi coordinates should be saved after each "
"recalculation.")
("vivaldi-recalculation-interval",
po::value(&vivaldi_recalculation_interval_s)
->default_value(vivaldi_recalculation_interval_s),
"The interval between coordinate recalculations in seconds. "
"Also see vivaldi-recalculation-epsilon.")
("vivaldi-recalculation-epsilon",
po::value(&vivaldi_recalculation_epsilon_s)
->default_value(vivaldi_recalculation_epsilon_s),
"The recalculation interval will be randomly chosen from"
" vivaldi-recalculation-inverval +/- vivaldi-recalculation-epsilon "
"(Both in seconds).")
("vivaldi-max-iterations-before-updating",
po::value(&vivaldi_max_iterations_before_updating)
->default_value(vivaldi_max_iterations_before_updating),
"Number of coordinate recalculations before updating the list of OSDs.")
("vivaldi-max-request-retries",
po::value(&vivaldi_max_request_retries)
->default_value(vivaldi_max_request_retries),
"Maximal number of retries when requesting coordinates from another "
"vivaldi node.");
xtreemfs_advanced_options_.add_options()
("periodic-filesize-update-interval",
po::value(&periodic_file_size_updates_interval_s),
"Pause time (in seconds) between two invocations of the thread which "
"writes back file size updates to the MRC in the background.")
("periodic-xcap-renewal-interval",
po::value(&periodic_xcap_renewal_interval_s),
"Pause time (in seconds) between two invocations of the thread which "
"renews the XCap of all open file handles.")
("async-writes-max-reqsize-kb",
po::value(&async_writes_max_request_size_kb)
->implicit_value(async_writes_max_request_size_kb),
"Maximum size per write request in kB (1 kB = 1024 bytes). Usually the"
"object size or another system specific upper bound.")
("vivaldi-zipf-generator-skew",
po::value(&vivaldi_zipf_generator_skew)
->default_value(vivaldi_zipf_generator_skew),
"Skewness of the Zipf distribution used for vivaldi OSD selection.")
("enable-atime",
po::value(&enable_atime)->default_value(enable_atime)->zero_tokens(),
"Enable updates of atime attribute in Fuse and metadata cache.");
deprecated_options_.add_options()
("interrupt-signal",
po::value<int>()->notifier(MsgOptionHandler<int>(
"'interrupt-signal' is no longer supported")),
"DEPRECATED (has no effect) - Retry of a request was interrupted if "
"this signal was sent in earlier versions."
);
alternative_options_.add_options()
(",o",
po::value< std::vector<std::string> >(&alternative_options_list),
"Alternatively specify all options as a key=value1=value2 tuple list. "
"E.g.\n"
" '--opt1 --opt2 arg2 --opt3 arg3 arg4' can become\n"
" '-o opt1,opt2=arg2,opt3=arg3=arg4'.\n"
"Overridden by explicitly specified options, e.g.\n"
" '--log-level DEBUG' overrides '-o log-level=INFO'.\n"
"Short option names must be prefixed with '-' anyway, "
"e.g. '-o -d=DEBUG'. Unrecognized options are retained, "
"e.g. for Fuse, see 'Fuse Options'.");
// These options are parsed
all_descriptions_.add(general_).add(optimizations_).add(error_handling_)
#ifdef HAS_OPENSSL
.add(ssl_options_)
#endif // HAS_OPENSSL
.add(grid_options_).add(vivaldi_options_)
.add(xtreemfs_advanced_options_).add(deprecated_options_);
// These options are shown in the "-h" output
visible_descriptions_.add(general_).add(optimizations_).add(error_handling_)
#ifdef HAS_OPENSSL
.add(ssl_options_)
#endif // HAS_OPENSSL
.add(grid_options_).add(vivaldi_options_).add(alternative_options_);
all_descriptions_initialized_ = true;
}
std::vector<std::string> Options::ParseCommandLine(int argc, char** argv) {
GenerateProgramOptionsDescriptions();
// Parse alternative options specification first,
// and potentially override using explicit options later.
po::parsed_options parsed = po::command_line_parser(argc, argv)
.options(alternative_options_)
.allow_unregistered()
.style(style::default_style & ~style::allow_guessing)
.run();
boost::program_options::variables_map vm;
po::store(parsed, vm);
po::notify(vm);
// Collect all non-alternative options, i.e. all regular ones,
// and the ones that are completely unknown.
vector<string> regular_options = po::collect_unrecognized(parsed.options,
po::include_positional);
// Collect options that are not meant to be set via alternative specification.
vector<string> unrecognized_alternative_options;
typedef boost::tokenizer< boost::char_separator<char> > tokenizer;
boost::char_separator<char> list_separator(",");
boost::char_separator<char> tuple_separator("=");
// Walk all alternative options, represented as a list of comma separated
// key=value1=value2... tuples.
for (vector<string>::iterator alternative_options = alternative_options_list.begin();
alternative_options != alternative_options_list.end();
++alternative_options) {
// Split the current comma separated list into key=value1=value2... tuples.
tokenizer tuples(*alternative_options, list_separator);
for (tokenizer::iterator tuple = tuples.begin();
tuple != tuples.end();
++tuple) {
// Split the key=value1=value2... tuple into key and values.
tokenizer key_values(*tuple, tuple_separator);
// Find out whether this is a known option.
const po::option_description *opt_desc = all_descriptions_.find_nothrow(
*(key_values.begin()), false);
if (opt_desc != NULL) {
// Extract long and short option names from the formatted parameter
// '-o [ --opt ]' or '--opt' or '-o [ -- ]' (boost 1.48)
// '-o [ --opt ]' or '--opt' or '-o' (boost 1.57)
// FIXME use po::option_description::canonical_display_name
// when upgrading boost.
const string format_opt = opt_desc->format_name();
string prefixed_long_opt = "", prefixed_short_opt = "";
if(format_opt.substr(0, 2) == "--") {
// No short option available.
prefixed_long_opt = format_opt;
} else {
// Short option available, covers the other two cases.
prefixed_short_opt = format_opt.substr(0, 2);
if(format_opt.length() > 9) {
prefixed_long_opt = format_opt.substr(5, format_opt.length() - 7);
}
}
// Find out if this known option has been explicitly specified.
if (find_if(regular_options.begin(), regular_options.end(),
boost::bind(alg::starts_with<string, string, alg::is_equal>,
_1, prefixed_long_opt, alg::is_equal()))
== regular_options.end() &&
find_if(regular_options.begin(), regular_options.end(),
boost::bind(alg::starts_with<string, string, alg::is_equal>,
_1, prefixed_short_opt, alg::is_equal()))
== regular_options.end()) {
// Explicitly set option for later parsing.
regular_options.push_back(
prefixed_long_opt.empty() ? prefixed_short_opt : prefixed_long_opt);
regular_options.insert(
regular_options.end(), ++(key_values.begin()), key_values.end());
} else {
// Known option is explicitly specified, do not set.
}
} else {
// Not an option that is supposed to be set via alternative specification,
// so just add it back the way it came in.
unrecognized_alternative_options.push_back("-o");
unrecognized_alternative_options.insert(
unrecognized_alternative_options.end(),
key_values.begin(), key_values.end());
}
}
}
vm.clear();
try {
// Parse non-alternative options normally.
parsed = po::command_line_parser(regular_options)
.options(all_descriptions_)
.allow_unregistered()
.style(style::default_style & ~style::allow_guessing)
.run();
po::store(parsed, vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
if (metadata_cache_size < readdir_chunk_size && metadata_cache_size != 0) {
cerr << "Warning: Please set the metadata cache size at least as high as "
"the readdir chunk size. (Currently: " << metadata_cache_size <<
" < " << readdir_chunk_size << "). Otherwise you might experience"
" a degraded performance."
<< endl << endl;
}
if (async_writes_max_requests < 1) {
throw InvalidCommandLineParametersException("The maximum number of pending"
" asynchronous writes (async-writes-max-reqs) must be greater 0.");
}
if (!enable_async_writes && (vm.count("async-writes-max-reqsize-kb") ||
vm.count("async-writes-max-reqs"))) {
throw InvalidCommandLineParametersException("You specified async-writes-*"
" options but did not set enable-async-writes.");
}
if (enable_async_writes && object_cache_size > 0) {
throw InvalidCommandLineParametersException(
"Only one of async. writes and the object cache may be enabled.");
}
// Show help if no arguments given.
if (argc == 1) {
empty_arguments_list = true;
}
if (grid_auth_mode_globus && grid_auth_mode_unicore) {
throw InvalidCommandLineParametersException("You can only use a Globus "
"OR a Unicore gridmap file at the same time.");
}
if (grid_auth_mode_globus) {
additional_user_mapping_type = UserMapping::kGlobus;
if (grid_gridmap_location.empty()) {
grid_gridmap_location = grid_gridmap_location_default_globus;
}
}
if (grid_auth_mode_unicore) {
additional_user_mapping_type = UserMapping::kUnicore;
if (grid_gridmap_location.empty()) {
grid_gridmap_location = grid_gridmap_location_default_unicore;
}
}
#ifdef HAS_OPENSSL
// PEM certificate _and_ private key are both required.
if ((!ssl_pem_cert_path.empty() && ssl_pem_key_path.empty()) ||
(!ssl_pem_key_path.empty() && ssl_pem_cert_path.empty())) {
throw InvalidCommandLineParametersException(
"If you use SSL and PEM files, you have to specify both the PEM"
" certificate and the PEM private key.");
}
#ifndef WIN32
// PKCS#12 and PEM files are mutually exclusive.
if (!ssl_pem_key_path.empty() && !ssl_pkcs12_path.empty()) {
throw InvalidCommandLineParametersException("You can only use PEM files"
" OR a PKCS#12 certificate. However, you specified both.");
}
// PKCS#12 and PEM Private Key password are mutually exclusive.
if (!ssl_pem_key_pass.empty() && !ssl_pkcs12_pass.empty()) {
throw InvalidCommandLineParametersException("You can only use PEM files"
" OR a PKCS#12 certificate. However, you specified the password option"
" for both.");
}
#endif
// If a SSL password was given via command line, clean the value from args.
string to_be_cleaned_password;
if (!ssl_pem_key_pass.empty() && ssl_pem_key_pass != "-") {
to_be_cleaned_password = ssl_pem_key_pass;
}
if (!ssl_pkcs12_pass.empty() && ssl_pkcs12_pass != "-") {
to_be_cleaned_password = ssl_pkcs12_pass;
}
if (!to_be_cleaned_password.empty()) {
// Replace the password in all command line arguments. We don't know from
// which argv[i] it was actually parsed, so we try them all.
for (int i = 1; i < argc; i++) {
const string arg(argv[i]);
if (arg.find(to_be_cleaned_password) != string::npos) {
memset(argv[i], 0, arg.length());
}
}
}
// If the passphrase parameter was specified, but not set, mark that the
// password shall be read from stdin.
if (!ssl_pem_key_path.empty() && ssl_pem_key_pass == "-") {
ReadPasswordFromStdin(
"No PEM private key passphrase was given. Please enter it now:",
&ssl_pem_key_pass);
}
if (!ssl_pkcs12_path.empty() && ssl_pkcs12_pass == "-") {
ReadPasswordFromStdin(
"No PKCS#12 certificate passphrase was given. Please enter it now:",
&ssl_pkcs12_pass);
}
#endif // HAS_OPENSSL
// Return all unparsed options.
vector<string> unparsed_options = po::collect_unrecognized(parsed.options, po::include_positional);
unparsed_options.insert(unparsed_options.end(), unrecognized_alternative_options.begin(), unrecognized_alternative_options.end());
return unparsed_options;
}
/**
 * Splits xtreemfs_url into volume_name, service_addresses and protocol.
 *
 * @param service_type Selects the port used for addresses without an explicit
 *                     port: the MRC default for kMRC, the DIR default for
 *                     every other value.
 */
void Options::ParseURL(XtreemFSServiceType service_type) {
  // Everything which is not the MRC falls back to the DIR default port.
  int default_port = DIR_PBRPC_PORT_DEFAULT;
  if (service_type == kMRC) {
    default_port = MRC_PBRPC_PORT_DEFAULT;
  }

  PBRPCURL parsed_url;
  parsed_url.ParseURL(xtreemfs_url, PBRPCURL::GetSchemePBRPC(), default_port);

  volume_name = parsed_url.volume();
  service_addresses = parsed_url.GetAddresses();
  protocol = parsed_url.scheme();
}
std::string Options::ShowCommandLineHelp() {
GenerateProgramOptionsDescriptions();
ostringstream stream;
stream << visible_descriptions_;
return stream.str();
}
std::string Options::ShowCommandLineHelpVolumeCreationAndDeletion() {
GenerateProgramOptionsDescriptions();
ostringstream stream;
stream << general_ << endl
#ifdef HAS_OPENSSL
<< ssl_options_ << endl
#endif // HAS_OPENSSL
<< grid_options_;
return stream.str();
}
std::string Options::ShowCommandLineHelpVolumeListing() {
GenerateProgramOptionsDescriptions();
ostringstream stream;
stream << general_ << endl
#ifdef HAS_OPENSSL
<< ssl_options_
#endif // HAS_OPENSSL
;
return stream.str();
}
std::string Options::ShowVersion(const std::string& component) {
return component + " " + version_string;
}
/**
 * Returns true if a PEM certificate or a PKCS#12 file was configured.
 * Always false if built without HAS_OPENSSL.
 */
bool Options::SSLEnabled() const {
#ifdef HAS_OPENSSL
  if (!ssl_pem_cert_path.empty()) {
    return true;
  }
  return !ssl_pkcs12_path.empty();
#else
  return false;
#endif  // HAS_OPENSSL
}
/**
 * Creates a new SSLOptions object from the parsed SSL settings.
 *
 * @return With HAS_OPENSSL: a heap allocated object if SSLEnabled(), NULL
 *         otherwise. Without HAS_OPENSSL: always a default constructed object.
 *         The object is created with new and not released here.
 */
xtreemfs::rpc::SSLOptions* Options::GenerateSSLOptions() const {
#ifdef HAS_OPENSSL
  if (!SSLEnabled()) {
    return NULL;
  }

  // Grid SSL is used if requested explicitly or implied by the URL scheme.
  const bool use_grid_ssl =
      grid_ssl || protocol == PBRPCURL::GetSchemePBRPCG();

  return new xtreemfs::rpc::SSLOptions(
      ssl_pem_key_path, ssl_pem_cert_path, ssl_pem_key_pass,  // PEM.
      ssl_pem_trusted_certs_path,  // PEM.
      ssl_pkcs12_path, ssl_pkcs12_pass,  // PKCS12.
      boost::asio::ssl::context::pem,
      use_grid_ssl,
      ssl_verify_certificates,
      ssl_ignore_verify_errors,
      ssl_method_string);
#else
  return new xtreemfs::rpc::SSLOptions();
#endif  // HAS_OPENSSL
}
/**
 * Prints a prompt to stdout and reads one line from stdin.
 *
 * @param msg       Prompt which is printed, followed by a newline.
 * @param password  [out] Receives the line read from stdin.
 */
void Options::ReadPasswordFromStdin(const std::string& msg,
                                    std::string* password) {
  std::cout << msg << std::endl;
  std::getline(std::cin, *password);
}
} // namespace xtreemfs

View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
* 2009-2011 by Patrick Schaefer, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/pbrpc_url.h"
#include <boost/algorithm/string.hpp>
#include <iostream>
#include <string>
#include <sstream>
#include "libxtreemfs/xtreemfs_exception.h"
using namespace std;
namespace xtreemfs {
/** Creates an empty URL: no scheme, no servers, no ports and no volume. */
PBRPCURL::PBRPCURL()
    : scheme_(""),
      servers_(),
      ports_(),
      volume_("") {
}
/**
 * Parses a (multi-address) XtreemFS URL.
 *
 * For every comma separated address the server and port are appended to
 * servers_ and ports_. scheme_ is set to the scheme of the first address (or
 * default_scheme) and volume_ to the volume found in an address.
 *
 * @param original_url   URL to parse; surrounding whitespace is ignored.
 * @param default_scheme Scheme assumed for addresses without "://".
 * @param default_port   Port assumed for addresses without ":port".
 *
 * @throws InvalidURLException if a scheme is unknown, if the addresses use
 *         different schemes or if a port is not a valid uint16_t.
 */
void PBRPCURL::ParseURL(const std::string& original_url,
                        const std::string& default_scheme,
                        const uint16_t default_port) {
  string url(original_url);
  boost::trim(url);
  scheme_ = default_scheme;

  // URL will have the form:
  // [pbrpc://]service-hostname[:port](,[pbrpc://]service-hostname2[:port])*[/volume_name].  // NOLINT
  // Split URL by "," first to retrieve every address.
  // At last, read the optional volume name from the last address.
  vector<string> addresses;
  boost::split(addresses, url, boost::is_any_of(","));

  for (size_t i = 0; i < addresses.size(); i++) {
    const string& address = addresses[i];
    size_t address_pos = 0;
    string scheme = default_scheme;

    size_t scheme_pos = address.find("://");
    if (scheme_pos != string::npos) {
      // scheme specified
      scheme = address.substr(0, scheme_pos);
      if ((scheme != GetSchemePBRPC())
          && (scheme != GetSchemePBRPCS())
          && (scheme != GetSchemePBRPCG())
          && (scheme != GetSchemePBRPCU())) {
        // Report the rejected scheme, not the one accepted so far.
        throw InvalidURLException(scheme + " is not a valid scheme");
      }

      if (i == 0) {
        scheme_ = scheme;
      } else {
        if (scheme_ != scheme) {
          throw InvalidURLException("The current client does not support to"
              " connect to replicas with different protocols. Different"
              " protocols seen are: " + scheme_ + " and: " + scheme);
        }
      }
      address_pos = scheme_pos + 3;
    }

    uint16_t port = default_port;
    size_t last_colon = address.find_last_of(":");
    size_t last_slash = address.find_last_of("/");
    if (last_colon != string::npos) {
      // Port found.
      if (last_colon > address_pos) {
        try {
          if (last_slash != string::npos && last_slash > last_colon + 1) {
            // there is a volume in this address
            port = boost::lexical_cast<uint16_t>(
                address.substr(last_colon + 1, last_slash - last_colon - 1));
          } else {
            port = boost::lexical_cast<uint16_t>(
                address.substr(last_colon + 1));
          }
        } catch(const boost::bad_lexical_cast&) {
          throw InvalidURLException("invalid port: " +
              address.substr(last_colon + 1, last_slash - last_colon - 1));
        }
      } else {
        // The only colon belongs to the scheme: the server name ends at the
        // last slash (or at the end of the address).
        last_colon = last_slash;
      }
    } else {
      last_colon = last_slash;
    }

    // last_colon now marks the end of the server name.
    string server = address.substr(address_pos, last_colon - address_pos);
    servers_.push_back(server);
    ports_.push_back(port);

    // Volume is optional.
    if (last_slash > address_pos && last_slash != string::npos) {
      volume_ = address.substr(last_slash + 1, address.length() - last_slash);
    }
  }  // for
}
/**
 * Returns all parsed addresses, each formatted as "<server>:<port>", in the
 * order in which they were parsed.
 */
ServiceAddresses PBRPCURL::GetAddresses() const {
  // Servers and ports are stored pairwise.
  assert(servers_.size() == ports_.size());

  ServiceAddresses result;
  PortList::const_iterator port = ports_.begin();
  for (ServerList::const_iterator server = servers_.begin();
       server != servers_.end();
       ++server, ++port) {
    ostringstream endpoint;
    endpoint << *server << ":" << *port;
    result.Add(endpoint.str());
  }
  return result;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,114 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
* 2012 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/simple_uuid_iterator.h"
#include "libxtreemfs/uuid_container.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Creates an iterator over the head OSD UUIDs of all replicas in "xlocs".
 * Exceptions of ClearAndGetOSDUUIDsFromXlocSet() are propagated.
 */
SimpleUUIDIterator::SimpleUUIDIterator(
    const xtreemfs::pbrpc::XLocSet& xlocs) {
  ClearAndGetOSDUUIDsFromXlocSet(xlocs);
}
/** Releases every UUIDItem owned by the list. */
SimpleUUIDIterator::~SimpleUUIDIterator() {
  list<UUIDItem*>::iterator item = uuids_.begin();
  while (item != uuids_.end()) {
    delete *item;
    ++item;
  }
}
/**
 * Appends "uuid" to the list. The first UUID ever added also becomes the
 * current UUID.
 */
void SimpleUUIDIterator::AddUUID(const std::string& uuid) {
  boost::mutex::scoped_lock lock(mutex_);

  uuids_.push_back(new UUIDItem(uuid));

  const bool is_first_element = (uuids_.size() == 1);
  if (is_first_element) {
    current_uuid_ = uuids_.begin();
  }
}
/**
 * Replaces the list by the head OSD UUID (first OSD) of every replica in
 * "xlocs" and makes the first entry the current UUID.
 *
 * The XLocSet is validated completely before the list is touched. If an
 * exception is thrown, the list and the current UUID are left unchanged
 * instead of keeping a cleared or partially rebuilt list with an invalidated
 * current_uuid_.
 *
 * @throws EmptyReplicaListInXlocSet if "xlocs" contains no replica.
 * @throws NoHeadOSDInXlocSet        if a replica contains no OSD UUID.
 */
void SimpleUUIDIterator::ClearAndGetOSDUUIDsFromXlocSet(
    const xtreemfs::pbrpc::XLocSet& xlocs) {
  boost::mutex::scoped_lock lock(mutex_);

  if (xlocs.replicas_size() == 0) {
    throw EmptyReplicaListInXlocSet("UUIDContainer::GetOSDUUIDFromXlocSet: "
        "Empty replica list in XlocSet: " + xlocs.DebugString());
  }
  // Validate every replica up front, so nothing is thrown after the old
  // entries were deleted.
  for (int replica_index = 0;
       replica_index < xlocs.replicas_size();
       ++replica_index) {
    if (xlocs.replicas(replica_index).osd_uuids_size() == 0) {
      throw NoHeadOSDInXlocSet("UUIDContainer::GetOSDUUIDFromXlocSet: "
          "No head OSD available in XlocSet: " + xlocs.DebugString());
    }
  }

  // Clear the list.
  for (list<UUIDItem*>::iterator it = uuids_.begin();
       it != uuids_.end();
       ++it) {
    delete (*it);
  }
  uuids_.clear();

  // Add the head OSD of each replica to the list.
  for (int replica_index = 0;
       replica_index < xlocs.replicas_size();
       ++replica_index) {
    const xtreemfs::pbrpc::Replica& replica = xlocs.replicas(replica_index);
    UUIDItem* entry = new UUIDItem(replica.osd_uuids(0));
    uuids_.push_back(entry);
  }

  // Set the current UUID to the first element.
  current_uuid_ = uuids_.begin();
}
void SimpleUUIDIterator::Clear() {
boost::mutex::scoped_lock lock(mutex_);
for (list<UUIDItem*>::iterator it = uuids_.begin();
it != uuids_.end();
++it) {
delete (*it);
}
uuids_.clear();
// Empty list, i.e. current UUID is set to the past-the-end element.
current_uuid_ = uuids_.end();
}
void SimpleUUIDIterator::SetCurrentUUID(const std::string& uuid) {
boost::mutex::scoped_lock lock(mutex_);
// Search "uuid" in "uuids_" and set it to the current UUID.
for (list<UUIDItem*>::iterator it = uuids_.begin();
it != uuids_.end();
++it) {
if ((*it)->uuid == uuid) {
current_uuid_ = it;
// Reset its current state.
(*current_uuid_)->Reset();
return;
}
}
// UUID was not found, add it.
UUIDItem* entry = new UUIDItem(uuid);
uuids_.push_back(entry);
// Add current UUID to the added, last UUID.
list<UUIDItem*>::iterator it = uuids_.end();
current_uuid_ = --it;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2009-2011 by Patrick Schaefer, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/stripe_translator.h"
#include <algorithm>
#include <vector>
using namespace std;
using namespace xtreemfs::pbrpc;
namespace xtreemfs {
/**
 * Splits a write of "size" bytes at file position "offset" into one
 * WriteOperation per touched object.
 *
 * @param buf        Data to be written; every operation refers to the part of
 *                   this buffer which belongs to its object.
 * @param size       Number of bytes in "buf".
 * @param offset     File position of the first byte.
 * @param policies   Striping policies. Must not be empty: the stripe size is
 *                   taken from the first entry. Every entry contributes one
 *                   OSD offset (object number modulo its width) per
 *                   operation.
 * @param operations [out] The generated operations are appended.
 */
void StripeTranslatorRaid0::TranslateWriteRequest(
    const char *buf,
    size_t size,
    int64_t offset,
    PolicyContainer policies,
    std::vector<WriteOperation>* operations) const {
  // stripe size is stored in kB
  unsigned int stripe_size = (*policies.begin())->stripe_size() * 1024;

  size_t start = 0;
  while (start < size) {
    // Calculate with the full 64 bit file position. Casting the position to
    // size_t before the division truncates it where size_t has 32 bits only
    // and yields a wrong object number beyond 4 GiB.
    const uint64_t position = static_cast<uint64_t>(offset) + start;
    size_t obj_number = static_cast<size_t>(position / stripe_size);
    size_t req_offset = static_cast<size_t>(position % stripe_size);
    // Never cross the end of the current object.
    size_t req_size
        = min(size - start, static_cast<size_t>(stripe_size - req_offset));

    std::vector<size_t> osd_offsets;
    for (PolicyContainer::iterator i = policies.begin();
         i != policies.end();
         ++i) {
      osd_offsets.push_back(obj_number % (*i)->width());
    }

    operations->push_back(WriteOperation(
        obj_number, osd_offsets, req_size, req_offset, buf + start));
    start += req_size;
  }
}
/**
 * Splits a read of "size" bytes at file position "offset" into one
 * ReadOperation per touched object.
 *
 * @param buf        Destination buffer; every operation refers to the part of
 *                   this buffer which belongs to its object.
 * @param size       Number of bytes to read.
 * @param offset     File position of the first byte.
 * @param policies   Striping policies. Must not be empty: the stripe size is
 *                   taken from the first entry. Every entry contributes one
 *                   OSD offset (object number modulo its width) per
 *                   operation.
 * @param operations [out] The generated operations are appended.
 */
void StripeTranslatorRaid0::TranslateReadRequest(
    char *buf,
    size_t size,
    int64_t offset,
    PolicyContainer policies,
    std::vector<ReadOperation>* operations) const {
  // stripe size is stored in kB
  unsigned int stripe_size = (*policies.begin())->stripe_size() * 1024;

  size_t start = 0;
  while (start < size) {
    // Calculate with the full 64 bit file position. Casting the position to
    // size_t before the division truncates it where size_t has 32 bits only
    // and yields a wrong object number beyond 4 GiB.
    const uint64_t position = static_cast<uint64_t>(offset) + start;
    size_t obj_number = static_cast<size_t>(position / stripe_size);
    size_t req_offset = static_cast<size_t>(position % stripe_size);
    // Never cross the end of the current object.
    size_t req_size
        = min(size - start, static_cast<size_t>(stripe_size - req_offset));

    std::vector<size_t> osd_offsets;
    for (PolicyContainer::iterator i = policies.begin();
         i != policies.end();
         ++i) {
      osd_offsets.push_back(obj_number % (*i)->width());
    }

    operations->push_back(ReadOperation(
        obj_number, osd_offsets, req_size, req_offset, buf + start));
    start += req_size;
  }
}
} // namespace xtreemfs

View File

@@ -0,0 +1,43 @@
/*
* Copyright (c) 2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/system_user_mapping.h"
#ifdef WIN32
#include "libxtreemfs/system_user_mapping_windows.h"
#else
#include "libxtreemfs/system_user_mapping_unix.h"
#endif // !WIN32
#include "libxtreemfs/user_mapping.h"
namespace xtreemfs {
/**
 * Creates the system user mapping for the platform this code was built for.
 * The object is created with new and not released here.
 */
SystemUserMapping* SystemUserMapping::GetSystemUserMapping() {
#ifdef WIN32
  SystemUserMapping* const platform_mapping = new SystemUserMappingWindows();
#else
  SystemUserMapping* const platform_mapping = new SystemUserMappingUnix();
#endif  // WIN32
  return platform_mapping;
}
/**
 * Stores "mapping" as the additional user mapping by resetting
 * additional_user_mapping_ to it.
 */
void SystemUserMapping::RegisterAdditionalUserMapping(
    UserMapping* mapping) {
  additional_user_mapping_.reset(mapping);
}
/** Starts the additional user mapping; no-op if none is registered. */
void SystemUserMapping::StartAdditionalUserMapping() {
  if (!additional_user_mapping_.get()) {
    return;
  }
  additional_user_mapping_->Start();
}
/** Stops the additional user mapping; no-op if none is registered. */
void SystemUserMapping::StopAdditionalUserMapping() {
  if (!additional_user_mapping_.get()) {
    return;
  }
  additional_user_mapping_->Stop();
}
} // namespace xtreemfs

View File

@@ -0,0 +1,295 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#ifndef WIN32
#include "libxtreemfs/system_user_mapping_unix.h"
#include <grp.h>
#include <pwd.h>
#include <sys/types.h>
#include <boost/cstdint.hpp>
#include <boost/lexical_cast.hpp>
#include <fstream>
#include <iostream>
#include "util/logging.h"
#include "pbrpc/RPC.pb.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Fills "user_credentials" with the name of the effective user and adds the
 * name of the effective group of the calling process.
 */
void SystemUserMappingUnix::GetUserCredentialsForCurrentUser(
    xtreemfs::pbrpc::UserCredentials* user_credentials) {
  const std::string effective_user = UIDToUsername(geteuid());
  user_credentials->set_username(effective_user);

  const std::string effective_group = GIDToGroupname(getegid());
  user_credentials->add_groups(effective_group);
}
std::string SystemUserMappingUnix::UIDToUsername(uid_t uid) {
if (uid == static_cast<uid_t>(-1)) {
return string("-1");
}
string username;
// Retrieve username.
size_t bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
if (bufsize == -1) {
// Max size unknown, use safe value.
bufsize = 16384;
}
char* buf = new char[bufsize];
struct passwd pwd;
struct passwd* result = NULL;
int s = getpwuid_r(uid, &pwd, buf, bufsize, &result);
if (result == NULL) {
if (s == 0) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO)
<< "no mapping for uid " << uid << std::endl;
}
} else {
Logging::log->getLog(LEVEL_ERROR)
<< "failed to retrieve passwd entry for uid: " << uid << endl;
}
// Return uid as name if no mapping found.
try {
username = boost::lexical_cast<string>(uid);
} catch(const boost::bad_lexical_cast&) {
Logging::log->getLog(LEVEL_ERROR)
<< "failed to use uid for usermapping: " << uid << endl;
username = "nobody";
}
} else {
username = string(pwd.pw_name);
}
delete[] buf;
if (additional_user_mapping_.get()) {
string username_local(username);
additional_user_mapping_->LocalToGlobalUsername(username_local, &username);
}
return username;
}
/**
 * Translates a (global) username into a local uid.
 *
 * The name is first passed through the additional user mapping, if any, and
 * then looked up in the passwd database. Names without an entry are
 * interpreted as a numeric uid.
 *
 * @return The uid, or 65534 (nobody) if the name has no entry and is "-1",
 *         negative or not a number.
 */
uid_t SystemUserMappingUnix::UsernameToUID(const std::string& username) {
  string local_username(username);
  if (additional_user_mapping_.get()) {
    additional_user_mapping_->GlobalToLocalUsername(username, &local_username);
  }

  const uid_t nobody_uid = 65534;

  // Ask the system for the buffer size, use a safe value if it is unknown.
  const long size_hint = sysconf(_SC_GETPW_R_SIZE_MAX);  // NOLINT
  const size_t bufsize =
      (size_hint == -1) ? 16384 : static_cast<size_t>(size_hint);
  char* buf = new char[bufsize];

  struct passwd pwd;
  struct passwd* result = NULL;
  const int s = getpwnam_r(local_username.c_str(), &pwd, buf, bufsize, &result);

  uid_t uid = nobody_uid;
  if (result != NULL) {
    uid = pwd.pw_uid;
  } else {
    // s == 0 means that the lookup worked, but there is no such entry.
    if (s != 0) {
      Logging::log->getLog(LEVEL_ERROR)
          << "failed to retrieve passwd entry for username: "
          << local_username<< endl;
    } else if (Logging::log->loggingActive(LEVEL_INFO)) {
      Logging::log->getLog(LEVEL_INFO)
          << "no mapping for username: " << local_username << endl;
    }

    // The reserved value -1 stays mapped to nobody.
    if (local_username != "-1") {
      // Try to convert the username into an integer. (Needed if an integer
      // was stored in the first place because there was no username found for
      // the uid at the creation of the file.)
      try {
        uid = boost::lexical_cast<uid_t>(local_username);
      } catch(const boost::bad_lexical_cast&) {
        uid = nobody_uid;
      }

      // boost::lexical_cast silently converts negative values into unsigned
      // integers. Check if username actually contains a negative value.
      if (uid != nobody_uid) {
        try {
          // A 64 bit signed integer is needed to detect a -(2^31)-1 as a
          // negative value and not as an overflowed unsigned integer of value
          // 2^32-1.
          const int64_t uid_signed =
              boost::lexical_cast<int64_t>(local_username);
          if (uid_signed < 0) {
            uid = nobody_uid;
          }
        } catch(const boost::bad_lexical_cast&) {
          // Leave uid as it is if lexical_cast failed.
        }
      }
    }
  }
  delete[] buf;

  return uid;
}
std::string SystemUserMappingUnix::GIDToGroupname(gid_t gid) {
if (gid == static_cast<gid_t>(-1)) {
return string("-1");
}
string groupname;
// Retrieve username.
size_t bufsize = sysconf(_SC_GETGR_R_SIZE_MAX);
if (bufsize == -1) {
// Max size unknown, use safe value.
bufsize = 16384;
}
char* buf = new char[bufsize];
struct group grp;
struct group* result = NULL;
int s = getgrgid_r(gid, &grp, buf, bufsize, &result);
if (result == NULL) {
if (s == 0) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO)
<< "no mapping for gid " << gid << endl;
}
} else {
Logging::log->getLog(LEVEL_ERROR)
<< "failed to retrieve group entry for gid: " << gid << endl;
}
// Return uid as name if no mapping found.
try {
groupname = boost::lexical_cast<string>(gid);
} catch(const boost::bad_lexical_cast&) {
Logging::log->getLog(LEVEL_ERROR)
<< "failed to use gid for usermapping: " << gid << endl;
groupname = "nobody";
}
} else {
groupname = string(grp.gr_name);
}
delete[] buf;
if (additional_user_mapping_.get()) {
string local_groupname(groupname);
additional_user_mapping_->LocalToGlobalGroupname(local_groupname,
&groupname);
}
return groupname;
}
/**
 * Translates a (global) groupname into a local gid.
 *
 * The name is first passed through the additional user mapping, if any, and
 * then looked up in the group database. Names without an entry are
 * interpreted as a numeric gid.
 *
 * @return The gid, or 65534 (nobody) if the name has no entry and is "-1",
 *         negative or not a number.
 */
gid_t SystemUserMappingUnix::GroupnameToGID(const std::string& groupname) {
  string local_groupname(groupname);
  if (additional_user_mapping_.get()) {
    additional_user_mapping_->GlobalToLocalGroupname(groupname,
                                                     &local_groupname);
  }
  gid_t gid = 65534;  // nobody.

  // Retrieve gid. The buffer is used by getgrnam_r(), so the size hint for
  // group entries (not the one for passwd entries) is the right one.
  size_t bufsize = sysconf(_SC_GETGR_R_SIZE_MAX);
  if (bufsize == static_cast<size_t>(-1)) {
    // Max size unknown, use safe value.
    bufsize = 16384;
  }
  char* buf = new char[bufsize];
  struct group grp;
  struct group* result = NULL;
  int s = getgrnam_r(local_groupname.c_str(), &grp, buf, bufsize, &result);
  if (result == NULL) {
    // s == 0 means that the lookup worked, but there is no such entry.
    if (s == 0) {
      if (Logging::log->loggingActive(LEVEL_INFO)) {
        Logging::log->getLog(LEVEL_INFO)
            << "no mapping for groupname: " << local_groupname << endl;
      }
    } else {
      Logging::log->getLog(LEVEL_ERROR)
          << "failed to retrieve group entry for groupname: "
          << local_groupname << endl;
    }

    // Map reserved value -1 to nobody.
    if (local_groupname == "-1") {
      gid = 65534;  // nobody.
    } else {
      // Try to convert the groupname into an integer. (Needed if an integer
      // was stored in the first place because there was no groupname found
      // for the gid at the creation of the file.)
      try {
        gid = boost::lexical_cast<gid_t>(local_groupname);
      } catch(const boost::bad_lexical_cast&) {
        gid = 65534;  // nobody.
      }

      // boost::lexical_cast silently converts negative values into unsigned
      // integers. Check if groupname actually contains a negative value.
      if (gid != 65534) {
        try {
          // It's needed to use a 64 bit signed integer to detect a -(2^31)-1
          // as a negative value and not as an overflowed unsigned integer of
          // value 2^32-1.
          int64_t gid_signed = boost::lexical_cast<int64_t>(local_groupname);
          if (gid_signed < 0) {
            gid = 65534;  // nobody.
          }
        } catch(const boost::bad_lexical_cast&) {
          // Leave gid as it is if lexical_cast failed.
        }
      }
    }
  } else {
    gid = grp.gr_gid;
  }
  delete[] buf;

  return gid;
}
void SystemUserMappingUnix::GetGroupnames(uid_t uid,
gid_t gid,
pid_t pid,
std::list<std::string>* groupnames) {
groupnames->push_back(GIDToGroupname(gid));
#ifdef __linux__
// Parse /proc/<pid>/task/<pid>/status like fuse_req_getgroups.
string filename = "/proc/" + boost::lexical_cast<string>(pid) + "/task/"
+ boost::lexical_cast<string>(pid) + "/status";
ifstream in(filename.c_str());
string line;
// C++ getline() does check for failbit or badbit of the istream. If of these
// bits are set, it does break from the while loop, for instance if the file
// does not exist. In this case no additional groups are added.
while (getline(in, line)) {
if (line.length() >= 8 && line.substr(0, 8) == "Groups:\t") {
// "Groups: " entry found, read all groups
std::stringstream stringstream(line.substr(8, line.length() - 8 - 1));
std::string group_id;
while (getline(stringstream, group_id, ' ')) {
gid_t supplementary_gid = boost::lexical_cast<gid_t>(group_id);
if (supplementary_gid != gid) {
groupnames->push_back(GIDToGroupname(supplementary_gid));
}
}
break;
}
}
#endif
}
} // namespace xtreemfs
#endif // !WIN32

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/system_user_mapping_windows.h"
#ifdef WIN32
#pragma comment(lib, "Netapi32.lib")
#include <windows.h>
#include <lm.h>
#include "libxtreemfs/helper.h"
#include "pbrpc/RPC.pb.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Fills "user_credentials" with the name of the logged on user and adds the
 * logon domain as group, both as reported by NetWkstaUserGetInfo().
 *
 * If an additional user mapping is registered, both names are translated by
 * it first. On failure an error is logged and "user_credentials" is left
 * untouched.
 */
void SystemUserMappingWindows::GetUserCredentialsForCurrentUser(
    xtreemfs::pbrpc::UserCredentials* user_credentials) {
  LPWKSTA_USER_INFO_1 user_info = NULL;
  const NET_API_STATUS result = NetWkstaUserGetInfo(
      NULL,
      1,
      reinterpret_cast<LPBYTE*>(&user_info));

  if (result != NERR_Success) {
    Logging::log->getLog(LEVEL_ERROR) <<
        "Failed to retrieve the current username and domain name, error"
        " code: " << result << endl;
    return;
  }
  if (user_info == NULL) {
    return;
  }

  // Copy the names before the buffer of the API is released.
  string username = ConvertWindowsToUTF8(user_info->wkui1_username);
  string groupname = ConvertWindowsToUTF8(user_info->wkui1_logon_domain);
  NetApiBufferFree(user_info);

  if (additional_user_mapping_.get()) {
    const string local_username(username);
    const string local_groupname(groupname);
    additional_user_mapping_->LocalToGlobalUsername(local_username,
                                                    &username);
    additional_user_mapping_->LocalToGlobalGroupname(local_groupname,
                                                     &groupname);
  }

  user_credentials->set_username(username);
  user_credentials->add_groups(groupname);
}
} // namespace xtreemfs
#endif // WIN32

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/user_mapping.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/user_mapping_gridmap_globus.h"
#include "libxtreemfs/user_mapping_gridmap_unicore.h"
namespace xtreemfs {
/**
 * Creates a user mapping of the given type, configured with default
 * constructed Options.
 */
UserMapping* UserMapping::CreateUserMapping(UserMappingType type) {
  return CreateUserMapping(type, Options());
}
/**
 * Creates a user mapping of the given type.
 *
 * @param type    kGlobus or kUnicore select the respective gridmap mapping.
 * @param options Provides the gridmap location and the reload interval.
 *
 * @return A new mapping object (created with new, not released here) or NULL
 *         for kNone and every other type.
 */
UserMapping* UserMapping::CreateUserMapping(UserMappingType type,
                                            const Options& options) {
  if (type == kGlobus) {
    return new UserMappingGridmapGlobus(
        options.grid_gridmap_location,
        options.grid_gridmap_reload_interval_m * 60);  // Min -> Seconds.
  }
  if (type == kUnicore) {
    return new UserMappingGridmapUnicore(
        options.grid_gridmap_location,
        options.grid_gridmap_reload_interval_m * 60);  // Min -> Seconds.
  }
  // kNone and unknown types: no mapping.
  return NULL;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,216 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/user_mapping_gridmap.h"
#include <boost/algorithm/string/trim.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <sys/stat.h>
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace boost;
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * @param gridmap_file              Path to the gridmap file.
 * @param gridmap_reload_interval_s Interval in seconds between two checks
 *                                  whether the gridmap file has changed.
 *                                  (Formerly named "..._m", although the
 *                                  value is stored and used as seconds and
 *                                  UserMapping::CreateUserMapping() already
 *                                  converts minutes into seconds.)
 */
UserMappingGridmap::UserMappingGridmap(const std::string& gridmap_file,
                                       int gridmap_reload_interval_s)
    : gridmap_file_(gridmap_file),
      gridmap_reload_interval_s_(gridmap_reload_interval_s),
      date_(0),
      size_(0) {}
/**
 * Maps a local username to its DN using the gridmap file.
 *
 * @param username_local  Local username.
 * @param username_global [out] The DN, or the unchanged local username if
 *                        there is no mapping.
 */
void UserMappingGridmap::LocalToGlobalUsername(
    const std::string& username_local,
    std::string* username_global) {
  *username_global = UsernameToDN(username_local);
  if (!username_global->empty()) {
    return;
  }

  // No mapping available: fall back to the local name.
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "gridmap: no mapping for username " << username_local << endl;
  }
  *username_global = username_local;
}
std::string UserMappingGridmap::UsernameToDN(const std::string& username) {
boost::mutex::scoped_lock lock(mutex);
boost::bimap< std::string, std::string >::right_const_iterator iter
= dn_username.right.find(username);
if (iter != dn_username.right.end()) {
return iter->second;
}
return "";
}
/**
 * Maps a global username, which is actually a DN, to the local username
 * using the gridmap file.
 *
 * @param username_global The DN.
 * @param username_local  [out] The local username, or the unchanged DN if
 *                        there is no mapping.
 */
void UserMappingGridmap::GlobalToLocalUsername(
    const std::string& username_global,
    std::string* username_local) {
  *username_local = DNToUsername(username_global);
  if (!username_local->empty()) {
    return;
  }

  // No mapping available: fall back to the DN.
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG)
        << "gridmap: no mapping for dn " << username_global << std::endl;
  }
  *username_local = username_global;
}
std::string UserMappingGridmap::DNToUsername(const std::string& dn) {
boost::mutex::scoped_lock lock(mutex);
boost::bimap< std::string, std::string >::left_const_iterator iter
= dn_username.left.find(dn);
if (iter != dn_username.left.end()) {
return iter->second;
}
return "";
}
/**
 * Always yields "root", whatever the local groupname is: it is currently not
 * possible to map between OU-entries and local groups.
 */
void UserMappingGridmap::LocalToGlobalGroupname(
    const std::string& groupname_local,
    std::string* groupname_global) {
  groupname_global->assign("root");
}
/**
 * Always yields "root", whatever the global groupname is: it is currently not
 * possible to map between OU-entries and local groups.
 */
void UserMappingGridmap::GlobalToLocalGroupname(
    const std::string& groupname_global,
    std::string* groupname_local) {
  groupname_local->assign("root");
}
void UserMappingGridmap::GetGroupnames(
const std::string& username_local,
xtreemfs::pbrpc::UserCredentials* user_credentials) {
// obtain dn of current process
string dn;
LocalToGlobalUsername(username_local, &dn);
// map username to ou using gridmap-file
DNToOUs(dn, user_credentials);
}
void UserMappingGridmap::DNToOUs(
const std::string& dn,
xtreemfs::pbrpc::UserCredentials* user_credentials) {
// find groups for current user
boost::mutex::scoped_lock lock(mutex);
multimap<string, string>::iterator iter;
pair<
multimap<string, string>::iterator,
multimap<string, string>::iterator > range
= dn_groupname.equal_range(dn);
for (iter = range.first; iter != range.second; ++iter) {
// Logging::log->getLog(LEVEL_DEBUG)
// << "group: " << iter->second << std::endl;
user_credentials->add_groups(iter->second);
}
}
/**
 * Reads the gridmap file once and starts the thread which reloads it
 * whenever it changes.
 *
 * @throws XtreemFSException if stat() fails for the gridmap file.
 */
void UserMappingGridmap::Start() {
  struct stat file_info;
  const int stat_result = stat(gridmap_file_.c_str(), &file_info);
  if (stat_result != 0) {
    throw XtreemFSException("Failed to open gridmap file: " + gridmap_file_);
  }

  // Initial read; remember what the file looked like to detect changes.
  ReadGridmapFile();
  date_ = file_info.st_mtime;
  size_ = file_info.st_size;

  // Monitor changes to the gridmap file in the background.
  monitor_thread_.reset(new boost::thread(
      boost::bind(&UserMappingGridmap::PeriodicGridmapFileReload, this)));
}
/**
 * Interrupts the monitor thread and waits until it has finished. No-op if
 * the thread does not exist.
 */
void UserMappingGridmap::Stop() {
  if (!monitor_thread_) {
    return;
  }
  monitor_thread_->interrupt();
  monitor_thread_->join();
}
void UserMappingGridmap::PeriodicGridmapFileReload() {
struct stat st;
// Monitor changes to the gridmap file.
while (true) {
boost::posix_time::seconds sleep_time(gridmap_reload_interval_s_);
boost::this_thread::sleep(sleep_time);
int ierr = stat(gridmap_file_.c_str(), &st);
if (ierr) {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN)
<< "Failed to check if the gridmap file has changed."
" Is it temporarily not available? Path to file: "
<< gridmap_file_ << " Error: " << ierr << endl;
}
continue;
}
if (st.st_mtime != date_ || st.st_size != size_) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO)
<< "File changed. Updating all entries." << endl;
}
ReadGridmapFile();
date_ = st.st_mtime;
size_ = st.st_size;
}
}
}
void UserMappingGridmap::Store(
std::string dn,
std::string users,
std::string user_seperator,
boost::bimap< std::string, std::string > &new_username,
std::multimap< std::string, std::string > &new_groupname) {
// if there are several usernames, use only the first one
escaped_list_separator<char> els2("", user_seperator.c_str(), "");
tokenizer< escaped_list_separator<char> > tok_user(users, els2);
tokenizer< escaped_list_separator<char> >::iterator first_username
= tok_user.begin();
std::string user = std::string(*first_username);
// cout << "gridmap: dn: '" << dn << "'" << std::endl;
// cout << "gridmap: user: " << user << std::endl;
new_username.insert(bimap< string, string >::value_type(dn, user));
// find groups (starting with OU=)
size_t ou_pos = dn.find("OU=", 0);
while (ou_pos != string::npos) {
size_t end_pos = dn.find(",", ou_pos+1);
std::string ou = dn.substr(ou_pos+3, end_pos-ou_pos-3);
// cout << "gridmap: group: " << ou << std::endl;
// add one usergroup (OU=...)
new_groupname.insert(std::pair<std::string, std::string>(dn, ou));
// search in the remaining string
ou_pos = dn.find("OU=", ou_pos+1);
}
}
} // namespace xtreemfs

View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/user_mapping_gridmap_globus.h"
#include <boost/bimap.hpp>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
#include "util/logging.h"
#include "libxtreemfs/xtreemfs_exception.h"
using namespace boost;
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Forwards both arguments unchanged to the UserMappingGridmap base class.
 *
 * @param gridmap_file               Path of the gridmap file.
 * @param gridmap_reload_interval_s  Reload interval handed to the base class.
 */
UserMappingGridmapGlobus::UserMappingGridmapGlobus(
    const std::string& gridmap_file,
    int gridmap_reload_interval_s)
    : UserMappingGridmap(gridmap_file, gridmap_reload_interval_s) {
}
/**
 * Parses a gridmap file in Globus format and replaces the current
 * dn_username/dn_groupname mappings by its content.
 *
 * Each line is split at spaces into a DN (which may be enclosed in double
 * quotes) and a username. The DN "/A=1/B=2" is rewritten to "B=2,A=1"
 * before it is passed to Store().
 *
 * If the file cannot be opened, an error is logged and the mappings which
 * are currently loaded stay untouched. Lines without a DN or without a
 * username are logged and skipped.
 */
void UserMappingGridmapGlobus::ReadGridmapFile() {
  boost::bimap< std::string, std::string > new_username;
  std::multimap< std::string, std::string > new_groupname;

  ifstream in(gridmap_file().c_str());
  if (!in.is_open()) {
    Logging::log->getLog(LEVEL_ERROR)
        << "gridmap: could not open gridmap-file: " << gridmap_file() << endl;
    // Do not replace the existing mappings by an empty set just because the
    // file is not readable right now.
    return;
  }

  if (Logging::log->loggingActive(LEVEL_INFO)) {
    Logging::log->getLog(LEVEL_INFO)
        << "gridmap: loading users and groups from file: "
        << gridmap_file() << endl;
  }

  std::vector< std::string > vec;
  std::string line;

  string separator1("");    // no escaping
  string separator2(" ");   // split dn on spaces
  string separator3("\"");  // the dn is enclosed by "dn"
  escaped_list_separator<char> els(separator1, separator2, separator3);

  // Separator for the components of the dn-string.
  escaped_list_separator<char> els_dn("", "/", "");

  // Read lines.
  while (getline(in, line)) {
    tokenizer< escaped_list_separator<char> > tok(line, els);
    vec.clear();
    vec.assign(tok.begin(), tok.end());

    // Are there two entries available?
    if (vec.size() < 2) {
      Logging::log->getLog(LEVEL_ERROR)
          << "gridmap: could not parse line: " << line << std::endl;
      continue;
    }

    boost::trim(vec[0]);  // dn
    boost::trim(vec[1]);  // username

    // An empty dn (e.g. a line starting with a space) yields no dn
    // components at all; the substr(1, ...) below would then throw
    // std::out_of_range.
    if (vec[0].empty()) {
      Logging::log->getLog(LEVEL_ERROR)
          << "gridmap: could not parse line: " << line << std::endl;
      continue;
    }

    // Reformat globus-dn to unicore-dn.
    // There is no reverse iterator, so we reverse the characters instead.
    std::reverse(vec[0].begin(), vec[0].end());
    tokenizer< escaped_list_separator<char> > tok_dn(vec[0], els_dn);

    std::stringstream dn_stream;
    // Reverse the order and separate the elements using ",".
    for (tokenizer<escaped_list_separator<char> >::iterator beg
             = tok_dn.begin(); beg != tok_dn.end(); ++beg) {
      std::string word = *beg;
      std::reverse(word.begin(), word.end());
      dn_stream << "," << word;
    }

    // Store the dn, groups and username. The first and the last character
    // of the assembled string are cut off: for a dn with a leading "/"
    // these are the two surplus commas.
    std::string dn_parsed = dn_stream.str();
    Store(dn_parsed.substr(1, dn_parsed.length() - 2),
          std::string(vec[1]), ",", new_username, new_groupname);
  }

  // Publish the new mappings.
  boost::mutex::scoped_lock lock(mutex);
  dn_username.clear();
  dn_groupname.clear();
  dn_username.insert(new_username.begin(), new_username.end());
  dn_groupname.insert(new_groupname.begin(), new_groupname.end());
}
} // namespace xtreemfs

View File

@@ -0,0 +1,154 @@
/*
* Copyright (c) 2010-2011 by Patrick Schaefer, Zuse Institute Berlin
* 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/user_mapping_gridmap_unicore.h"
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <vector>
#include "util/logging.h"
#include "libxtreemfs/xtreemfs_exception.h"
using namespace boost;
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Forwards both arguments unchanged to the UserMappingGridmap base class.
 *
 * @param gridmap_file               Path of the gridmap file.
 * @param gridmap_reload_interval_s  Reload interval handed to the base class.
 */
UserMappingGridmapUnicore::UserMappingGridmapUnicore(
    const std::string& gridmap_file,
    int gridmap_reload_interval_s)
    : UserMappingGridmap(gridmap_file, gridmap_reload_interval_s) {
}
/**
 * Parses a Unicore gridmap file and replaces the current
 * dn_username/dn_groupname mappings by its content.
 *
 * The format is detected from the first line: if it contains more than
 * three ';' the file is read as xuudb, otherwise as uudb.
 *
 * uudb-format (Unicore < 6)
 *   dgls0050:dgms0006=CN=Patrick Schaefer,OU=CSR,O=GridGermany,C=DE
 *
 * xuudb-format (Unicore 6)
 *   225;zib;dgms0006:dgls0050;user;mosgrid:lifescience;
 *   CN=Patrick Schaefer,OU=CSR,O=GridGermany,C=DE
 *
 * If the file cannot be opened, an error is logged and the mappings which
 * are currently loaded stay untouched.
 */
void UserMappingGridmapUnicore::ReadGridmapFile() {
  boost::bimap< std::string, std::string > new_username;
  std::multimap< std::string, std::string > new_groupname;

  ifstream in(gridmap_file().c_str());
  if (!in.is_open()) {
    Logging::log->getLog(LEVEL_ERROR)
        << "gridmap: could not open gridmap-file: " << gridmap_file() << endl;
    // Do not replace the existing mappings by an empty set just because the
    // file is not readable right now.
    return;
  }

  if (Logging::log->loggingActive(LEVEL_INFO)) {
    Logging::log->getLog(LEVEL_INFO)
        << "gridmap: loading users and groups from file: "
        << gridmap_file() << endl;
  }

  // Read the first line to determine the format, then rewind the stream so
  // that the format specific reader starts at the beginning again.
  std::string line;
  getline(in, line);
  in.clear();  // reset the state flags, e.g. after reading an empty file
  in.seekg(0);

  if (std::count(line.begin(), line.end(), ';') > 3) {
    // unicore 6
    ReadGridmapFileUnicore6(in, new_username, new_groupname);
  } else {
    // unicore < 6
    ReadGridmapFileUnicore(in, new_username, new_groupname);
  }

  // Publish the new mappings.
  boost::mutex::scoped_lock lock(mutex);
  dn_username.clear();
  dn_groupname.clear();
  dn_username.insert(new_username.begin(), new_username.end());
  dn_groupname.insert(new_groupname.begin(), new_groupname.end());
}
/**
* uudb-format (Unicore < 6)
* dgls0050:dgms0006=CN=Patrick Schaefer,OU=CSR,O=GridGermany,C=DE
*/
void UserMappingGridmapUnicore::ReadGridmapFileUnicore(
std::ifstream &in,
boost::bimap< std::string, std::string > &new_username,
std::multimap< std::string, std::string > &new_groupname) {
std::vector< std::string > vec;
std::string line;
// read lines
while(getline(in, line)) {
vec.clear();
// split string at first '='
size_t end_of_users_pos = line.find("=");
vec.push_back(line.substr(0, end_of_users_pos));
vec.push_back(line.substr(end_of_users_pos+1,
line.size()-end_of_users_pos-1));
// are there two entries available?
if (vec.size() < 2) {
Logging::log->getLog(LEVEL_ERROR)
<< "gridmap: could not parse line: " << line << std::endl;
continue;
}
trim(vec[1]); // dn
trim(vec[0]); // usernames
// store the dn, groups and username
Store(std::string(vec[1]), std::string(vec[0]), ":",
new_username, new_groupname);
}
}
/**
* xuudb-format (Unicore 6)
* 225;zib;dgms0006:dgls0050;user;mosgrid:lifescience;CN=Patrick Schaefer,OU=CSR,O=GridGermany,C=DE
*/
void UserMappingGridmapUnicore::ReadGridmapFileUnicore6(
std::ifstream &in,
boost::bimap< std::string, std::string > &new_username,
std::multimap< std::string, std::string > &new_groupname) {
std::vector< std::string > vec;
std::string line;
string separator1(""); // no escaping
string separator2(";"); // split dn on ;
string separator3(""); // the dn is not enclosed
escaped_list_separator<char> els(separator1, separator2, separator3);
// read lines
while(getline(in, line)) {
tokenizer< escaped_list_separator<char> > tok(line, els);
vec.clear();
vec.assign(tok.begin(), tok.end());
// are there two entries available?
if (vec.size() < 6) {
Logging::log->getLog(LEVEL_ERROR)
<< "gridmap: could not parse line: " << line << std::endl;
continue;
}
trim(vec[5]); // dn
trim(vec[2]); // username
// store the dn, groups and username
Store(std::string(vec[5]), std::string(vec[2]), ":",
new_username, new_groupname);
}
}
} // namespace xtreemfs

View File

@@ -0,0 +1,78 @@
/*
* Copyright (c) 2009-2011 by Patrick Schaefer, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/uuid_cache.h"
#include <time.h>
#include <map>
#include <string>
#include <vector>
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
void UUIDCache::update(
const std::string& uuid,
const std::string& address,
const uint32_t port,
const time_t ttls) {
boost::mutex::scoped_lock lock(mutex_);
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "UUID: registering new UUID "
<< uuid << " "
<< address << ":" << port
<< endl;
}
UUIDMapping uuidMapping = UUIDMapping();
uuidMapping.address = address;
uuidMapping.uuid = uuid;
uuidMapping.port = port;
uuidMapping.timeout = time(NULL) + ttls; // calc timeout in seconds
// address contains update-time to evict old entries
cache_[uuid] = uuidMapping;
}
/**
* Old UUIDs are invalidated but there is no active pulling of new UUIDs.
*/
std::string UUIDCache::get(const std::string& uuid) {
boost::mutex::scoped_lock lock(mutex_);
std::map<string, UUIDMapping >::iterator it = cache_.find(uuid);
// entry found?
if (it != cache_.end()) {
// entry timed out?
const UUIDMapping mapping = it->second;
if (time(NULL) < mapping.timeout) {
// Build ip-address:port from AddressMapping.
ostringstream s;
s << mapping.address << ":" << mapping.port;
return s.str();
} else {
// Expired => Remove from cache_.
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "UUID expired:" << uuid << endl;
}
cache_.erase(it);
}
}
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "UUID cache miss:" << uuid << endl;
}
return "";
}
} // namespace xtreemfs

View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2012 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/uuid_container.h"
#include <sstream>
#include "libxtreemfs/container_uuid_iterator.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Fills the container with the OSD UUIDs of all replicas in "xlocs".
 * Exceptions of GetOSDUUIDsFromXlocSet() are passed on to the caller.
 */
UUIDContainer::UUIDContainer(const xtreemfs::pbrpc::XLocSet& xlocs) {
  this->GetOSDUUIDsFromXlocSet(xlocs);
}
/**
 * Deletes every UUIDItem referenced by the container.
 */
UUIDContainer::~UUIDContainer() {
  Iterator replica = uuids_.begin();
  while (replica != uuids_.end()) {
    InnerIterator item = replica->begin();
    while (item != replica->end()) {
      delete *item;
      ++item;
    }
    ++replica;
  }
}
/**
 * Appends one inner container per replica of "xlocs" and fills it with a
 * newly allocated UUIDItem for every OSD UUID of that replica.
 *
 * @throws EmptyReplicaListInXlocSet if "xlocs" contains no replica.
 * @throws NoHeadOSDInXlocSet        if a replica contains no OSD UUID.
 */
void UUIDContainer::GetOSDUUIDsFromXlocSet(
    const xtreemfs::pbrpc::XLocSet& xlocs) {
  boost::mutex::scoped_lock lock(mutex_);

  const bool no_replicas = (xlocs.replicas_size() == 0);
  if (no_replicas) {
    throw EmptyReplicaListInXlocSet("UUIDContainer::GetOSDUUIDFromXlocSet: "
        "Empty replica list in XlocSet: " + xlocs.DebugString());
  }

  int replica_index = 0;
  while (replica_index < xlocs.replicas_size()) {
    const xtreemfs::pbrpc::Replica& replica = xlocs.replicas(replica_index);

    if (replica.osd_uuids_size() == 0) {
      throw NoHeadOSDInXlocSet("UUIDContainer::GetOSDUUIDFromXlocSet: "
          "No head OSD available in XlocSet: " + xlocs.DebugString());
    }

    // The inner container is addressed by the replica index below.
    uuids_.push_back(InnerContainer());
    int stripe_index = 0;
    while (stripe_index < replica.osd_uuids_size()) {
      UUIDItem* item = new UUIDItem(replica.osd_uuids(stripe_index));
      uuids_[replica_index].push_back(item);
      ++stripe_index;
    }

    ++replica_index;
  }
}
/**
 * Adds one UUIDItem per replica to "uuid_iterator".
 *
 * @param uuid_iterator  Iterator which receives the items.
 * @param offsets        For every replica the index of the item to add;
 *                       must contain exactly one entry per replica.
 */
void UUIDContainer::FillUUIDIterator(ContainerUUIDIterator* uuid_iterator,
                                     std::vector<size_t> offsets) {
  assert(offsets.size() == uuids_.size());
  boost::mutex::scoped_lock lock(mutex_);

  // NOTE: if this method would be used in another context than the
  //       construction of ContainerUUIDIterator, uuid_iterator->Clear()
  //       would have to be called first.
  for (size_t replica = 0; replica < uuids_.size(); ++replica) {
    const size_t item_index = offsets[replica];
    uuid_iterator->AddUUIDItem(uuids_[replica][item_index]);
  }
}
} // namespace xtreemfs

View File

@@ -0,0 +1,85 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
* 2012 by Matthias Noack, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/uuid_iterator.h"
#include <sstream>
#include "libxtreemfs/uuid_container.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
/**
 * Creates an iterator without a current UUID.
 */
UUIDIterator::UUIDIterator() {
  // The past-the-end position marks "no current UUID" for an empty list.
  this->current_uuid_ = this->uuids_.end();
}
// Empty: performs no explicit cleanup.
UUIDIterator::~UUIDIterator() {
}
void UUIDIterator::GetUUID(std::string* result) {
assert(result);
boost::mutex::scoped_lock lock(mutex_);
if (current_uuid_ == uuids_.end()) {
throw UUIDIteratorListIsEmpyException("GetUUID() failed because the list of"
" UUIDs is empty.");
} else {
assert(!(*current_uuid_)->IsFailed());
*result = (*current_uuid_)->uuid;
}
}
std::string UUIDIterator::DebugString() {
ostringstream stream;
stream << "[ ";
boost::mutex::scoped_lock lock(mutex_);
for (list<UUIDItem*>::iterator it = uuids_.begin();
it != uuids_.end();
++it) {
if (it != uuids_.begin()) {
stream << ", ";
}
stream << "[ " << (*it)->uuid << ", " << (*it)->IsFailed() << "]";
}
stream << " ]";
return stream.str();
}
void UUIDIterator::MarkUUIDAsFailed(const std::string& uuid) {
boost::mutex::scoped_lock lock(mutex_);
// Only take actions if "uuid" is the current UUID.
if (current_uuid_ != uuids_.end() && (*current_uuid_)->uuid == uuid) {
(*current_uuid_)->MarkAsFailed();
current_uuid_++;
if (current_uuid_ == uuids_.end()) {
// Reset the status of all entries and set the first as current UUID.
for (list<UUIDItem*>::iterator it = uuids_.begin();
it != uuids_.end();
++it) {
(*it)->Reset();
}
current_uuid_ = uuids_.begin();
} else {
// Reset the current UUID to make sure it is not marked as failed.
(*current_uuid_)->Reset();
}
}
}
} // namespace xtreemfs

View File

@@ -0,0 +1,498 @@
/*
* Copyright (c) 2009 Juan Gonzalez de Benito,
* 2011 Bjoern Kolbeck (Zuse Institute Berlin),
* 2012 Matthias Noack (Zuse Institute Berlin)
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "libxtreemfs/vivaldi.h"
#include <boost/bind.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/lexical_cast.hpp>
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>
#include "libxtreemfs/execute_sync_request.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/options.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/simple_uuid_iterator.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
#include "util/zipf_generator.h"
#include "xtreemfs/DIRServiceClient.h"
#include "xtreemfs/GlobalTypes.pb.h"
#include "xtreemfs/OSDServiceClient.h"
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace xtreemfs {
static void AddAddresses(const ServiceAddresses& service_addresses,
SimpleUUIDIterator* uuid_iterator) {
ServiceAddresses::Addresses as_list = service_addresses.GetAddresses();
for (ServiceAddresses::Addresses::const_iterator iter = as_list.begin();
iter != as_list.end(); ++iter) {
uuid_iterator->AddUUID(*iter);
}
}
/**
 * Prepares the Vivaldi component. Initialize() has to be called before
 * Run() because the service clients are created there.
 *
 * @param dir_addresses  Addresses of the DIR service(s).
 * @param uuid_resolver  Resolver handed to the OSD ping requests.
 * @param options        Copied into vivaldi_options_ and adjusted below.
 */
Vivaldi::Vivaldi(
    const ServiceAddresses& dir_addresses,
    UUIDResolver* uuid_resolver,
    const Options& options)
    : uuid_resolver_(uuid_resolver),
      vivaldi_options_(options) {
  // Seed rand(), which is used for the sleep time in Run().
  srand(static_cast<unsigned int>(time(NULL)));

  // Vivaldi requests do not have to be retried nor interrupted.
  vivaldi_options_.max_tries = 1;
  vivaldi_options_.was_interrupted_function = NULL;

  // AuthType AUTH_NONE, as it's currently not used.
  auth_bogus_.set_auth_type(AUTH_NONE);
  // The username "xtreemfs" does not get checked at server side.
  user_credentials_bogus_.set_username("xtreemfs");

  dir_service_addresses_.reset(new SimpleUUIDIterator());
  AddAddresses(dir_addresses, dir_service_addresses_.get());
}
/**
 * Creates the DIR and OSD service clients on top of "rpc_client".
 * Run() asserts that both clients exist.
 */
void Vivaldi::Initialize(rpc::Client* rpc_client) {
  osd_client_.reset(new pbrpc::OSDServiceClient(rpc_client));
  dir_client_.reset(new pbrpc::DIRServiceClient(rpc_client));
}
void Vivaldi::Run() {
assert(dir_client_.get() != NULL);
assert(osd_client_.get() != NULL);
bool loaded_from_file = false;
ifstream vivaldi_coordinates_file(vivaldi_options_.vivaldi_filename.c_str());
if (vivaldi_coordinates_file.is_open()) {
my_vivaldi_coordinates_.ParseFromIstream(&vivaldi_coordinates_file);
loaded_from_file = my_vivaldi_coordinates_.IsInitialized();
if (!loaded_from_file) {
Logging::log->getLog(LEVEL_ERROR)
<< "Vivaldi: Could not load coordinates from file: "
<< my_vivaldi_coordinates_.InitializationErrorString() << endl;
my_vivaldi_coordinates_.Clear();
}
vivaldi_coordinates_file.close();
}
if (!loaded_from_file) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO)
<< "Vivaldi: Coordinates file does not exist or could not be parsed,"
<< "starting with empty coordinates." << endl
<< "Initialization might take some time." << endl;
}
// Initialize coordinates to (0,0) by default
my_vivaldi_coordinates_.set_local_error(0.0);
my_vivaldi_coordinates_.set_x_coordinate(0.0);
my_vivaldi_coordinates_.set_y_coordinate(0.0);
}
VivaldiNode own_node(my_vivaldi_coordinates_);
uint64_t vivaldi_iterations = 0;
list<KnownOSD> known_osds;
bool valid_known_osds = false;
vector<uint64_t> current_retries;
int retries_in_a_row = 0;
list<KnownOSD>::iterator chosen_osd_service;
ZipfGenerator rank_generator(vivaldi_options_.vivaldi_zipf_generator_skew);
for (;;) {
boost::scoped_ptr<rpc::SyncCallbackBase> ping_response;
try {
// Get a list of OSDs from the DIR(s)
if ((vivaldi_iterations %
vivaldi_options_.vivaldi_max_iterations_before_updating) == 0) {
valid_known_osds = UpdateKnownOSDs(&known_osds, own_node);
if (valid_known_osds && !known_osds.empty()) {
rank_generator.set_size(known_osds.size());
}
// The pending retries are discarded, because the old OSDs might not
// be in the new list
current_retries.clear();
retries_in_a_row = 0;
chosen_osd_service = known_osds.begin();
}
// There are known OSDs, ping one of them.
if (valid_known_osds && !known_osds.empty()) {
// Choose an OSD, only if there's no pending retry
if (retries_in_a_row == 0) {
int index = rank_generator.next();
list<KnownOSD>::iterator known_iterator = known_osds.begin();
for (int i = 0;
(i < index) && (known_iterator != known_osds.end());
known_iterator++, i++) {
// Move the iterator over the chosen service
}
chosen_osd_service = known_iterator;
}
// Ping chosen OSD.
xtreemfs_pingMesssage ping_message;
ping_message.set_request_response(true);
ping_message.mutable_coordinates()
->MergeFrom(*own_node.GetCoordinates());
VivaldiCoordinates* random_osd_vivaldi_coordinates;
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Vivaldi: recalculating against: "
<< chosen_osd_service->GetUUID() << endl;
}
SimpleUUIDIterator pinged_osd;
pinged_osd.AddUUID(chosen_osd_service->GetUUID());
// execute sync ping
try {
// start timing
boost::posix_time::ptime start_time(boost::posix_time
::microsec_clock::local_time());
ping_response.reset(
ExecuteSyncRequest(
boost::bind(
&xtreemfs::pbrpc::OSDServiceClient::xtreemfs_ping_sync,
osd_client_.get(),
_1,
boost::cref(auth_bogus_),
boost::cref(user_credentials_bogus_),
&ping_message),
&pinged_osd,
uuid_resolver_,
RPCOptionsFromOptions(vivaldi_options_)));
// stop timing
boost::posix_time::ptime end_time(
boost::posix_time::microsec_clock::local_time());
boost::posix_time::time_duration rtt = end_time - start_time;
uint64_t measured_rtt = rtt.total_milliseconds();
xtreemfs::pbrpc::xtreemfs_pingMesssage* ping_response_obj =
static_cast<xtreemfs::pbrpc::xtreemfs_pingMesssage*>(
ping_response->response());
random_osd_vivaldi_coordinates = ping_response_obj->mutable_coordinates();
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Vivaldi: ping response received. Measured time: "
<< measured_rtt << " ms" << endl;
}
// Recalculate coordinates here
if (retries_in_a_row < vivaldi_options_.vivaldi_max_request_retries) {
if (!own_node.RecalculatePosition(*random_osd_vivaldi_coordinates,
measured_rtt,
false)) {
// The movement has been postponed because the measured RTT
// seems to be a peak
current_retries.push_back(measured_rtt);
retries_in_a_row++;
} else {
// The movement has been accepted
current_retries.clear();
retries_in_a_row = 0;
}
} else {
// Choose the lowest RTT
uint64_t lowest_rtt = measured_rtt;
for (vector<uint64_t>::iterator retries_iterator =
current_retries.begin();
retries_iterator < current_retries.end();
++retries_iterator) {
if (*retries_iterator < lowest_rtt) {
lowest_rtt = *retries_iterator;
}
}
// Force recalculation after too many retries
own_node.RecalculatePosition(*random_osd_vivaldi_coordinates,
lowest_rtt,
true);
current_retries.clear();
retries_in_a_row = 0;
// set measured_rtt to the actually used one for trace output
measured_rtt = lowest_rtt;
}
} catch (const XtreemFSException& e) {
if (ping_response.get()) {
ping_response->DeleteBuffers();
}
Logging::log->getLog(LEVEL_ERROR)
<< "Vivaldi: could not ping OSDs: " << e.what() << endl;
// We must avoid to keep retrying indefinitely against an OSD which is not
// responding
if (retries_in_a_row > 0
&& (++retries_in_a_row >=
vivaldi_options_.vivaldi_max_request_retries)) {
// If the last retry times out all the previous retries are discarded
current_retries.clear();
retries_in_a_row = 0;
}
}
// update local coordinate copy here
{
boost::mutex::scoped_lock lock(coordinate_mutex_);
my_vivaldi_coordinates_.CopyFrom(*own_node.GetCoordinates());
}
// Store the new coordinates in a local file
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Vivaldi: storing coordinates to file: ("
<< own_node.GetCoordinates()->x_coordinate() << ", "
<< own_node.GetCoordinates()->y_coordinate() << ")" << endl;
}
ofstream file_out(vivaldi_options_.vivaldi_filename.c_str(),
ios_base::binary | ios_base::trunc);
own_node.GetCoordinates()->SerializePartialToOstream(&file_out);
file_out.close();
// Update client coordinates at the DIR
if (vivaldi_options_.vivaldi_enable_dir_updates) {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Vivaldi: Sending coordinates to DIR." << endl;
}
boost::scoped_ptr<rpc::SyncCallbackBase> response;
try {
response.reset(
ExecuteSyncRequest(
boost::bind(
&xtreemfs::pbrpc::DIRServiceClient
::xtreemfs_vivaldi_client_update_sync,
dir_client_.get(),
_1,
boost::cref(auth_bogus_),
boost::cref(user_credentials_bogus_),
own_node.GetCoordinates()),
dir_service_addresses_.get(),
NULL,
RPCOptionsFromOptions(vivaldi_options_),
true));
response->DeleteBuffers();
} catch (const XtreemFSException& e) {
if (response.get()) {
response->DeleteBuffers();
}
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO)
<< "Vivaldi: Failed to send the updated client"
" coordinates to the DIR, error: "
<< e.what() << endl;
}
}
}
// //Print a trace
// char auxStr[256];
// SPRINTF_VIV(auxStr,
// 256,
// "%s:%lld(Viv:%.3f) Own:(%.3f,%.3f) lE=%.3f "
// "Rem:(%.3f,%.3f) rE=%.3f %s\n",
// retried ? "RETRY" : "RTT",
// static_cast<long long int> (measured_rtt),
// own_node.calculateDistance(
// (*own_node.getCoordinates()),
// random_osd_vivaldi_coordinates.get()),
// own_node.getCoordinates()->x_coordinate(),
// own_node.getCoordinates()->y_coordinate(),
// own_node.getCoordinates()->local_error(),
// random_osd_vivaldi_coordinates->x_coordinate(),
// random_osd_vivaldi_coordinates->y_coordinate(),
// random_osd_vivaldi_coordinates->local_error(),
// chosen_osd_service->get_uuid().data());
// get_log()->getStream(YIELD::platform::Log::LOG_INFO) <<
// "Vivaldi: " << auxStr;
// Update OSD's coordinates
chosen_osd_service->SetCoordinates(*random_osd_vivaldi_coordinates);
// Re-sort known_osds
// TODO(mno): Use a more efficient sort approach.
list<KnownOSD> aux_osd_list(known_osds);
KnownOSD chosen_osd_service_value = *chosen_osd_service;
known_osds.clear(); // NOTE: this invalidates all ptrs and itrs
for (list<KnownOSD>::reverse_iterator aux_iterator
= aux_osd_list.rbegin();
aux_iterator != aux_osd_list.rend();
aux_iterator++) {
double new_osd_distance =
own_node.CalculateDistance(
*(aux_iterator->GetCoordinates()),
*own_node.GetCoordinates());
list<KnownOSD>::iterator known_iterator = known_osds.begin();
while (known_iterator != known_osds.end()) {
double old_osd_distance = \
own_node.CalculateDistance(
*(known_iterator->GetCoordinates()),
*own_node.GetCoordinates());
if (old_osd_distance >= new_osd_distance) {
known_osds.insert(known_iterator, (*aux_iterator));
break;
} else {
known_iterator++;
}
}
if (known_iterator == known_osds.end()) {
known_osds.push_back((*aux_iterator));
}
} // end re-sorting
// find the chosen OSD in the resorted list
chosen_osd_service = find(known_osds.begin(),
known_osds.end(),
chosen_osd_service_value);
assert(chosen_osd_service != known_osds.end());
ping_response->DeleteBuffers();
} else {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN)
<< "Vivaldi: no OSD available." << endl;
}
}
vivaldi_iterations = (vivaldi_iterations + 1) % LONG_MAX;
// Sleep until the next iteration
uint32_t sleep_in_s = static_cast<uint32_t>(
vivaldi_options_.vivaldi_recalculation_interval_s -
vivaldi_options_.vivaldi_recalculation_epsilon_s +
(static_cast<double>(rand()) / (RAND_MAX - 1)) *
2.0 * vivaldi_options_.vivaldi_recalculation_epsilon_s);
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Vivaldi: sleeping during " << sleep_in_s << " s." << endl;
}
boost::this_thread::sleep(boost::posix_time::seconds(sleep_in_s));
} catch(const boost::thread_interrupted&) {
if (ping_response.get()) {
ping_response->DeleteBuffers();
}
break;
}
}
} // Run()
/**
 * Requests all OSD services from the DIR and rebuilds "*updated_osds".
 *
 * Only services with last_updated_s() > 0 which carry a
 * "vivaldi_coordinates" data entry are taken over. The resulting list is
 * ordered by ascending distance to "own_node".
 *
 * @param updated_osds  List to rebuild. It is left as it is if the request
 *                      itself fails.
 * @param own_node      Node whose coordinates the distances refer to.
 * @return true on success, false if an XtreemFSException was caught.
 */
bool Vivaldi::UpdateKnownOSDs(list<KnownOSD>* updated_osds,
                              const VivaldiNode& own_node) {
  // TODO(mno): Requesting the list of all OSDs does not scale with the number
  //            of services. Therefore, request only a subset of it.
  boost::scoped_ptr<rpc::SyncCallbackBase> response;

  try {
    serviceGetByTypeRequest request;
    request.set_type(SERVICE_TYPE_OSD);

    response.reset(ExecuteSyncRequest(
        boost::bind(
            &xtreemfs::pbrpc::DIRServiceClient
                ::xtreemfs_service_get_by_type_sync,
            dir_client_.get(),
            _1,
            boost::cref(auth_bogus_),
            boost::cref(user_credentials_bogus_),
            &request),
        dir_service_addresses_.get(),
        NULL,
        RPCOptionsFromOptions(vivaldi_options_),
        true));

    ServiceSet* received_osds = static_cast<ServiceSet*>(response->response());
    updated_osds->clear();

    for (int i = 0; i < received_osds->services_size(); i++) {
      const Service& service = received_osds->services(i);

      // Ignore every offline OSD.
      if (!(service.last_updated_s() > 0)) {
        continue;
      }

      // Look for the coordinates among the data entries of the service.
      const ServiceDataMap& sdm = service.data();
      const string* coordinates_string = NULL;
      for (int j = 0; j < sdm.data_size() && coordinates_string == NULL; ++j) {
        if (sdm.data(j).key() == "vivaldi_coordinates") {
          coordinates_string = &sdm.data(j).value();
        }
      }

      // If the DIR does not have the OSD's coordinates, we discard this
      // entry.
      if (coordinates_string == NULL) {
        continue;
      }

      // Parse the coordinates provided by the DIR.
      VivaldiCoordinates osd_coords;
      OutputUtils::StringToCoordinates(*coordinates_string, osd_coords);
      KnownOSD new_osd(service.uuid(), osd_coords);

      // Current distance from the client to the new OSD.
      const double new_osd_distance = own_node.CalculateDistance(
          *(own_node.GetCoordinates()),
          osd_coords);

      // Insert in front of the first OSD which is at least as far away;
      // inserting at end() appends.
      list<KnownOSD>::iterator insert_position = updated_osds->begin();
      for (; insert_position != updated_osds->end(); ++insert_position) {
        const double old_osd_distance =
            own_node.CalculateDistance(*insert_position->GetCoordinates(),
                                       *(own_node.GetCoordinates()));
        if (old_osd_distance >= new_osd_distance) {
          break;
        }
      }
      updated_osds->insert(insert_position, new_osd);
    }

    response->DeleteBuffers();
  } catch (const XtreemFSException& e) {
    if (response.get()) {
      response->DeleteBuffers();
    }
    Logging::log->getLog(LEVEL_ERROR)
        << "Vivaldi: Failed to update known OSDs: " << e.what() << endl;
    return false;
  }

  return true;
}  // update_known_osds
/**
 * Returns a reference to the local copy of the client's coordinates.
 *
 * NOTE(review): coordinate_mutex_ is only held until this method returns,
 * while the caller keeps a reference to the member which Run() updates
 * under the same mutex. Confirm that callers can cope with that.
 */
const VivaldiCoordinates& Vivaldi::GetVivaldiCoordinates() const {
  boost::mutex::scoped_lock guard(coordinate_mutex_);
  return this->my_vivaldi_coordinates_;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,263 @@
/*
* Copyright (c) 2009 Juan Gonzalez de Benito.
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include "libxtreemfs/vivaldi_node.h"
namespace xtreemfs {
/**
 * @return a pointer to the coordinates held by this node.
 */
const xtreemfs::pbrpc::VivaldiCoordinates* VivaldiNode::GetCoordinates() const {
  return &ownCoordinates;
}
/**
 * Multiplies both components of a pair of coordinates by a real number.
 *
 * @param coord the coordinates to be multiplied; receives the result.
 * @param value the real number to multiply by.
 */
void VivaldiNode::MultiplyValueCoordinates(
    xtreemfs::pbrpc::VivaldiCoordinates* coord,
    double value) {
  const double scaled_x = coord->x_coordinate() * value;
  const double scaled_y = coord->y_coordinate() * value;
  coord->set_x_coordinate(scaled_x);
  coord->set_y_coordinate(scaled_y);
}
/**
 * Adds coordB to coordA component by component.
 *
 * @param coordA first summand; receives the result.
 * @param coordB second summand.
 */
void VivaldiNode::AddCoordinates(
    xtreemfs::pbrpc::VivaldiCoordinates* coordA,
    const xtreemfs::pbrpc::VivaldiCoordinates& coordB) {
  const double sum_x = coordA->x_coordinate() + coordB.x_coordinate();
  const double sum_y = coordA->y_coordinate() + coordB.y_coordinate();
  coordA->set_x_coordinate(sum_x);
  coordA->set_y_coordinate(sum_y);
}
/**
 * Subtracts coordB from coordA component by component.
 *
 * @param coordA minuend; receives the result.
 * @param coordB subtrahend.
 */
void VivaldiNode::SubtractCoordinates(
    xtreemfs::pbrpc::VivaldiCoordinates* coordA,
    const xtreemfs::pbrpc::VivaldiCoordinates& coordB) {
  const double difference_x = coordA->x_coordinate() - coordB.x_coordinate();
  const double difference_y = coordA->y_coordinate() - coordB.y_coordinate();
  coordA->set_x_coordinate(difference_x);
  coordA->set_y_coordinate(difference_y);
}
/**
 * Calculates the scalar product of two pairs of coordinates:
 * A · B = Ax*Bx + Ay*By
 *
 * @param coordA a pair of coordinates.
 * @param coordB a pair of coordinates.
 * @return the result of the scalar product.
 */
double VivaldiNode::ScalarProductCoordinates(
    const xtreemfs::pbrpc::VivaldiCoordinates& coordA,
    const xtreemfs::pbrpc::VivaldiCoordinates& coordB) {
  double product = 0.0;
  // The terms are accumulated one after the other, x first.
  product += coordA.x_coordinate() * coordB.x_coordinate();
  product += coordA.y_coordinate() * coordB.y_coordinate();
  return product;
}
/**
 * Calculates the magnitude of a vector, i.e. the square root of its scalar
 * product with itself.
 *
 * @param coordA the coordinates whose magnitude must be calculated.
 * @return the distance from the position defined by the coordinates to the
 *         origin of the system.
 */
double VivaldiNode::MagnitudeCoordinates(
    const xtreemfs::pbrpc::VivaldiCoordinates& coordA) {
  return sqrt(ScalarProductCoordinates(coordA, coordA));
}
/**
 * Scales a vector to magnitude 1 in place.
 *
 * @param coord the vector to normalize; left untouched on failure.
 * @return true if the vector has been normalized, false if its magnitude
 *         is not greater than 0.
 */
bool VivaldiNode::GetUnitaryCoordinates(
    xtreemfs::pbrpc::VivaldiCoordinates* coord) {
  const double magnitude = MagnitudeCoordinates(*coord);

  // A magnitude of 0 cannot be used as divisor.
  if (!(magnitude > 0)) {
    return false;
  }

  MultiplyValueCoordinates(coord, 1.0 / magnitude);
  return true;
}
/**
 * Overwrites both components of a pair of coordinates with random values
 * between -1 and 1, which also gives the vector a random direction.
 *
 * @param coord coordinates that must be modified.
 */
void VivaldiNode::ModifyCoordinatesRandomly(
    xtreemfs::pbrpc::VivaldiCoordinates* coord) {
  // rand() / RAND_MAX yields a real value between 0 and 1, which is then
  // stretched to the range from -1 to 1. x is drawn before y.
  const double random_x = ((static_cast<double>(rand()) / RAND_MAX) * 2) - 1;
  coord->set_x_coordinate(random_x);

  const double random_y = ((static_cast<double>(rand()) / RAND_MAX) * 2) - 1;
  coord->set_y_coordinate(random_y);
}
/**
* Modifies the position of the node according to the current distance to a
* given point in the coordinate space and the real RTT measured against it.
*
* @param coordinatesJ coordinates and local error reported by the remote
* node J.
* @param measuredRTT RTT measured against J; a value of 0 is replaced by 1.
* @param forceRecalculation if true, the movement is applied even if it
* would otherwise be rejected as too big.
* @return false if the proposed movement was rejected as too big; true
* otherwise, including the case in which no unit vector could be
* computed and ownCoordinates was therefore left untouched.
*/
bool VivaldiNode::RecalculatePosition(
const xtreemfs::pbrpc::VivaldiCoordinates& coordinatesJ,
uint64_t measuredRTT,
bool forceRecalculation) {
bool retval = true;
double localError = ownCoordinates.local_error();
// SUBTRACTION = Xi - Xj (computed on a copy, ownCoordinates is not touched)
xtreemfs::pbrpc::VivaldiCoordinates subtractionVector(ownCoordinates);
SubtractCoordinates(&subtractionVector, coordinatesJ);
// ||SUBTRACTION|| should be ~= RTT
double subtractionMagnitude = MagnitudeCoordinates(subtractionVector);
// Sample weight balances local and remote error
// If it's close to 1, J knows more than me: localError > errorJ
// If it's close to 0.5, we both know the same: A/2A = 1/2
// If it's close to 0, I know more than it: localError < errorJ
double weight = 0.0;
// Two nodes shouldn't be in the same position; a zero RTT would also
// lead to a division by zero in the relative error below.
if (measuredRTT == 0) {
measuredRTT = 1;
}
// Compute relative error of this sample:
// | ||Xi - Xj|| - rtt | / rtt
double relativeError = static_cast<double> (
std::abs(subtractionMagnitude - measuredRTT)) /
static_cast<double> (measuredRTT);
// Calculate weight
if (localError <= 0.0) {
// No local error known yet: give the sample the full weight.
weight = 1;
} else {
if (coordinatesJ.local_error() > 0.0) {
// weight = localError / (localError + errorJ)
weight = localError / (localError
+ static_cast<double> (std::abs(coordinatesJ.local_error())));
} else {
/* The OSD has not determined its position yet (it has not even
* started), so we only modify ours slightly. (To allow "One client-One
* OSD" situations). */
weight = WEIGHT_IF_OSD_UNINITIALIZED;
}
}
// Calculate proposed movement: delta * (rtt - ||Xi - Xj||).
// A negative value moves the node towards J, a positive one away from it.
double delta;
delta = CONSTANT_C * weight;
double estimatedMovement = (static_cast<double> (measuredRTT)
- subtractionMagnitude) * delta;
// Is the proposed movement too big?
if (forceRecalculation || // Movement must be made anyway
(subtractionMagnitude <= 0.0) || // They both are in the same position
(estimatedMovement < 0.0) || // They must get closer
(std::abs(estimatedMovement) <
subtractionMagnitude * MAX_MOVEMENT_RATIO)) {
// Update local error (only stored below if the node is actually moved)
if (localError <= 0) {
// We initialize the local error with the first absolute error measured
localError = static_cast<double> (std::abs(subtractionMagnitude - \
static_cast<double> (measuredRTT)));
} else {
// Compute relative weight moving average of local error
localError = (relativeError * CONSTANT_E * weight) +
localError * (1 - (CONSTANT_E * weight));
}
if (subtractionMagnitude > 0.0) {
// Xi = Xi + delta * (rtt - || Xi - Xj ||) * u(Xi - Xj)
xtreemfs::pbrpc::VivaldiCoordinates additionVector(subtractionVector);
if (GetUnitaryCoordinates(&additionVector)) {
MultiplyValueCoordinates(&additionVector, estimatedMovement);
// Move the node according to the calculated addition vector
AddCoordinates(&ownCoordinates, additionVector);
ownCoordinates.set_local_error(localError);
}
} else { // subtractionMagnitude == 0.0
// Both points have the same Coordinates, so we just pull
// them apart in a random direction
xtreemfs::pbrpc::VivaldiCoordinates randomCoords;
ModifyCoordinatesRandomly(&randomCoords);
xtreemfs::pbrpc::VivaldiCoordinates additionVector(randomCoords);
// Xi = Xi + delta * (rtt - || Xi - Xj ||) * u(randomVector)
// (nothing is changed if the random vector has no length)
if (GetUnitaryCoordinates(&additionVector)) {
MultiplyValueCoordinates(&additionVector, estimatedMovement);
// Move the node according to the calculated addition vector
AddCoordinates(&ownCoordinates, additionVector);
ownCoordinates.set_local_error(localError);
}
}
} else {
// The proposed movement is too big according to the current distance
// between nodes; neither the position nor the local error is changed.
retval = false;
}
return retval;
}
/**
 * Returns the distance between two positions, i.e. the magnitude of the
 * vector coordA - coordB.
 *
 * coordA is taken by value on purpose: the local copy is used as scratch
 * space for the subtraction, so the caller's object is not modified.
 */
double VivaldiNode::CalculateDistance(
    xtreemfs::pbrpc::VivaldiCoordinates coordA,
    const xtreemfs::pbrpc::VivaldiCoordinates& coordB) {
  xtreemfs::pbrpc::VivaldiCoordinates* difference = &coordA;
  SubtractCoordinates(difference, coordB);
  const double distance = MagnitudeCoordinates(*difference);
  return distance;
}
/**
 * Parses up to eight hexadecimal characters of str, starting at position,
 * as an unsigned integer.
 *
 * Parsing is done by strtoul(): it stops at the first character that is not
 * a hex digit and yields 0 if no digit could be read.
 */
static unsigned int ReadHexInt(const std::string &str, int position) {
  // Only the (at most) eight characters starting at position are considered.
  const std::string hex_digits = str.substr(position, 8);
  const unsigned long parsed = strtoul(hex_digits.c_str(), NULL, 16);
  return parsed;
}
/**
 * Reads a 64-bit value which is stored in str as two consecutive 8-character
 * hex fields: the lower 32 bit at position, the upper 32 bit at position + 8.
 *
 * The two halves are combined as unsigned values. Shifting a negative signed
 * value to the left (which happened whenever the upper half had its highest
 * bit set) is undefined behaviour before C++20; the unsigned computation
 * yields the same bit pattern without relying on it.
 */
static int64_t ReadHexLongLong(const std::string &str, int position) {
  const uint64_t low = ReadHexInt(str, position);
  const uint64_t high = ReadHexInt(str, position + 8);
  // Left-shift the upper 4 bytes by 32 bit and append the lower 32 bit.
  const uint64_t value = (high << 32) | (low & 0xFFFFFFFFu);
  return static_cast<int64_t>(value);
}
/**
 * Fills vc from a string which contains three consecutive 16-character hex
 * fields: x coordinate, y coordinate and local error (in this order).
 *
 * All three fields are read before vc is modified.
 */
void OutputUtils::StringToCoordinates(
    const std::string& str,
    xtreemfs::pbrpc::VivaldiCoordinates & vc) {
  const int kFieldWidth = 16;  // Hex characters per 64-bit field.

  const int64_t x_raw = ReadHexLongLong(str, 0 * kFieldWidth);
  const int64_t y_raw = ReadHexLongLong(str, 1 * kFieldWidth);
  const int64_t error_raw = ReadHexLongLong(str, 2 * kFieldWidth);

  // NOTE(review): the 64-bit values are converted numerically to double
  // here, they are not reinterpreted as IEEE-754 bit patterns - confirm that
  // this matches the way the string was generated.
  vc.set_x_coordinate(static_cast<double>(x_raw));
  vc.set_y_coordinate(static_cast<double>(y_raw));
  vc.set_local_error(static_cast<double>(error_raw));
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <boost/scoped_ptr.hpp>
#include <iostream>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "lsfs.xtreemfs/lsfs_options.h"
#include "util/logging.h"
#include "xtreemfs/MRC.pb.h"
using namespace std;
using namespace xtreemfs;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
/**
 * lsfs.xtreemfs: lists the volumes registered at an MRC.
 *
 * @return 0 if the volumes were listed successfully; 1 if the usage, help
 *         or version text was shown or if an error occurred.
 */
int main(int argc, char* argv[]) {
  // Parse command line options.
  LsfsOptions options;
  bool invalid_commandline_parameters = false;
  try {
    options.ParseCommandLine(argc, argv);
  } catch(const XtreemFSException& e) {
    cout << "Invalid parameters found, error: " << e.what() << endl << endl;
    invalid_commandline_parameters = true;
  }

  // Display help if needed.
  if (options.empty_arguments_list || invalid_commandline_parameters) {
    cout << options.ShowCommandLineUsage() << endl;
    return 1;
  }
  if (options.show_help) {
    cout << options.ShowCommandLineHelp() << endl;
    return 1;
  }
  // Show only the version.
  if (options.show_version) {
    cout << options.ShowVersion("lsfs.xtreemfs") << endl;
    return 1;
  }

  // Set user_credentials.
  UserCredentials user_credentials;
  user_credentials.set_username("xtreemfs");
  user_credentials.add_groups("xtreemfs");

  Auth auth;
  if (options.admin_password.empty()) {
    auth.set_auth_type(AUTH_NONE);
  } else {
    auth.set_auth_type(AUTH_PASSWORD);
    auth.mutable_auth_passwd()->set_password(options.admin_password);
  }

  bool success = true;
  boost::scoped_ptr<Client> client;
  boost::scoped_ptr<xtreemfs::pbrpc::Volumes> volumes(NULL);
  try {
    // Create a new client and start it. This happens inside the try block
    // (as in mkfs.xtreemfs and rmfs.xtreemfs) so that a failure is reported
    // as an error instead of terminating the process with an uncaught
    // exception.
    client.reset(Client::CreateClient(
        "DIR-host-not-required-for-lsfs",  // Using a bogus value as DIR address.  // NOLINT
        user_credentials,
        options.GenerateSSLOptions(),
        options));
    client->Start();

    // List the volumes.
    cout << "Listing all volumes of the MRC: " << options.xtreemfs_url << endl;
    volumes.reset(client->ListVolumes(options.mrc_service_address, auth));
  } catch (const XtreemFSException& e) {
    success = false;
    cout << "Failed to list the volumes, error:\n"
         << "\t" << e.what() << endl;
  }

  // Cleanup. The client may not exist if its creation failed.
  if (client) {
    client->Shutdown();
  }

  if (success) {
    cout << "Volumes on " << options.mrc_service_address
         << " (Format: volume name -> volume UUID):" << endl;
    for (int i = 0; i < volumes->volumes_size(); i++) {
      const xtreemfs::pbrpc::Volume& volume = volumes->volumes(i);
      cout << "\t" << volume.name() << "\t->\t" << volume.id() << endl;
    }
    cout << "End of List." << endl;
    return 0;
  } else {
    return 1;
  }
}

View File

@@ -0,0 +1,116 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "lsfs.xtreemfs/lsfs_options.h"
#include <boost/program_options/cmdline.hpp>
#include <iostream>
#include <sstream>
#include "libxtreemfs/helper.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Sets the lsfs.xtreemfs specific defaults, the usage text and registers
 * the "admin_password" option.
 */
LsfsOptions::LsfsOptions() : Options() {
  // Override the default of Options().
  max_tries = 1;

  // Usage text; also shown as first part of the help.
  helptext_usage_ =
      "lsfs.xtreemfs: List the volumes of a specific MRC.\n"
      "\n"
      "Usage:\n"
      "\tlsfs.xtreemfs [options] [pbrpc[g|s]://]<mrc-host>[:port]\n"  // NOLINT
      "\n"
      " Example: lsfs.xtreemfs localhost/myVolume\n";

  // The password is empty unless it was given on the command line.
  admin_password = "";
  po::options_description admin_options("Admin Password");
  admin_options.add_options()
      ("admin_password",
       po::value(&admin_password)->default_value(admin_password),
       "MRC's admin_password (not required if not set at the MRC).");

  lsfs_descriptions_.add(admin_options);
}
void LsfsOptions::ParseCommandLine(int argc, char** argv) {
// Parse general options and retrieve unregistered options for own parsing.
vector<string> options = Options::ParseCommandLine(argc, argv);
// Read Volume URL from command line.
po::positional_options_description p;
p.add("mrc_volume_url", 1);
po::options_description positional_options("List Volumes URL");
positional_options.add_options()
("mrc_volume_url", po::value(&xtreemfs_url), "URL to MRC");
// Parse command line.
po::options_description all_descriptions;
all_descriptions.add(positional_options).add(lsfs_descriptions_);
po::variables_map vm;
try {
po::store(po::command_line_parser(options)
.options(all_descriptions)
.positional(p)
.style(style::default_style & ~style::allow_guessing)
.run(), vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
// Do not check parameters if the help shall be shown.
if (show_help || empty_arguments_list || show_version) {
return;
}
// Extract information from command line.
Options::ParseURL(kMRC);
// Check for MRC host
if(service_addresses.empty()) {
throw InvalidCommandLineParametersException("missing MRC host.");
} else if (service_addresses.IsAddressList()) {
throw InvalidCommandLineParametersException(
"more than one MRC host was specified.");
} else {
mrc_service_address = service_addresses.GetAddresses().front();
}
}
/** Returns the usage text followed by a hint to the complete help. */
std::string LsfsOptions::ShowCommandLineUsage() {
  std::string usage(helptext_usage_);
  usage += "\nFor complete list of options, please specify -h or --help.\n";
  return usage;
}
std::string LsfsOptions::ShowCommandLineHelp() {
ostringstream stream;
// No help text given in descriptions for positional mount options. Instead
// the usage is explained here.
stream << helptext_usage_
// Descriptions of this class.
<< lsfs_descriptions_
// Descriptions of the general options.
<< endl
<< Options::ShowCommandLineHelpVolumeListing();
return stream.str();
}
} // namespace xtreemfs

View File

@@ -0,0 +1,239 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <boost/scoped_ptr.hpp>
#include <iostream>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/system_user_mapping.h"
#include "libxtreemfs/user_mapping.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "mkfs.xtreemfs/mkfs_options.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
/**
 * mkfs.xtreemfs: creates a new volume at an MRC.
 *
 * @return 0 if the volume was created; 1 if the usage, help or version text
 *         was shown or if an error occurred.
 */
int main(int argc, char* argv[]) {
  // Parse command line options.
  MkfsOptions options;
  bool invalid_commandline_parameters = false;
  try {
    options.ParseCommandLine(argc, argv);
  } catch(const XtreemFSException& e) {
    cout << "Invalid parameters found, error: " << e.what() << endl << endl;
    invalid_commandline_parameters = true;
  }

  // Display help if needed.
  if (options.empty_arguments_list || invalid_commandline_parameters) {
    cout << options.ShowCommandLineUsage() << endl;
    return 1;
  }
  if (options.show_help) {
    cout << options.ShowCommandLineHelp() << endl;
    return 1;
  }
  // Show only the version.
  if (options.show_version) {
    cout << options.ShowVersion("mkfs.xtreemfs") << endl;
    return 1;
  }

  bool success = true;
  boost::scoped_ptr<SystemUserMapping> system_user_mapping;
  boost::scoped_ptr<Client> client;
  try {
    // Start logging manually (although it would be automatically started by
    // ClientImplementation()) as its required by UserMapping.
    initialize_logger(options.log_level_string,
                      options.log_file_path,
                      LEVEL_WARN);

    // Set user_credentials.
    system_user_mapping.reset(SystemUserMapping::GetSystemUserMapping());
    // Check if the user specified an additional user mapping in options.
    UserMapping* additional_um = UserMapping::CreateUserMapping(
        options.additional_user_mapping_type,
        options);
    if (additional_um) {
      system_user_mapping->RegisterAdditionalUserMapping(additional_um);
      system_user_mapping->StartAdditionalUserMapping();
    }

    // If no owner name or owning group name is specified, the MRC uses the
    // UserCredentials to set the owner and owning group of the new volume.
    // See http://code.google.com/p/xtreemfs/issues/detail?id=204.
    UserCredentials user_credentials;
    system_user_mapping->GetUserCredentialsForCurrentUser(&user_credentials);
    if (!options.owner_username.empty()) {
      user_credentials.set_username(options.owner_username);
    } else {
#ifndef WIN32
      // Warn the user if the SystemUserMapping failed to resolve the UID to
      // a string (e.g. when there is no entry in /etc/passwd).
      if (CheckIfUnsignedInteger(user_credentials.username())) {
        if (Logging::log->loggingActive(LEVEL_WARN)) {
          // options.owner_username is empty in this branch, so the value
          // which will actually be used comes from the user credentials.
          Logging::log->getLog(LEVEL_WARN)
              << "Failed to map the UID "
              << geteuid() << " to a username."
              " Now the value \"" << user_credentials.username()
              << "\" will be set"
              " as owner of the volume."
              " Keep in mind that mount.xtreemfs does"
              " always try to map UIDs to names. If this is not consistent"
              " over all your systems (the UID does not always get mapped to"
              " the same name), you may run into permission problems." << endl;
        }
      }
#endif  // !WIN32
    }
    if (user_credentials.username().empty()) {
      cout << "Error: No name found for the current user\n";
      return 1;
    }

    if (!options.owner_groupname.empty()) {
      user_credentials.add_groups(options.owner_groupname);
    } else {
#ifndef WIN32
      // Warn the user if the SystemUserMapping failed to resolve the GID to
      // a string (e.g. when there is no entry in /etc/group).
      // groups(0) must only be accessed if there is at least one group.
      if (user_credentials.groups_size() > 0 &&
          CheckIfUnsignedInteger(user_credentials.groups(0))) {
        if (Logging::log->loggingActive(LEVEL_WARN)) {
          // options.owner_groupname is empty in this branch, so the value
          // which will actually be used comes from the user credentials.
          Logging::log->getLog(LEVEL_WARN)
              << "Failed to map the GID " << getegid() << " to a group name."
              " Now the value \"" << user_credentials.groups(0)
              << "\" will be set"
              " as owning group of the volume."
              " Keep in mind that mount.xtreemfs does"
              " always try to map GIDs to names. If this is not consistent over"
              " all your systems (the GID does not always get mapped to the"
              " same group name), you may run into permission problems."
              << endl;
        }
      }
#endif  // !WIN32
    }
    // An empty list of groups is treated like an empty primary group name.
    if (user_credentials.groups_size() == 0 ||
        user_credentials.groups(0).empty()) {
      cout << "Error: No name found for the primary group of the current user"
              "\n";
      return 1;
    }

    long quota = parseByteNumber(options.volume_quota);
    if (quota == -1) {
      cout << "Error: " << options.volume_quota << " is not a valid quota.\n";
      return 1;
    }
    if (quota < 0) {
      cout << "Error: Quota has to be greater or equal zero \n";
      return 1;
    }

    Auth auth;
    if (options.admin_password.empty()) {
      auth.set_auth_type(AUTH_NONE);
    } else {
      auth.set_auth_type(AUTH_PASSWORD);
      auth.mutable_auth_passwd()->set_password(options.admin_password);
    }

    // Repeat the used options.
    cout << "Trying to create the volume: " << options.xtreemfs_url << "\n"
         << "\n"
         << "Using options:\n";
    if (!options.owner_username.empty()) {
      cout << " Owner:\t\t\t" << options.owner_username << "\n";
    } else {
      if (!options.SSLEnabled()) {
        // We cannot tell if it's a user certificate - in that case the MRC
        // ignores the UserCredentials and extracts the owner from the cert.
        // To be on the safe side, we output the definite owner only in non-SSL
        // cases.
        cout << " Owner:\t\t\t" << user_credentials.username() << "\n";
      }
    }
    if (!options.owner_groupname.empty()) {
      cout << " Owning group:\t\t\t" << options.owner_groupname << "\n";
    } else {
      if (!options.SSLEnabled()) {
        // We cannot tell if it's a user certificate - in that case the MRC
        // ignores the UserCredentials and extracts the owner from the cert.
        // To be on the safe side, we output the definite owner only in non-SSL
        // cases.
        cout << " Owning group:\t\t\t" << user_credentials.groups(0) << "\n";
      }
    }
    cout << " Mode:\t\t\t\t" << options.volume_mode_octal << "\n"
         << " Access Control Policy:\t" << options.access_policy_type_string
         << "\n"
         << " Quota:\t\t\t" << options.volume_quota << "\n"
         << "\n"
         << " Default striping policy:\t\t"
         << options.default_striping_policy_type_string << "\n"
         << " Default stripe size (object size):\t"
         << options.default_stripe_size << "\n"
         << " Default stripe width (# OSDs):\t"
         << options.default_stripe_width << "\n"
         << "\n";
    if (options.volume_attributes.size() > 0) {
      cout << " Volume attributes (Name = Value)" << endl;
      for (list<KeyValuePair*>::iterator it = options.volume_attributes.begin();
           it != options.volume_attributes.end();
           ++it) {
        cout << " " << (*it)->key() << " = " << (*it)->value() << endl;
      }
      cout << endl;
    }

    // Create a new client and start it.
    client.reset(Client::CreateClient(
        "DIR-host-not-required-for-mkfs",  // Using a bogus value as DIR address.  // NOLINT
        user_credentials,
        options.GenerateSSLOptions(),
        options));
    client->Start();

    // Create the volume on the MRC.
    client->CreateVolume(options.mrc_service_address,
                         auth,
                         user_credentials,
                         options.volume_name,
                         options.volume_mode_decimal,
                         options.owner_username,
                         options.owner_groupname,
                         options.access_policy_type,
                         quota,
                         options.default_striping_policy_type,
                         options.default_stripe_size,
                         options.default_stripe_width,
                         options.volume_attributes);
  } catch (const XtreemFSException& e) {
    success = false;
    cout << "Failed to create the volume, error:\n"
         << "\t" << e.what() << endl;
  }

  // Cleanup. Both objects may not exist if an exception was thrown before
  // they were created.
  if (client) {
    client->Shutdown();
  }
  if (system_user_mapping) {
    system_user_mapping->StopAdditionalUserMapping();
  }

  if (success) {
    cout << "Successfully created volume \"" << options.volume_name << "\" at "
            "MRC: " << options.xtreemfs_url << endl;
    return 0;
  } else {
    return 1;
  }
}

View File

@@ -0,0 +1,280 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "mkfs.xtreemfs/mkfs_options.h"
#include <cmath>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/program_options/cmdline.hpp>
#include <iostream>
#include <sstream>
#include "libxtreemfs/helper.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Sets the mkfs.xtreemfs specific defaults, the usage text and registers all
 * mkfs specific command line options.
 */
MkfsOptions::MkfsOptions() : Options() {
  // Override the default of Options().
  max_tries = 1;

  // Usage text; also shown as first part of the help.
  helptext_usage_ =
      "mkfs.xtreemfs: Create an XtreemFS Volume on a specific MRC.\n"
      "\n"
      "Usage:\n"
      "\tmkfs.xtreemfs [options] [pbrpc[g|s]://]<mrc-host>[:port]/<new-volume-name>\n"  // NOLINT
      "\n"
      " Example: mkfs.xtreemfs localhost/myVolume\n";

  // Defaults of all members. They have to be set before the options are
  // registered since default_value() is given the current member value.
  admin_password = "";
  volume_mode_decimal = 511;  // 777 (octal) == 511 (decimal).
  volume_mode_octal = 777;
  volume_quota = "0";
  owner_username = "";
  owner_groupname = "";
  access_policy_type = xtreemfs::pbrpc::ACCESS_CONTROL_POLICY_POSIX;
  access_policy_type_string = "POSIX";
  default_striping_policy_type = xtreemfs::pbrpc::STRIPING_POLICY_RAID0;
  default_striping_policy_type_string = "RAID0";
  default_stripe_size = 128;
  default_stripe_width = 1;
  chown_non_root = false;

  // Password.
  po::options_description admin_options("Admin Password");
  admin_options.add_options()
      ("admin_password",
       po::value(&admin_password)->default_value(admin_password),
       "MRC's admin_password (not required if not set at the MRC).");

  // Volume options.
  po::options_description volume_options("Volume Options");
  volume_options.add_options()
      ("mode,m",
       po::value(&volume_mode_octal)->default_value(volume_mode_octal),
       "Mode of the volume's root directory.")
      ("owner-username,u",
       po::value(&owner_username), "Owner of the new volume "
       "(by default it is the username of the effective user id).")
      ("owner-groupname,g",
       po::value(&owner_groupname), "Owning group of the new"
       " volume (by default it is the groupname of the effective group id).")
      ("access-control-policy,a",
       po::value(&access_policy_type_string)
           ->default_value(access_policy_type_string),
       "Access-control-policy=NULL|POSIX|VOLUME")
      ("quota,q", po::value(&volume_quota)->default_value(volume_quota),
       "Quota of the volume in bytes (default value 0, i.e. quota is disabled), format: <value>M|G|T");

  // Striping policy options.
  po::options_description striping_options("Striping Policy Options");
  striping_options.add_options()
      ("striping-policy,p",
       po::value(&default_striping_policy_type_string)
           ->default_value(default_striping_policy_type_string),
       "Striping policy=RAID0")
      ("striping-policy-stripe-size,s",
       po::value(&default_stripe_size)->default_value(default_stripe_size),
       "Stripe size in kB.")
      ("striping-policy-width,w",
       po::value(&default_stripe_width)->default_value(default_stripe_width),
       "Number of OSDs (stripes) per replica.");

  // Volume Attributes.
  po::options_description attribute_options("Volume Attributes");
  attribute_options.add_options()
      ("volume-attribute",
       po::value< vector<string> >(&volume_attributes_strings),
       "Define volume specific attributes of the form name=value, e.g. "
       "\"chown_non_root=true\".")
      ("chown-non-root",
       po::value(&chown_non_root)->zero_tokens(),
       "Shortcut for --volume-attribute chown_non_root=true. If this attribute is"
       " not set, regular users (everybody except root) are not allowed to change"
       " the ownership of their _own_ files.");

  // Collect all groups; they are printed in this order in the help.
  mkfs_descriptions_.add(admin_options);
  mkfs_descriptions_.add(volume_options);
  mkfs_descriptions_.add(striping_options);
  mkfs_descriptions_.add(attribute_options);
}
/** Frees the KeyValuePair objects referenced by volume_attributes. */
MkfsOptions::~MkfsOptions() {
  list<KeyValuePair*>::iterator attribute = volume_attributes.begin();
  while (attribute != volume_attributes.end()) {
    delete *attribute;  // The list only stores raw pointers.
    ++attribute;
  }
}
void MkfsOptions::ParseCommandLine(int argc, char** argv) {
// Parse general options and retrieve unregistered options for own parsing.
vector<string> options = Options::ParseCommandLine(argc, argv);
// Read Volume URL from command line.
po::positional_options_description p;
p.add("mrc_volume_url", 1);
po::options_description positional_options("Create Volume URL");
positional_options.add_options()
("mrc_volume_url", po::value(&xtreemfs_url), "volume to create");
// Parse command line.
po::options_description all_descriptions;
all_descriptions.add(positional_options).add(mkfs_descriptions_);
po::variables_map vm;
try {
po::store(po::command_line_parser(options)
.options(all_descriptions)
.positional(p)
.style(style::default_style & ~style::allow_guessing)
.run(), vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
// Do not check parameters if the help shall be shown.
if (show_help || empty_arguments_list || show_version) {
return;
}
// Extract information from command line.
Options::ParseURL(kMRC);
// Check for MRC host
if(service_addresses.empty()) {
throw InvalidCommandLineParametersException("missing MRC host.");
} else if (service_addresses.IsAddressList()) {
throw InvalidCommandLineParametersException(
"more than one MRC host was specified.");
} else {
mrc_service_address = service_addresses.GetAddresses().front();
}
// Check for required parameters.
if (volume_name.empty()) {
throw InvalidCommandLineParametersException("missing volume name.");
}
// Abort the user explicitly specified numeric ids as owner.
if (CheckIfUnsignedInteger(owner_username)) {
throw InvalidCommandLineParametersException("Do not use numeric IDs as "
"owner. Use names instead, e.g. \"root\" instead of \"0\".");
}
if (CheckIfUnsignedInteger(owner_groupname)) {
throw InvalidCommandLineParametersException("Do not use numeric IDs as "
"owner group. Use names instead, e.g. \"root\" instead of \"0\".");
}
// Convert the mode from octal to decimal.
volume_mode_decimal = OctalToDecimal(volume_mode_octal);
if (boost::iequals(access_policy_type_string, "NULL")) {
access_policy_type = xtreemfs::pbrpc::ACCESS_CONTROL_POLICY_NULL;
} else if (boost::iequals(access_policy_type_string, "POSIX")) {
access_policy_type = xtreemfs::pbrpc::ACCESS_CONTROL_POLICY_POSIX;
} else if (boost::iequals(access_policy_type_string, "VOLUME")) {
access_policy_type = xtreemfs::pbrpc::ACCESS_CONTROL_POLICY_VOLUME;
} else {
throw InvalidCommandLineParametersException("Unknown access policy (" +
access_policy_type_string + ") specified.");
}
if (boost::iequals(default_striping_policy_type_string, "RAID0")) {
default_striping_policy_type = xtreemfs::pbrpc::STRIPING_POLICY_RAID0;
} else {
throw InvalidCommandLineParametersException("Currently the RAID0 striping"
"policy is the only one available. Set the stripe width (see -w) to 1"
" to disable striping at all.");
}
// Process volume attributes shortcuts.
if (chown_non_root) {
volume_attributes_strings.push_back("chown_non_root=true");
}
if (grid_auth_mode_globus) {
volume_attributes_strings.push_back("globus_gridmap=true");
}
if (grid_auth_mode_unicore) {
volume_attributes_strings.push_back("unicore_uudb=true");
}
// Parse list of volume attributes.
for (size_t i = 0; i < volume_attributes_strings.size(); i++) {
// Check if there is exactly one "=" delimiter.
size_t first_match = volume_attributes_strings[i].find_first_of("=");
if (first_match == string::npos) {
throw InvalidCommandLineParametersException("The attribute key/value pair"
" " + volume_attributes_strings[i] + " misses a \"=\".");
}
size_t next_match = volume_attributes_strings[i].find_first_of(
"=",
first_match + 1);
if (next_match != string::npos) {
throw InvalidCommandLineParametersException("The attribute key/value pair"
" " + volume_attributes_strings[i] + " must not contain"
" multiple \"=\".");
}
// Parse attribute.
KeyValuePair* attribute = new KeyValuePair();
attribute->set_key(volume_attributes_strings[i].substr(0, first_match));
attribute->set_value(volume_attributes_strings[i].substr(
min(first_match + 1, volume_attributes_strings[i].length()),
max(static_cast<size_t>(0),
volume_attributes_strings[i].length() - first_match)));
volume_attributes.push_back(attribute);
}
}
/** Returns the usage text followed by a hint to the complete help. */
std::string MkfsOptions::ShowCommandLineUsage() {
  std::string usage(helptext_usage_);
  usage += "\nFor complete list of options, please specify -h or --help.\n";
  return usage;
}
std::string MkfsOptions::ShowCommandLineHelp() {
ostringstream stream;
// No help text given in descriptions for positional mount options. Instead
// the usage is explained here.
stream << helptext_usage_
// Descriptions of this class.
<< mkfs_descriptions_
// Descriptions of the general options.
<< endl
<< Options::ShowCommandLineHelpVolumeCreationAndDeletion();
return stream.str();
}
/**
 * Interprets the decimal digits of octal as an octal number and returns its
 * value, e.g. 777 -> 511.
 *
 * The place values are computed with integer arithmetic. Truncating the
 * floating point result of pow() to int may yield a value which is off by
 * one if pow() is not exact for integral arguments.
 *
 * @param octal number whose decimal digits are the octal digits. The digits
 *              are not validated: 8 and 9 are accepted and weighted like any
 *              other digit.
 * @return the value of the octal number.
 */
int MkfsOptions::OctalToDecimal(int octal) {
  int result = 0;
  int place_value = 1;  // 8^i for the i-th digit (counted from the right).
  while (octal != 0) {
    const int digit = octal % 10;
    result += digit * place_value;
    octal /= 10;
    place_value *= 8;
  }
  return result;
}
} // namespace xtreemfs

View File

@@ -0,0 +1,141 @@
/*
* Copyright (c) 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <boost/scoped_ptr.hpp>
#include <iostream>
#include <string>
#include "libxtreemfs/client.h"
#include "libxtreemfs/file_handle.h"
#include "libxtreemfs/helper.h"
#include "libxtreemfs/system_user_mapping.h"
#include "libxtreemfs/user_mapping.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "rmfs.xtreemfs/rmfs_options.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
/**
 * rmfs.xtreemfs: deletes a volume at an MRC.
 *
 * Unless the "force" option is set, the user has to confirm the deletion by
 * entering "YES".
 *
 * @return 0 if the volume was deleted; 1 if the usage, help or version text
 *         was shown, the deletion was not confirmed or an error occurred.
 */
int main(int argc, char* argv[]) {
  // Parse command line options.
  RmfsOptions options;
  bool invalid_commandline_parameters = false;
  try {
    options.ParseCommandLine(argc, argv);
  } catch(const XtreemFSException& e) {
    cout << "Invalid parameters found, error: " << e.what() << endl << endl;
    invalid_commandline_parameters = true;
  }

  // Display help if needed.
  if (options.empty_arguments_list || invalid_commandline_parameters) {
    cout << options.ShowCommandLineUsage() << endl;
    return 1;
  }
  if (options.show_help) {
    cout << options.ShowCommandLineHelp() << endl;
    return 1;
  }
  // Show only the version.
  if (options.show_version) {
    cout << options.ShowVersion("rmfs.xtreemfs") << endl;
    return 1;
  }

  // Safety question
  if (!options.force) {
    string answer;
    cout << "Do you really want to delete the volume: \""
         << options.xtreemfs_url << "\"?" << endl
         << "Answer with \"YES\" to proceed: ";
    getline(cin, answer);
    if (answer != "YES") {
      return 1;
    }
  }

  bool success = true;
  boost::scoped_ptr<SystemUserMapping> system_user_mapping;
  boost::scoped_ptr<Client> client;
  try {
    // Start logging manually (although it would be automatically started by
    // ClientImplementation()) as its required by UserMapping.
    initialize_logger(options.log_level_string,
                      options.log_file_path,
                      LEVEL_WARN);

    // Set user_credentials.
    system_user_mapping.reset(SystemUserMapping::GetSystemUserMapping());
    // Check if the user specified an additional user mapping in options.
    UserMapping* additional_um = UserMapping::CreateUserMapping(
        options.additional_user_mapping_type,
        options);
    if (additional_um) {
      system_user_mapping->RegisterAdditionalUserMapping(additional_um);
      system_user_mapping->StartAdditionalUserMapping();
    }

    UserCredentials user_credentials;
    system_user_mapping->GetUserCredentialsForCurrentUser(&user_credentials);
    if (user_credentials.username().empty()) {
      cout << "Error: No name found for the current user (using the configured "
              "UserMapping: " << options.additional_user_mapping_type << ")\n";
      return 1;
    }
    // The groups won't be checked and therefore may be empty.

    // Create a new client and start it.
    client.reset(Client::CreateClient(
        "DIR-host-not-required-for-rmfs",  // Using a bogus value as DIR address.  // NOLINT
        user_credentials,
        options.GenerateSSLOptions(),
        options));
    client->Start();

    // Delete the volume.
    Auth auth;
    if (options.admin_password.empty()) {
      auth.set_auth_type(AUTH_NONE);
    } else {
      auth.set_auth_type(AUTH_PASSWORD);
      auth.mutable_auth_passwd()->set_password(options.admin_password);
    }

    cout << "Trying to delete the volume: " << options.xtreemfs_url << endl;
    client->DeleteVolume(options.mrc_service_address,
                         auth,
                         user_credentials,
                         options.volume_name);
  } catch (const XtreemFSException& e) {
    success = false;
    cout << "Failed to delete the volume, error:\n"
         << "\t" << e.what() << endl;
  }

  // Cleanup. Both objects may not exist if an exception was thrown before
  // they were created.
  if (client) {
    client->Shutdown();
  }
  if (system_user_mapping) {
    system_user_mapping->StopAdditionalUserMapping();
  }

  if (success) {
    cout << "Successfully deleted the volume \"" << options.volume_name
         << "\" at MRC: " << options.mrc_service_address << "\n"
         << "\n"
         << "The disk space on the OSDs, occupied by the objects of the\n"
            "files of the deleted volume, is not freed yet.\n"
         << "Run the tool 'xtfs_cleanup' to free it." << endl;
    return 0;
  } else {
    return 1;
  }
}

View File

@@ -0,0 +1,127 @@
/*
* Copyright (c) 2011 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "rmfs.xtreemfs/rmfs_options.h"
#include <boost/program_options/cmdline.hpp>
#include <iostream>
#include <sstream>
#include "libxtreemfs/helper.h"
#include "libxtreemfs/pbrpc_url.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
namespace po = boost::program_options;
namespace style = boost::program_options::command_line_style;
namespace xtreemfs {
/**
 * Initializes the rmfs.xtreemfs specific defaults, the usage text and the
 * command line options owned by this class ("admin_password" and "force").
 */
RmfsOptions::RmfsOptions()
    : Options(),
      rmfs_descriptions_("rmfs.xtreemfs Options") {
  // Defaults of the members registered below. They must be assigned before
  // add_options() reads them as default values.
  force = false;
  admin_password = "";

  // Override a default inherited from Options().
  max_tries = 1;

  helptext_usage_ =
      "rmfs.xtreemfs: Delete an XtreemFS Volume on a specific MRC.\n"
      "\n"
      "Usage:\n"
      "\trmfs.xtreemfs [options] [pbrpc[g|s]://]<mrc-host>[:port]/<volume-name>\n"  // NOLINT
      "\n"
      " Example: rmfs.xtreemfs localhost/myVolume\n";

  // Register the options one at a time instead of in a single chained call.
  po::options_description_easy_init register_option =
      rmfs_descriptions_.add_options();
  register_option(
      "admin_password",
      po::value(&admin_password)->default_value(admin_password),
      "MRC's admin_password (not required if not set at the MRC).");
  register_option(
      "force,f",
      po::value(&force)->default_value(force)->zero_tokens(),
      "Never prompt. Overrides safety questions.");

  // TODO(mberlin): Add an option to specify the triggering of the cleanup
  //                process to immediately free (now) orphaned objects.
}
void RmfsOptions::ParseCommandLine(int argc, char** argv) {
// Parse general options and retrieve unregistered options for own parsing.
vector<string> options = Options::ParseCommandLine(argc, argv);
// Read Volume URL from command line.
po::positional_options_description p;
p.add("mrc_volume_url", 1);
po::options_description positional_options("Delete Volume URL");
positional_options.add_options()
("mrc_volume_url", po::value(&xtreemfs_url), "volume to delete");
// Parse command line.
po::options_description all_descriptions;
all_descriptions.add(positional_options).add(rmfs_descriptions_);
po::variables_map vm;
try {
po::store(po::command_line_parser(options)
.options(all_descriptions)
.positional(p)
.style(style::default_style & ~style::allow_guessing)
.run(), vm);
po::notify(vm);
} catch(const std::exception& e) {
// Rethrow boost errors due to invalid command line parameters.
throw InvalidCommandLineParametersException(string(e.what()));
}
// Do not check parameters if the help shall be shown.
if (show_help || empty_arguments_list || show_version) {
return;
}
// Extract information from command line.
Options::ParseURL(kMRC);
// Check for MRC host
if(service_addresses.empty()) {
throw InvalidCommandLineParametersException("missing MRC host.");
} else if (service_addresses.IsAddressList()) {
throw InvalidCommandLineParametersException(
"more than one MRC host was specified.");
} else {
mrc_service_address = service_addresses.GetAddresses().front();
}
// Check for required parameters.
if (volume_name.empty()) {
throw InvalidCommandLineParametersException("missing volume name.");
}
}
/** Returns the short usage text followed by a hint about -h/--help. */
std::string RmfsOptions::ShowCommandLineUsage() {
  std::string usage(helptext_usage_);
  usage += "\nFor complete list of options, please specify -h or --help.\n";
  return usage;
}
/**
 * Returns the complete help text: the usage text, the options of this class
 * and the text returned by
 * Options::ShowCommandLineHelpVolumeCreationAndDeletion().
 */
std::string RmfsOptions::ShowCommandLineHelp() {
  ostringstream stream;

  // The positional argument has no option description of its own; it is
  // explained by the usage text.
  stream << helptext_usage_ << endl;

  // Options registered by this class.
  stream << rmfs_descriptions_;

  // General options provided by the base class.
  stream << endl;
  stream << Options::ShowCommandLineHelpVolumeCreationAndDeletion();

  return stream.str();
}
} // namespace xtreemfs

845
cpp/src/rpc/client.cpp Normal file
View File

@@ -0,0 +1,845 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
* 2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "rpc/client.h"
#include <cstdio>
#include <cstdlib>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/interprocess/detail/atomic.hpp>
#include <fstream>
#include <iostream>
#include <utility>
#include <set>
#include <string>
#include "util/logging.h"
#ifdef HAS_OPENSSL
#include <boost/asio/ssl.hpp>
#include <openssl/err.h>
#include <openssl/pem.h>
#include <openssl/pkcs12.h>
#include <openssl/rand.h>
#include <openssl/x509.h>
#endif // HAS_OPENSSL
#ifdef WIN32
#include <tchar.h>
#else
#include <unistd.h>
#endif // WIN32
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
using namespace std;
using namespace boost;
using namespace google::protobuf;
#if (BOOST_VERSION < 104800)
using boost::interprocess::detail::atomic_inc32;
#else
using boost::interprocess::ipcdetail::atomic_inc32;
#endif // BOOST_VERSION < 104800
#ifdef _MSC_VER
// Disable "warning C4996: 'strdup': The POSIX name for this item is deprecated. Instead, use the ISO C++ conformant name: _strdup. // NOLINT
#pragma warning(push)
#pragma warning(disable:4996)
#endif // _MSC_VER
namespace xtreemfs {
namespace rpc {
/**
 * Creates the RPC client and, if compiled with HAS_OPENSSL and "options" is
 * not NULL, sets up the SSL context.
 *
 * @param connect_timeout_s  Stored in connect_timeout_s_.
 * @param request_timeout_s  Stored in rq_timeout_s_.
 * @param max_con_linger     Stored in max_con_linger_.
 * @param options            SSL options or NULL. Ownership is taken: without
 *                           HAS_OPENSSL the object is deleted right here,
 *                           otherwise it is stored in ssl_options and deleted
 *                           by the destructor.
 *
 * With a PKCS#12 file, the private key, the certificate and any additional CA
 * certificates are written to temporary files whose names are remembered in
 * pemFileName, certFileName and trustedCAsFileName.
 *
 * Most setup failures are logged and terminate the process with exit(1).
 */
Client::Client(int32_t connect_timeout_s,
int32_t request_timeout_s,
int32_t max_con_linger,
const SSLOptions* options)
: service_(),
stopped_(false),
stopped_ioservice_only_(false),
callid_counter_(1),
rq_timeout_timer_(service_),
rq_timeout_s_(request_timeout_s),
connect_timeout_s_(connect_timeout_s),
max_con_linger_(max_con_linger)
// NOTE: The conditional opened here is closed by the "#endif // HAS_OPENSSL"
// after verify_certificate_callback(), i.e. the #else branch also contains
// the SSL-only functions following this constructor.
#ifndef HAS_OPENSSL
{
// Delete SSL options because they are not used when not compiled with SSL.
delete options;
}
#else
,use_gridssl_(false),
ssl_options(options),
pemFileName(NULL),
certFileName(NULL),
trustedCAsFileName(NULL),
ssl_context_(NULL) {
// Check if ssl options were passed.
if (options != NULL) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO) << "SSL support activated." << endl;
}
use_gridssl_ = options->use_grid_ssl();
// Falls back to sslv23_client if the configured method string is unknown.
ssl_context_ = new boost::asio::ssl::context(
service_,
string_to_ssl_method(
options->ssl_method_string(),
boost::asio::ssl::context_base::sslv23_client));
ssl_context_->set_options(boost::asio::ssl::context::no_sslv2);
#if (BOOST_VERSION > 104601)
// Verify certificate callback can be conveniently specified from
// Boost 1.47.0 onwards.
ssl_context_->set_verify_mode(boost::asio::ssl::context::verify_peer |
boost::asio::ssl::context::verify_fail_if_no_peer_cert);
ssl_context_->set_verify_callback(boost::bind(&Client::verify_certificate_callback,
this, _1, _2));
#else // BOOST_VERSION > 104601
// The verify callback is not a Client member here, so make sure it can
// retrieve the ssl_options later.
SSL_CTX_set_app_data(ssl_context_->impl(), ssl_options);
SSL_CTX_set_verify(ssl_context_->impl(),
boost::asio::ssl::context::verify_peer |
boost::asio::ssl::context::verify_fail_if_no_peer_cert,
&xtreemfs::rpc::verify_certificate_callback);
#endif // BOOST_VERSION > 104601
OpenSSL_add_all_algorithms();
OpenSSL_add_all_ciphers();
OpenSSL_add_all_digests();
SSL_load_error_strings();
// check if pkcs12 was entered
// the pkcs12 file has to be read using openssl
// afterwards the pem- and cert-files are written to temporary files on disk
// these can be accessed by boost::assio::ssl
if (!options->pkcs12_file_name().empty()) {
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO) << "SSL support using PKCS#12 file "
<< options->pkcs12_file_name() << endl;
}
// Templates for the temporary files; replaced by the real file names by
// create_and_open_temporary_ssl_file().
std::string pemFileTemplate = "pmXXXXXX";
std::string certFileTemplate = "ctXXXXXX";
FILE *p12_file = fopen(options->pkcs12_file_name().c_str(), "rb");
// read the pkcs12 file
if (!p12_file) {
Logging::log->getLog(LEVEL_ERROR) << "Error opening PKCS#12 file: "
<< options->pkcs12_file_name() << ". (file not found)" << endl;
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
PKCS12* p12 = d2i_PKCS12_fp(p12_file, NULL);
fclose(p12_file);
if (!p12) {
Logging::log->getLog(LEVEL_ERROR) << "Error reading PKCS#12 file: "
<< options->pkcs12_file_name() << ". (no access rights?)" << endl;
ERR_print_errors_fp(stderr);
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
EVP_PKEY* pkey = NULL;
X509* cert = NULL;
STACK_OF(X509)* ca = NULL;
// parse pkcs12 file
if (!PKCS12_parse(p12,
options->pkcs12_file_password().c_str(),
&pkey,
&cert,
&ca)) {
Logging::log->getLog(LEVEL_ERROR) << "Error parsing PKCS#12 file: "
<< options->pkcs12_file_name()
<< " Please check if the supplied certificate password is correct."
<< endl;
ERR_print_errors_fp(stderr);
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
PKCS12_free(p12);
if (ca == NULL) {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN) << "Expected one or more additional "
"certificates in " << options->pkcs12_file_name() << " in order "
"to verify the services' certificates." << endl;
}
} else if (sk_X509_num(ca) > 0) {
// Setup any additional certificates as trusted root CAs in one file.
std::string trusted_cas_template("caXXXXXX");
// NOTE(review): Unlike pemFile/certFile below, the returned FILE* is not
// checked for NULL before it is written to and closed.
FILE* trusted_cas_file =
create_and_open_temporary_ssl_file(&trusted_cas_template, "ab+");
trustedCAsFileName = strdup(trusted_cas_template.c_str());
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO) << "Writing " << sk_X509_num(ca)
<< " verification certificates to " << trustedCAsFileName << endl;
}
// Drain the stack: every certificate is appended to the file and freed.
while (sk_X509_num(ca) > 0) {
X509* ca_cert = sk_X509_pop(ca);
// _AUX writes trusted certificates.
if (PEM_write_X509_AUX(trusted_cas_file, ca_cert)) {
} else {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN) << "Error writing a CA to file "
<< trusted_cas_template << ", continuing without it." << endl;
}
}
X509_free(ca_cert);
}
fclose(trusted_cas_file);
}
sk_X509_free(ca);
// create two tmp files containing the PEM certificates.
// these will be deleted when exiting the program
FILE* pemFile = create_and_open_temporary_ssl_file(&pemFileTemplate, "wb+");
FILE* certFile = create_and_open_temporary_ssl_file(&certFileTemplate, "wb+");
if (pemFile == NULL || certFile == NULL) {
Logging::log->getLog(LEVEL_ERROR) << "Error creating temporary "
"certificates" << endl;
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "tmp file name:"
<< pemFileTemplate << " " << certFileTemplate << endl;
}
// write private key
// use the pkcs12 password as the pem password
char* password = strdup(options->pkcs12_file_password().c_str());
// NOTE(review): The error paths below unlink the temporary files but do not
// fclose() pemFile/certFile before calling exit(1).
if (!PEM_write_PrivateKey(pemFile, pkey, NULL, NULL, 0, 0, password)) {
Logging::log->getLog(LEVEL_ERROR)
<< "Error writing pem file:" << pemFileTemplate << endl;
free(password);
EVP_PKEY_free(pkey);
unlink(pemFileTemplate.c_str());
unlink(certFileTemplate.c_str());
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
free(password);
EVP_PKEY_free(pkey);
// write ca certificate
if (!PEM_write_X509(certFile, cert)) {
Logging::log->getLog(LEVEL_ERROR) << "Error writing cert file:"
<< certFileTemplate << endl;
X509_free(cert);
unlink(pemFileTemplate.c_str());
unlink(certFileTemplate.c_str());
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
X509_free(cert);
fclose(pemFile);
fclose(certFile);
// Remember the names (strdup'ed); the destructor unlinks and frees them.
pemFileName = strdup(pemFileTemplate.c_str());
certFileName = strdup(certFileTemplate.c_str());
ssl_context_->set_password_callback(
boost::bind(&Client::get_pkcs12_password_callback, this));
ssl_context_->use_private_key_file(pemFileName, options->cert_format());
ssl_context_->use_certificate_chain_file(certFileName);
#if (BOOST_VERSION > 104601)
// Use system default path for trusted root CAs and any supplied certificates.
ssl_context_->set_default_verify_paths();
#endif // BOOST_VERSION > 104601
if (trustedCAsFileName != NULL) {
ssl_context_->load_verify_file(trustedCAsFileName);
}
// FIXME(ps) make sure that the temporary files are deleted!
} else if (!options->pem_file_name().empty()) {
// otherwise use the pem files
if (Logging::log->loggingActive(LEVEL_INFO)) {
Logging::log->getLog(LEVEL_INFO) << "SSL support using PEM private key"
" file " << options->pem_file_name() << endl;
}
try {
ssl_context_->set_password_callback(
boost::bind(&Client::get_pem_password_callback, this));
ssl_context_->use_private_key_file(options->pem_file_name(),
options->cert_format());
ssl_context_->use_certificate_chain_file(options->pem_cert_name());
#if (BOOST_VERSION > 104601)
// Use system default path for trusted root CAs and any supplied certificates.
ssl_context_->set_default_verify_paths();
#endif // BOOST_VERSION > 104601
if (options->pem_trusted_certs_file_name().empty()) {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN) << "Not using any additional "
"certificates in order to verify the services' certificates."
<< endl;
}
} else {
ssl_context_->load_verify_file(options->pem_trusted_certs_file_name());
}
} catch(invalid_argument& ia) {
cerr << "Invalid argument: " << ia.what() << endl;
cerr << "Please check your private key and certificate file."<< endl;
//TODO(mberlin): Use a better approach than exit - throw?
exit(1);
}
}
// Cleanup thread-local OpenSSL state.
ERR_free_strings();
// ERR_remove_state() is only used for OpenSSL versions before 1.0.0.
#if (OPENSSL_VERSION_NUMBER < 0x1000000fL)
ERR_remove_state(0);
#else // OPENSSL_VERSION_NUMBER < 0x1000000fL
ERR_remove_thread_state(NULL);
#endif // OPENSSL_VERSION_NUMBER < 0x1000000fL
}
}
/**
 * Password callback registered with the SSL context when a PEM private key
 * file is used: returns the PEM password from the SSL options.
 */
std::string Client::get_pem_password_callback() const {
  const std::string& pem_password = ssl_options->pem_file_password();
  return pem_password;
}
/**
 * Password callback registered with the SSL context when a PKCS#12 file is
 * used: returns the PKCS#12 password from the SSL options.
 */
std::string Client::get_pkcs12_password_callback() const {
  const std::string& pkcs12_password = ssl_options->pkcs12_file_password();
  return pkcs12_password;
}
#if (BOOST_VERSION > 104601)
bool Client::verify_certificate_callback(bool preverified,
boost::asio::ssl::verify_context& context) const {
X509_STORE_CTX *sctx = context.native_handle();
#else // BOOST_VERSION > 104601
int verify_certificate_callback(int preverify_ok, X509_STORE_CTX *sctx) {
bool preverified = preverify_ok == 1;
SSL *ssl = static_cast<SSL*>(X509_STORE_CTX_get_ex_data(
sctx,
SSL_get_ex_data_X509_STORE_CTX_idx()));
SSL_CTX *ctx = SSL_get_SSL_CTX(ssl);
SSLOptions *ssl_options = static_cast<SSLOptions*>(SSL_CTX_get_app_data(ctx));
#endif
X509* cert = X509_STORE_CTX_get_current_cert(sctx);
X509_NAME *subject_name = X509_get_subject_name(cert);
BIO *subject_name_out = BIO_new(BIO_s_mem());
X509_NAME_print_ex(subject_name_out, subject_name, 0, XN_FLAG_RFC2253);
char *subject_start = NULL, *subject = NULL;
long subject_length = BIO_get_mem_data(subject_name_out, &subject_start);
subject = new char[subject_length + 1];
memcpy(subject, subject_start, subject_length);
subject[subject_length] = '\0';
BIO_free(subject_name_out);
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "Verifying subject '" << subject
<< "'." << endl;
}
bool override = false;
if (sctx->error != 0) {
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "OpenSSL verify error: "
<< sctx->error << endl;
}
// Ignore error if verification is turned off in general or the error has
// been disabled specifically.
if (!ssl_options->verify_certificates() ||
ssl_options->ignore_verify_error(sctx->error)) {
if (Logging::log->loggingActive(LEVEL_WARN)) {
Logging::log->getLog(LEVEL_WARN) << "Ignoring OpenSSL verify error: "
<< sctx->error << " because of user settings." << endl;
}
override = true;
}
}
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "Verification of subject '" << subject
<< "' was " << (preverified ? "successful." : "unsuccessful.")
<< ((!preverified && override) ? " Overriding because of user settings." : "")
<< endl;
}
delete[] subject;
#if (BOOST_VERSION > 104601)
return preverified || override;
#else // BOOST_VERSION > 104601
return (preverified || override) ? 1 : 0;
#endif // BOOST_VERSION > 104601
}
#endif // HAS_OPENSSL
void Client::sendRequest(const string& address,
int32_t interface_id,
int32_t proc_id,
const UserCredentials& userCreds,
const Auth& auth,
const Message* message,
const char* data,
int data_length,
Message* response_message,
void* context,
ClientRequestCallbackInterface *callback) {
uint32_t call_id = atomic_inc32(&callid_counter_);
ClientRequest* request = new ClientRequest(address,
call_id,
interface_id,
proc_id,
userCreds,
auth,
message,
data,
data_length,
response_message,
context,
callback);
boost::mutex::scoped_lock lock(requests_mutex_);
if (stopped_) {
lock.unlock();
AbortClientRequest(request,
"Request aborted since RPC client was stopped.");
} else {
bool wasEmpty = requests_.empty();
requests_.push(request);
if (wasEmpty) {
service_.post(boost::bind(&Client::sendInternalRequest, this));
}
}
}
/**
 * Drains requests_ and hands every request over to the ClientConnection of
 * its target address, creating the connection first if none exists yet.
 *
 * Posted to the io_service by sendRequest(). Does nothing once the
 * ShutdownHandler has run (stopped_ioservice_only_ is set).
 *
 * Requests whose address does not have the form "<server>:<port>" are
 * answered with an IO_ERROR/EINVAL error response.
 */
void Client::sendInternalRequest() {
  if (stopped_ioservice_only_) {
    return;
  }

  // Process requests.
  for (;;) {
    ClientRequest* rq = NULL;
    {
      // Hold the lock only while the queue itself is accessed.
      boost::mutex::scoped_lock lock(requests_mutex_);
      if (requests_.empty()) {
        break;
      }
      rq = requests_.front();
      requests_.pop();
    }
    assert(rq != NULL);
    rq->RequestSent();

    const std::string& addr = rq->address();

    // Reuse an existing connection to this address, if any.
    connection_map::iterator iter = connections_.find(addr);
    if (iter != connections_.end() && iter->second != NULL) {
      ClientConnection* con = iter->second;
      con->AddRequest(rq);
      con->DoProcess();
      continue;
    }

    // New connection: split the address at the last colon. The position is
    // compared against npos instead of being narrowed to a signed int.
    const std::string::size_type colonpos = addr.find_last_of(":");
    if (colonpos == std::string::npos) {
      RPCHeader::ErrorResponse* err = new RPCHeader::ErrorResponse();
      err->set_error_message(std::string("invalid address: ") + addr);
      err->set_error_type(IO_ERROR);
      err->set_posix_errno(POSIX_ERROR_EINVAL);
      rq->set_error(err);
      rq->ExecuteCallback();
      continue;
    }

    try {
      std::string server = addr.substr(0, colonpos);
      std::string port = addr.substr(colonpos + 1);
      // NOTE(review): connect_timeout_s_ is passed for both the connect
      // timeout and the maximum reconnect interval - confirm this is intended.
      ClientConnection* con = new ClientConnection(server,
                                                   port,
                                                   service_,
                                                   &request_table_,
                                                   connect_timeout_s_,
                                                   connect_timeout_s_
#ifdef HAS_OPENSSL
                                                   ,use_gridssl_,
                                                   ssl_context_
#endif  // HAS_OPENSSL
                                                   );
      if (Logging::log->loggingActive(LEVEL_DEBUG)) {
        Logging::log->getLog(LEVEL_DEBUG) << "new connection for "
            << addr << endl;
      }
      connections_[addr] = con;
      con->AddRequest(rq);
      con->DoProcess();
    } catch (const std::out_of_range& exception) {
      RPCHeader::ErrorResponse* err = new RPCHeader::ErrorResponse();
      err->set_error_message(std::string("exception: ")
                             + exception.what());
      err->set_error_type(ERRNO);
      err->set_posix_errno(POSIX_ERROR_EINVAL);
      rq->set_error(err);
      rq->ExecuteCallback();
    }
  }
}
/**
 * Handler of rq_timeout_timer_; re-arms the timer at its end.
 *
 * 1. Every request in request_table_ which was sent more than rq_timeout_s_
 *    seconds ago is answered with an error and removed from the table.
 * 2. Every connection which had such a request is reset and its remaining
 *    requests are failed.
 * 3. Every connection unused for more than max_con_linger_ seconds is closed
 *    and deleted.
 *
 * @param error Result of the timer wait. Nothing is done (and the timer is
 *              not re-armed) if the timer was canceled or the client's
 *              io_service part was already shut down.
 */
void Client::handleTimeout(const boost::system::error_code& error) {
  // Do nothing when the timer was canceled.
  if (error == boost::asio::error::operation_aborted
      || stopped_ioservice_only_) {
    return;
  }

  try {
    const posix_time::ptime request_deadline =
        posix_time::microsec_clock::local_time()
        - posix_time::seconds(rq_timeout_s_);

    // Connections which have timed out requests have to be reset later.
    set<ClientConnection*> affected_connections;

    // Remove all timed out requests.
    for (request_map::iterator req_it = request_table_.begin();
         req_it != request_table_.end();
         /* advanced inside the loop */) {
      ClientRequest* rq = req_it->second;
      if (!(rq->time_sent() < request_deadline)) {
        ++req_it;
        continue;
      }

      ClientConnection* respective_con = rq->client_connection();
      assert(respective_con);
      affected_connections.insert(respective_con);

      // Assemble the message before the callback runs.
      string timeout_message = "Request timed out (call id = "
          + boost::lexical_cast<string>(rq->call_id())
          + ", interface id = "
          + boost::lexical_cast<string>(rq->interface_id())
          + ", proc id = " + boost::lexical_cast<string>(rq->proc_id())
          + ", server = " + respective_con->GetServerAddress()
          + ").";

      RPCHeader::ErrorResponse* err = new RPCHeader::ErrorResponse();
      err->set_error_message(timeout_message);
      err->set_error_type(IO_ERROR);
      err->set_posix_errno(POSIX_ERROR_EINVAL);
      rq->set_error(err);
      rq->ExecuteCallback();
      request_table_.erase(req_it++);

      if (Logging::log->loggingActive(LEVEL_INFO)) {
        Logging::log->getLog(LEVEL_INFO) << timeout_message << endl;
      }
    }

    // Reset all connections which had timed out requests.
    //
    // Since a request timed out, its callback was executed. The callback may
    // delete rq.rq_data() while boost::asio is still trying to send this
    // data. To avoid possible segmentation faults, all pending boost::asio
    // async_write for the request's connection are aborted by closing the
    // connection. This is the only portable way to cancel a pending request.
    // See the remarks here: http://www.boost.org/doc/libs/1_45_0/doc/html/boost_asio/reference/basic_stream_socket/cancel/overload2.html  // NOLINT
    // Closing the connection would be required anyway if the timeout was
    // caused by a network problem resulting in an aborted TCP connection.
    // Only if the timeout was caused by an overloaded server, the connection
    // is closed although that was not needed.
    const string reset_message =
        "Another request of this requests's connection timed out. "
        "Therefore the connection had to be closed and this request aborted.";
    for (set<ClientConnection*>::iterator con_it =
             affected_connections.begin();
         con_it != affected_connections.end();
         ++con_it) {
      ClientConnection* affected = *con_it;
      affected->Reset();
      affected->SendError(POSIX_ERROR_EIO, reset_message);
    }

    // Close inactive connections.
    const posix_time::ptime linger_deadline =
        posix_time::microsec_clock::local_time()
        - posix_time::seconds(max_con_linger_);
    for (connection_map::iterator map_it = connections_.begin();
         map_it != connections_.end();
         /* advanced inside the loop */) {
      ClientConnection* con = map_it->second;
      assert(con != NULL);
      if (!(con->last_used() < linger_deadline)) {
        ++map_it;
        continue;
      }

      string close_reason = "Connection was inactive for more than "
          + boost::lexical_cast<string>(max_con_linger_)
          + " seconds.";
      if (Logging::log->loggingActive(LEVEL_INFO)) {
        // substr(11) skips the leading "Connection " of the reason.
        Logging::log->getLog(LEVEL_INFO) << "Closing connection to '"
            << map_it->first << "' since it " << close_reason.substr(11)
            << endl;
      }
      con->Close(close_reason);
      delete con;
      connections_.erase(map_it++);
    }
  } catch (std::exception &e) {
    Logging::log->getLog(LEVEL_ERROR) << "An exception occurred while checking"
        " for timed out requests and connections: " << e.what() << endl;
  }

  // Schedule the next check.
  rq_timeout_timer_.expires_from_now(posix_time::seconds(rq_timeout_s_));
  rq_timeout_timer_.async_wait(boost::bind(&Client::handleTimeout,
                                           this,
                                           asio::placeholders::error));
}
/**
 * Fails "request" with an IO_ERROR/EIO error response carrying the message
 * "error", executes its callback and logs the failure.
 *
 * @param request Request to abort.
 * @param error   Human readable reason, used as error message.
 */
void Client::AbortClientRequest(ClientRequest* request,
                                const std::string& error) {
  // All canceled requests are reported as I/O errors.
  const POSIXErrno abort_errno = POSIX_ERROR_EIO;

  RPCHeader::ErrorResponse prototype;
  prototype.set_error_message(error);
  prototype.set_posix_errno(abort_errno);
  prototype.set_error_type(IO_ERROR);

  // The request receives its own heap allocated copy of the response.
  RPCHeader::ErrorResponse* response = new RPCHeader::ErrorResponse(prototype);
  request->set_error(response);
  request->ExecuteCallback();

  Logging::log->getLog(LEVEL_ERROR)
      << "operation failed: errno=" << abort_errno
      << " message=" << error << endl;
}
/**
 * Main loop of the RPC client.
 *
 * Arms the request timeout timer and runs the io_service until it has no
 * more work. Afterwards all connections are deleted and every request which
 * is still queued or waits for a response is aborted with an error.
 */
void Client::run() {
  rq_timeout_timer_.expires_from_now(posix_time::seconds(rq_timeout_s_));
  rq_timeout_timer_.async_wait(boost::bind(&Client::handleTimeout,
                                           this,
                                           asio::placeholders::error));

  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG) << "Starting RPC client." << endl;
#ifndef HAS_OPENSSL
    Logging::log->getLog(LEVEL_DEBUG) << "Running in plain TCP mode."<< endl;
#else
    if (ssl_context_ != NULL) {
      if (use_gridssl_) {
        Logging::log->getLog(LEVEL_DEBUG) << "Running in GRID SSL mode." << endl;
      } else {
        Logging::log->getLog(LEVEL_DEBUG) << "Running in SSL mode." << endl;
      }
    } else {
      Logging::log->getLog(LEVEL_DEBUG) << "Running in plain TCP mode."<< endl;
    }
#endif  // !HAS_OPENSSL
  }

  // Does not return as long as there are running timers (e.g.,
  // rq_timeout_timer_) or pending boost::asio callbacks.
  service_.run();

  // Delete the ClientConnection object of all open connections.
  for (connection_map::iterator iter = connections_.begin();
       iter != connections_.end();
       ++iter) {
    delete iter->second;
  }
  connections_.clear();

  // A request may not have made it from requests_ to request_table_. Cancel
  // those, too. As in sendRequest(), the mutex is released before a request
  // is aborted, so its callback never runs while requests_mutex_ is held.
  for (;;) {
    ClientRequest* request = NULL;
    {
      boost::mutex::scoped_lock lock(requests_mutex_);
      if (requests_.empty()) {
        break;
      }
      request = requests_.front();
      requests_.pop();
    }
    AbortClientRequest(request,
                       "Request aborted since RPC client was stopped.");
  }

  // Abort requests which were successfully sent, but no response was received
  // for them.
  for (request_map::iterator iter = request_table_.begin();
       iter != request_table_.end();
       ++iter) {
    AbortClientRequest(iter->second,
                       "Request aborted since RPC client was stopped.");
  }
  request_table_.clear();

#ifdef HAS_OPENSSL
  // Cleanup thread-local OpenSSL state. Use the same version dependent call
  // as the constructor.
#if (OPENSSL_VERSION_NUMBER < 0x1000000fL)
  ERR_remove_state(0);
#else  // OPENSSL_VERSION_NUMBER < 0x1000000fL
  ERR_remove_thread_state(NULL);
#endif  // OPENSSL_VERSION_NUMBER < 0x1000000fL
#endif  // HAS_OPENSSL
}
/**
 * Marks the client as stopped and posts the ShutdownHandler to the
 * io_service. Calling it again only logs a warning.
 */
void Client::shutdown() {
  bool was_stopped_before;
  {
    // stopped_ is guarded by requests_mutex_ (see sendRequest()).
    boost::mutex::scoped_lock lock(requests_mutex_);
    was_stopped_before = stopped_;
    stopped_ = true;
  }

  if (was_stopped_before) {
    if (Logging::log->loggingActive(LEVEL_WARN)) {
      Logging::log->getLog(LEVEL_WARN)
          << "Tried to stop the RPC client although it was already stopped."
          << endl;
    }
    return;
  }

  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG) << "RPC client stopped." << endl;
  }
  service_.post(boost::bind(&Client::ShutdownHandler, this));
}
void Client::ShutdownHandler() {
stopped_ioservice_only_ = true;
rq_timeout_timer_.cancel();
for (connection_map::iterator iter = connections_.begin();
iter != connections_.end();
++iter) {
ClientConnection *con = iter->second;
assert(con != NULL);
con->Close("RPC client was stopped.");
}
}
FILE* Client::create_and_open_temporary_ssl_file(std::string *filename_template,
const char* mode) {
if (filename_template == NULL || mode == NULL) {
return NULL;
}
#ifdef WIN32
// Gets the temp path env string (no guarantee it's a valid path).
char temp_path[MAX_PATH];
char filename_temp[MAX_PATH];
DWORD dwRetVal = 0;
dwRetVal = GetTempPath(MAX_PATH, // length of the buffer
LPTSTR(temp_path)); // buffer for path
if (dwRetVal > MAX_PATH || (dwRetVal == 0)) {
strncpy(temp_path, ".", 1);
}
// Generates a temporary file name.
if (!GetTempFileName(LPTSTR(temp_path), // directory for tmp files
TEXT("xfs"), // temp file name prefix, max 3 char
0, // create unique name
LPWSTR(filename_temp))) { // buffer for name
std::cerr << "Couldn't create temp file name.\n";
return NULL;
}
*filename_template = std::string(filename_temp);
return _tfopen(LPTSTR(filename_temp), LPTSTR(mode));
#else
// Place file in TMPDIR or /tmp if not specified as absolute path.
std::string filename = *filename_template;
if (!boost::algorithm::starts_with<std::string, std::string>(filename, "/")) {
char *tmpdir = getenv("TMPDIR");
if (tmpdir != NULL) {
std::string tmp(tmpdir);
if (!boost::algorithm::ends_with(tmp, "/")) {
tmp += "/";
}
filename = tmp + filename;
} else {
filename = "/tmp/" + filename;
}
}
char *temporary_filename = strdup(filename.c_str());
int tmp = mkstemp(temporary_filename);
if (tmp == -1) {
std::cerr << "Couldn't create temp file name.\n";
free(temporary_filename);
return NULL;
}
*filename_template = std::string(temporary_filename);
free(temporary_filename);
return fdopen(tmp, mode);
#endif // WIN32
}
#ifdef HAS_OPENSSL
/**
 * Maps the name of an SSL method to the corresponding boost::asio client
 * method.
 *
 * Known names are "sslv3", "ssltls", "tlsv1" and, for Boost versions newer
 * than 1.53, "tlsv11" and "tlsv12".
 *
 * @param method_string   Name of the SSL method.
 * @param default_method  Returned (after logging a warning) if the name is
 *                        unknown.
 */
boost::asio::ssl::context_base::method Client::string_to_ssl_method(
    std::string method_string,
    boost::asio::ssl::context_base::method default_method) {
  if (method_string == "sslv3") {
    return boost::asio::ssl::context_base::sslv3_client;
  }
  if (method_string == "ssltls") {
    return boost::asio::ssl::context_base::sslv23_client;
  }
  if (method_string == "tlsv1") {
    return boost::asio::ssl::context_base::tlsv1_client;
  }
#if (BOOST_VERSION > 105300)
  if (method_string == "tlsv11") {
    return boost::asio::ssl::context_base::tlsv11_client;
  }
  if (method_string == "tlsv12") {
    return boost::asio::ssl::context_base::tlsv12_client;
  }
#endif  // BOOST_VERSION > 105300

  // No known name matched.
  if (Logging::log->loggingActive(LEVEL_WARN)) {
    Logging::log->getLog(LEVEL_WARN) << "Unknown SSL method: '"
        << method_string << "', using default." << endl;
  }
  return default_method;
}
#endif  // HAS_OPENSSL
/**
 * Removes the temporary certificate files created by the constructor and
 * releases the SSL options and the SSL context.
 */
Client::~Client() {
#ifdef HAS_OPENSSL
  // Remove the temporary cert and pem files. The names were created with
  // strdup(), so they are released with free() (a no-op for NULL).
  if (pemFileName != NULL) {
    unlink(pemFileName);
  }
  free(pemFileName);

  if (certFileName != NULL) {
    unlink(certFileName);
  }
  free(certFileName);

  if (trustedCAsFileName != NULL) {
    unlink(trustedCAsFileName);
  }
  free(trustedCAsFileName);

  if (ssl_options) {
    // Cleanup thread-local OpenSSL state. Use the same version dependent
    // call as the constructor.
#if (OPENSSL_VERSION_NUMBER < 0x1000000fL)
    ERR_remove_state(0);
#else  // OPENSSL_VERSION_NUMBER < 0x1000000fL
    ERR_remove_thread_state(NULL);
#endif  // OPENSSL_VERSION_NUMBER < 0x1000000fL
    ERR_free_strings();
  }

  delete ssl_options;
  delete ssl_context_;
#endif  // HAS_OPENSSL
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif // _MSC_VER
} // namespace rpc
} // namespace xtreemfs

View File

@@ -0,0 +1,608 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
* 2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "rpc/client_connection.h"
#include <errno.h>
#include <boost/bind.hpp>
#include <iostream>
#include <string>
#include <vector>
#ifdef HAS_VALGRIND
#include <valgrind/memcheck.h>
#include <valgrind/valgrind.h>
#endif // HAS_VALGRIND
#include "rpc/grid_ssl_socket_channel.h"
#include "rpc/ssl_socket_channel.h"
#include "rpc/tcp_socket_channel.h"
#include "util/logging.h"
namespace xtreemfs {
namespace rpc {
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
using namespace std;
using namespace boost;
using namespace google::protobuf;
using namespace boost::asio::ip;
/**
 * Creates a connection object for the server "server_name":"port".
 *
 * Only the members are initialized, the marker buffer is allocated and the
 * socket channel is created (CreateChannel()); Connect() is not called here.
 *
 * @param server_name               Stored in server_name_.
 * @param port                      Stored in server_port_.
 * @param service                   io_service used for the resolver, the
 *                                  timer and the socket channel.
 * @param request_table             Pointer to a request table, stored in
 *                                  request_table_ (not owned - presumably the
 *                                  table of the Client; confirm with caller).
 * @param connect_timeout_s         Stored in connect_timeout_s_.
 * @param max_reconnect_interval_s  Stored in max_reconnect_interval_s_.
 * @param use_gridssl               (HAS_OPENSSL only) Stored in use_gridssl_.
 * @param ssl_context               (HAS_OPENSSL only) Stored in ssl_context_;
 *                                  may be NULL, see CreateChannel().
 */
ClientConnection::ClientConnection(
const string& server_name,
const string& port,
asio::io_service& service,
request_map *request_table,
int32_t connect_timeout_s,
int32_t max_reconnect_interval_s
#ifdef HAS_OPENSSL
,bool use_gridssl,
boost::asio::ssl::context* ssl_context
#endif // HAS_OPENSSL
)
: receive_marker_(NULL),
receive_hdr_(NULL),
receive_msg_(NULL),
receive_data_(NULL),
connection_state_(IDLE),
requests_(),
current_request_(NULL),
server_name_(server_name),
server_port_(port),
service_(service),
resolver_(service),
socket_(NULL),
endpoint_(NULL),
request_table_(request_table),
timer_(service),
connect_timeout_s_(connect_timeout_s),
max_reconnect_interval_s_(max_reconnect_interval_s),
next_reconnect_at_(boost::posix_time::not_a_date_time),
last_connect_was_at_(boost::posix_time::not_a_date_time),
reconnect_interval_s_(1)
#ifdef HAS_OPENSSL
,use_gridssl_(use_gridssl),
ssl_context_(ssl_context)
#endif // HAS_OPENSSL
{
// Buffer with the size of one record marker.
receive_marker_buffer_ = new char[RecordMarker::get_size()];
// socket_ is NULL at this point, so this only creates the first channel.
CreateChannel();
}
/**
 * Assigns "request" to this connection: appends it to the queue of pending
 * requests and registers it in the request table under its call id.
 */
void ClientConnection::AddRequest(ClientRequest* request) {
  request->set_client_connection(this);

  const uint32_t call_id = request->call_id();
  requests_.push(PendingRequest(call_id, request));
  (*request_table_)[call_id] = request;
}
/**
 * Fails all pending requests of this connection with an IO_ERROR.
 *
 * Every queued request which is still registered in the request table gets
 * an error response, has its callback executed and is removed from the
 * table. Queue entries which are no longer in the table are dropped without
 * touching the request object.
 *
 * @param posix_errno    Errno stored in the error response.
 * @param error_message  Message stored in the error response.
 */
void ClientConnection::SendError(POSIXErrno posix_errno,
                                 const string &error_message) {
  if (requests_.empty()) {
    return;
  }

  // Prototype which is copied for every failed request.
  RPCHeader::ErrorResponse error_prototype;
  error_prototype.set_error_type(IO_ERROR);
  error_prototype.set_posix_errno(posix_errno);
  error_prototype.set_error_message(error_message);

  for (; !requests_.empty(); requests_.pop()) {
    const uint32_t call_id = requests_.front().call_id;
    if (request_table_->find(call_id) == request_table_->end()) {
      // Not registered anymore: the ClientRequest must not be accessed.
      continue;
    }

    // ClientRequest still exists in request_table_, it's safe to access it.
    ClientRequest* pending = requests_.front().rq;
    pending->set_error(new RPCHeader::ErrorResponse(error_prototype));
    pending->ExecuteCallback();
    request_table_->erase(call_id);

    Logging::log->getLog(LEVEL_ERROR)
        << "operation failed: call_id=" << call_id
        << " errno=" << posix_errno
        << " message=" << error_message << endl;
  }
}
void ClientConnection::DoProcess() {
last_used_ = posix_time::second_clock::local_time();
if (connection_state_ == IDLE) {
if (endpoint_ == NULL) {
Connect();
} else {
// Do write.
SendRequest();
}
} else if (connection_state_ == WAIT_FOR_RECONNECT) {
posix_time::ptime now = posix_time::second_clock::local_time();
if (next_reconnect_at_ <= now) {
next_reconnect_at_ = posix_time::not_a_date_time;
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "trying reconnect..." << endl;
}
Connect();
} else {
SendError(POSIX_ERROR_EIO,
"cannot connect to server '" + server_name_ + ":" + server_port_
+ "', reconnect blocked locally"
" to avoid flooding the server");
}
}
}
/**
 * Deletes "socket". CreateChannel() posts this handler to the io_service in
 * order to delay the deletion of a replaced socket channel.
 */
void ClientConnection::DelayedSocketDeletionHandler(
AbstractSocketChannel* socket) {
delete socket;
}
/**
 * Replaces socket_ by a new socket channel.
 *
 * An existing channel is closed and its deletion is posted to the
 * io_service. The new channel is a plain TCP channel unless compiled with
 * HAS_OPENSSL and an SSL context is set; then it is a Grid SSL or SSL
 * channel depending on use_gridssl_.
 */
void ClientConnection::CreateChannel() {
  if (socket_ != NULL) {
    socket_->close();
    // In case of SSL connections, boost::asio tries to write to the socket
    // after the SSL stream and the socket was shutdown. Therefore, we delay
    // the deletion and hope that no segmentation fault is triggered. The
    // correct way would have been to use a shared_ptr for the socket.
    service_.post(boost::bind(&ClientConnection::DelayedSocketDeletionHandler,
                              socket_));
    socket_ = NULL;
  }

#ifdef HAS_OPENSSL
  if (ssl_context_ != NULL) {
    if (use_gridssl_) {
      socket_ = new GridSSLSocketChannel(service_, *ssl_context_);
    } else {
      socket_ = new SSLSocketChannel(service_, *ssl_context_);
    }
    return;
  }
#endif  // HAS_OPENSSL

  // No SSL support compiled in or no SSL context configured.
  socket_ = new TCPSocketChannel(service_);
}
void ClientConnection::Connect() {
connection_state_ = CONNECTING;
last_connect_was_at_ = posix_time::second_clock::local_time();
#if (BOOST_VERSION > 104200)
asio::ip::tcp::resolver::query query(
server_name_,
server_port_,
static_cast<asio::ip::resolver_query_base::flags>(0) /* no flags */);
#else
asio::ip::tcp::resolver::query query(server_name_, server_port_);
#endif
resolver_.async_resolve(query,
boost::bind(&ClientConnection::PostResolve,
this,
asio::placeholders::error,
asio::placeholders::iterator));
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG) << "connect timeout is "
<< connect_timeout_s_ << " seconds\n";
}
}
// Timer handler armed by PostResolve(). Unless the wait was aborted or the
// connection is already closed, the connection is reset and an EIO error is
// reported via SendError().
void ClientConnection::OnConnectTimeout(const boost::system::error_code& err) {
  const bool wait_cancelled =
      (err == asio::error::operation_aborted) || (err == asio::error::eof);
  if (wait_cancelled || connection_state_ == CLOSED) {
    return;
  }

  Reset();
  SendError(POSIX_ERROR_EIO,
            "connection to '" + server_name_ + ":" + server_port_
            + "' timed out");
}
// Handler for the asynchronous name resolution started by Connect(); also
// called directly by PostConnect() to try the next resolved endpoint.
//
// On a valid endpoint the connect timeout timer is armed and an asynchronous
// connect is started (continues in PostConnect()). If the iterator is
// exhausted, an EINVAL error is reported.
//
// NOTE(review): when |err| is set, the connection is reset and an error is
// reported, but the function does NOT return; it still evaluates
// |endpoint_iterator| afterwards.
void ClientConnection::PostResolve(const boost::system::error_code& err,
                                   tcp::resolver::iterator endpoint_iterator) {
  if (connection_state_ == CLOSED
      || err == asio::error::operation_aborted
      || err == asio::error::eof) {
    return;
  }

  if (err) {
    Reset();
    SendError(POSIX_ERROR_EIO,
              "could not connect to '" + server_name_ + ":" + server_port_
              + "': " + err.message());
  }

  const tcp::resolver::iterator no_more_endpoints;
  if (endpoint_iterator == no_more_endpoints) {
    SendError(POSIX_ERROR_EINVAL, string("cannot resolve hostname: '")
        + this->server_name_ + ":" + server_port_ + string("'"));
    return;
  }

  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG) << "resolved: "
        << (*endpoint_iterator).host_name() << endl;
  }

  // Replace any previously stored endpoint (deleting NULL is a no-op).
  delete endpoint_;
  endpoint_ = new tcp::endpoint(*endpoint_iterator);

  // Arm the connect timeout before starting the connect itself.
  timer_.expires_from_now(posix_time::seconds(connect_timeout_s_));
  timer_.async_wait(boost::bind(&ClientConnection::OnConnectTimeout,
                                this,
                                asio::placeholders::error));
  socket_->async_connect(*endpoint_,
                         boost::bind(&ClientConnection::PostConnect,
                                     this,
                                     asio::placeholders::error,
                                     endpoint_iterator));
}
// Handler for the asynchronous connect started by PostResolve().
//
// The connect timeout timer is cancelled first. On failure the next resolved
// endpoint (if any) is tried with a fresh channel; if none is left, the
// connection is reset and an EIO error is reported. On success the reconnect
// back-off is cleared, the state becomes IDLE and, if requests are queued,
// sending and receiving are started.
void ClientConnection::PostConnect(const boost::system::error_code& err,
                                   tcp::resolver::iterator endpoint_iterator) {
  if (err == asio::error::operation_aborted || err == asio::error::eof
      || connection_state_ == CLOSED) {
    return;
  }
  timer_.cancel();

  if (err) {
    delete endpoint_;
    endpoint_ = NULL;
    if (++endpoint_iterator != tcp::resolver::iterator()) {
      // Try next endpoint.
      CreateChannel();
      if (Logging::log->loggingActive(LEVEL_DEBUG)) {
        Logging::log->getLog(LEVEL_DEBUG) << "failed: next endpoint"
            << err.message() << "\n";
      }
      // Hand over a cleared error code: the failure of this endpoint must not
      // make PostResolve() reset the connection and fail the pending requests
      // while another endpoint is still to be tried.
      PostResolve(boost::system::error_code(), endpoint_iterator);
    } else {
      Reset();
      string ssl_error_info;
#ifdef HAS_OPENSSL
      if (err.category() == asio::error::ssl_category) {
        ssl_error_info = ERR_error_string(ERR_get_error(), NULL);
      }
#endif  // HAS_OPENSSL
      SendError(POSIX_ERROR_EIO,
                "could not connect to host '" + server_name_ + ":"
                + server_port_ + "': " + err.message()+" "+ssl_error_info);
    }
  } else {
    // Connected: clear the reconnect back-off.
    reconnect_interval_s_ = 1;
    next_reconnect_at_ = posix_time::not_a_date_time;
    if (Logging::log->loggingActive(LEVEL_DEBUG)) {
      Logging::log->getLog(LEVEL_DEBUG) << "connected to "
          << (*endpoint_iterator).host_name() << ":"
          << (*endpoint_iterator).service_name() << endl;
#ifdef HAS_OPENSSL
      if (ssl_context_ != NULL) {
        Logging::log->getLog(LEVEL_DEBUG) << "Using SSL/TLS version '"
            << ((SSLSocketChannel*) socket_)->ssl_tls_version() << "'." << endl;
      }
#endif  // HAS_OPENSSL
    }
    connection_state_ = IDLE;
    if (!requests_.empty()) {
      SendRequest();
      ReceiveRequest();
    }
  }
}
// Starts the asynchronous write of the first queued request that is still
// registered in request_table_ (continues in PostWrite()). Queue entries
// whose request is no longer in the table are dropped. If the queue is or
// becomes empty, the state is set to IDLE.
//
// Stale entries are skipped in a loop rather than by recursion, so a long
// run of stale entries cannot exhaust the stack.
void ClientConnection::SendRequest() {
  while (!requests_.empty()) {
    connection_state_ = ACTIVE;
    const uint32_t call_id = requests_.front().call_id;
    ClientRequest* rq = requests_.front().rq;
    assert(rq != NULL);

    // If the request is no longer present in request_table_, it was already
    // deleted meanwhile (e.g. by Client::handleTimeout()), so |rq| must not
    // be dereferenced.
    if (request_table_->find(call_id) == request_table_->end()) {
      // ClientRequest was already deleted, continue with the next entry.
      requests_.pop();
      continue;
    }

    // Process ClientRequest: record marker, header and message share one
    // buffer; the optional data part is sent as a second buffer.
    const RecordMarker* rrm = rq->request_marker();
    vector<boost::asio::const_buffer> bufs;
    bufs.push_back(boost::asio::buffer(
        reinterpret_cast<const void*>(rq->rq_hdr_msg()),
        RecordMarker::get_size() + rrm->header_len() + rrm->message_len()));
    if (rrm->data_len() > 0) {
      bufs.push_back(boost::asio::buffer(
          reinterpret_cast<const void*>(rq->rq_data()), rrm->data_len()));
    }
    socket_->async_write(bufs, boost::bind(
        &ClientConnection::PostWrite,
        this,
        asio::placeholders::error,
        asio::placeholders::bytes_transferred));
    return;
  }

  // Nothing (left) to send.
  connection_state_ = IDLE;
}
// Starts an asynchronous read of the next record marker into
// receive_marker_buffer_ (continues in PostReadRecordMarker()). Does nothing
// while no endpoint is set.
void ClientConnection::ReceiveRequest() {
  if (!endpoint_) {
    return;
  }

  socket_->async_read(
      asio::buffer(receive_marker_buffer_, RecordMarker::get_size()),
      boost::bind(&ClientConnection::PostReadRecordMarker,
                  this,
                  asio::placeholders::error));
}
void ClientConnection::Reset() {
CreateChannel();
delete endpoint_;
endpoint_ = NULL;
connection_state_ = WAIT_FOR_RECONNECT;
posix_time::ptime now = posix_time::second_clock::local_time();
posix_time::seconds reconnect_interval(reconnect_interval_s_);
if (last_connect_was_at_ != boost::posix_time::not_a_date_time) {
posix_time::time_duration elapsed_time_since_last_connect =
now - last_connect_was_at_;
if (elapsed_time_since_last_connect.is_negative()) {
next_reconnect_at_ = now;
} else if (elapsed_time_since_last_connect <= reconnect_interval) {
next_reconnect_at_
= now + reconnect_interval - elapsed_time_since_last_connect;
} else {
next_reconnect_at_ = now;
}
} else {
next_reconnect_at_ = now + reconnect_interval;
}
if (Logging::log->loggingActive(LEVEL_DEBUG)) {
Logging::log->getLog(LEVEL_DEBUG)
<< "Connection reset, next reconnect in "
<< (next_reconnect_at_ - now).seconds() << " seconds." << endl;
}
reconnect_interval_s_ *= 2;
if (reconnect_interval_s_ > max_reconnect_interval_s_) {
reconnect_interval_s_ = max_reconnect_interval_s_;
}
}
// Closes the connection locally: cancels the resolver and the timer, closes
// the channel (its deletion is deferred to the io_service), switches to
// CLOSED and reports an EIO error that includes |error| via SendError().
void ClientConnection::Close(const std::string& error) {
  resolver_.cancel();
  timer_.cancel();

  if (socket_ != NULL) {
    socket_->close();
    // In case of SSL connections, boost::asio tries to write to the socket
    // after the SSL stream and the socket was shutdown. Therefore, we delay
    // the deletion and hope that no segmentation fault is triggered. The
    // correct way would have been to use a shared_ptr for the socket.
    service_.post(boost::bind(&ClientConnection::DelayedSocketDeletionHandler,
                              socket_));
    socket_ = NULL;
  }

  connection_state_ = CLOSED;
  SendError(POSIX_ERROR_EIO,
            "Connection to '" + server_name_ + ":" + server_port_ + "' closed"
            " locally due to: " + error);
}
// Handler for the asynchronous write started by SendRequest(). On error the
// connection is reset and an EIO error is reported. Otherwise the request
// that was just written is removed from the queue and the next one, if any,
// is sent.
void ClientConnection::PostWrite(const boost::system::error_code& err,
                                 size_t bytes_written) {
  if (connection_state_ == CLOSED
      || err == asio::error::operation_aborted
      || err == asio::error::eof) {
    return;
  }

  if (err) {
    Reset();
    SendError(POSIX_ERROR_EIO,
              "Could not send request to '" + server_name_ + ":" +server_port_
              + "': " + err.message());
    return;
  }

  if (requests_.empty()) {
    return;
  }

  // Pop sent request.
  requests_.pop();
  connection_state_ = IDLE;
  if (!requests_.empty()) {
    SendRequest();
  }
}
// Handler for the record marker read started by ReceiveRequest(). On error
// the connection is reset and an EIO error is reported. Otherwise the marker
// is decoded, receive buffers for header, message and data are allocated
// according to the announced lengths (message and data only if non-empty)
// and an asynchronous read into them is started (continues in
// PostReadMessage()).
void ClientConnection::PostReadRecordMarker(
    const boost::system::error_code& err) {
  if (connection_state_ == CLOSED
      || err == asio::error::operation_aborted
      || err == asio::error::eof) {
    return;
  }

  if (err) {
    Reset();
    SendError(POSIX_ERROR_EIO,
              "could not read record marker in response from '" + server_name_
              + ":" + server_port_ + "': " + err.message());
    return;
  }

#ifdef HAS_VALGRIND
  // On some OpenSSL versions with SSLv3 connections, Valgrind reports the
  // marker buffer as not initialized.
  if (RUNNING_ON_VALGRIND > 0) {
    VALGRIND_MAKE_MEM_DEFINED(receive_marker_buffer_,
                              RecordMarker::get_size());
  }
#endif  // HAS_VALGRIND

  // Do read.
  receive_marker_ = new RecordMarker(receive_marker_buffer_);
  const uint32_t header_len = receive_marker_->header_len();
  const uint32_t message_len = receive_marker_->message_len();
  const uint32_t data_len = receive_marker_->data_len();

  // The buffers are filled in this order: header, message, data.
  vector<boost::asio::mutable_buffer> read_buffers;

  receive_hdr_ = new char[header_len];
  read_buffers.push_back(
      asio::buffer(reinterpret_cast<void*> (receive_hdr_), header_len));

  if (message_len > 0) {
    receive_msg_ = new char[message_len];
    read_buffers.push_back(
        asio::buffer(reinterpret_cast<void*> (receive_msg_), message_len));
  } else {
    receive_msg_ = NULL;
  }

  if (data_len > 0) {
    receive_data_ = new char[data_len];
    read_buffers.push_back(
        asio::buffer(reinterpret_cast<void*> (receive_data_), data_len));
  } else {
    receive_data_ = NULL;
  }

  socket_->async_read(read_buffers,
                      boost::bind(&ClientConnection::PostReadMessage,
                                  this,
                                  asio::placeholders::error));
}
// Handler for the read of header/message/data started by
// PostReadRecordMarker().
//
// Parses the RPC header, looks up the matching request in request_table_ and
// stores either the error response or the parsed response message and data
// in it. The request is then removed from the table, its callback is executed
// and the next response is awaited. Read errors and unparsable headers reset
// the connection; responses for unknown call ids are logged and dropped.
//
// Ownership of the response header: it is deleted here when the server sent
// an error response, otherwise it is handed to the request via
// set_resp_header().
void ClientConnection::PostReadMessage(const boost::system::error_code& err) {
  if (err == asio::error::operation_aborted || err == asio::error::eof
      || connection_state_ == CLOSED) {
    return;
  }

  if (err) {
    DeleteInternalBuffers();
    Reset();
    SendError(POSIX_ERROR_EIO,
              "could not read response from '" + server_name_ + ":"
              + server_port_ + "': " + err.message());
    return;
  }

#ifdef HAS_VALGRIND
  // On some OpenSSL versions with SSLv3 connections, Valgrind reports the
  // header buffer as not initialized.
  if (RUNNING_ON_VALGRIND > 0) {
    VALGRIND_MAKE_MEM_DEFINED(receive_hdr_, receive_marker_->header_len());
  }
#endif  // HAS_VALGRIND

  // Parse header.
  RPCHeader *respHdr = new RPCHeader();
  if (!respHdr->ParseFromArray(receive_hdr_, receive_marker_->header_len())) {
    // Error parsing the header.
    DeleteInternalBuffers();
    delete respHdr;
    Reset();
    SendError(POSIX_ERROR_EINVAL,
              "received garbage header from '" + server_name_ + ":"
              + server_port_ + "', closing connection");
    return;
  }
  delete[] receive_hdr_;
  receive_hdr_ = NULL;

  // Get request from table.
  const uint32 call_id = respHdr->call_id();
  request_map::iterator iter = request_table_->find(call_id);
  if (iter == request_table_->end()) {
    if (Logging::log->loggingActive(LEVEL_WARN)) {
      Logging::log->getLog(LEVEL_WARN)
          << "Received response for unknown request from "
             "'" << server_name_ << ":" << server_port_ << "'"
             " (call id = " << call_id << ")." << endl;
    }
    DeleteInternalBuffers();
    delete respHdr;
    // Receive next request.
    ReceiveRequest();
    return;
  }
  ClientRequest *rq = iter->second;

  if (respHdr->has_error_response()) {
    // Error response: the request gets a copy of it.
    rq->set_error(new RPCHeader::ErrorResponse(respHdr->error_response()));
    // Manually cleanup response header.
    delete respHdr;
  } else {
    // Parse message, if exists.
    if (receive_marker_->message_len() > 0) {
      if (!rq->resp_message()) {
        // Not prepared to receive a message.
        // Print error and discard data.
        Logging::log->getLog(LEVEL_WARN)
            << "Received an unexpected response message (expected size 0, got "
            << receive_marker_->message_len() << " bytes) from "
            << server_name_ << std::endl;
      } else {
        assert(receive_msg_ != NULL);
        if (!rq->resp_message()->ParseFromArray(
                receive_msg_,
                receive_marker_->message_len())) {
          // Parsing message failed. Generate error.
          RPCHeader::ErrorResponse *parse_error =
              new RPCHeader::ErrorResponse();
          parse_error->set_error_type(GARBAGE_ARGS);
          parse_error->set_posix_errno(POSIX_ERROR_NONE);
          parse_error->set_error_message(string("cannot parse message data: ")
              + rq->resp_message()->InitializationErrorString());
          rq->set_error(parse_error);
          // The response header must NOT be deleted here: it is handed to
          // the request below, and deleting it first would leave the request
          // with a dangling pointer.
        } else {
          // Message successfully parsed, set data.
          // Hand over responsibility for receive_data_ to request object.
          rq->set_resp_data(receive_data_);
          rq->set_resp_data_len(receive_marker_->data_len());
          receive_data_ = NULL;
        }
      }
    }
    // Always set response header.
    rq->set_resp_header(respHdr);
  }

  // Remove from table and clean up buffers.
  request_table_->erase(call_id);
  DeleteInternalBuffers();
  rq->ExecuteCallback();
  // Receive next request.
  ReceiveRequest();
}
// Frees the receive buffers (header, message, data) and the decoded record
// marker of the response currently being read and resets the pointers to
// NULL.
void ClientConnection::DeleteInternalBuffers() {
  // Decoded record marker.
  delete receive_marker_;
  receive_marker_ = NULL;

  // Raw buffers.
  delete[] receive_data_;
  receive_data_ = NULL;

  delete[] receive_msg_;
  receive_msg_ = NULL;

  delete[] receive_hdr_;
  receive_hdr_ = NULL;
}
// Releases the receive buffers, the record marker buffer and the endpoint.
ClientConnection::~ClientConnection() {
  DeleteInternalBuffers();
  delete[] receive_marker_buffer_;
  delete endpoint_;
}
} // namespace rpc
} // namespace xtreemfs

View File

@@ -0,0 +1,113 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
* 2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "rpc/client_request.h"
#include <google/protobuf/message.h>
#include <string>
#include "pbrpc/RPC.pb.h"
#include "rpc/client_request_callback_interface.h"
#include "rpc/record_marker.h"
#include "util/logging.h"
namespace xtreemfs {
namespace rpc {
using namespace xtreemfs::pbrpc;
using namespace xtreemfs::util;
using namespace std;
using namespace boost;
using namespace google::protobuf;
// Builds a request that is ready to be written to the wire.
//
// An RPC request header is assembled from the call/interface/procedure ids,
// the user credentials and the auth data. Record marker, header and (if
// present) the request message are serialized back to back into the buffer
// rq_hdr_msg_. |request_data| is only referenced, not copied.
//
// Throws std::runtime_error if |request_message| is non-empty but not fully
// initialized. The check is done before any buffer is allocated, so nothing
// is leaked when the constructor throws.
ClientRequest::ClientRequest(const string& address,
                             const uint32_t call_id,
                             const uint32_t interface_id,
                             const uint32_t proc_id,
                             const UserCredentials& userCreds,
                             const Auth& auth,
                             const Message* request_message,
                             const char* request_data,
                             const int data_length,
                             Message* response_message,
                             void *context,
                             ClientRequestCallbackInterface *callback)
    : client_connection_(NULL),
      call_id_(call_id),
      interface_id_(interface_id),
      proc_id_(proc_id),
      context_(context),
      callback_(callback),
      address_(address),
      callback_executed_(false),
      error_(NULL),
      resp_header_(NULL),
      resp_message_(response_message),
      resp_data_(NULL),
      resp_data_len_(0) {
  RPCHeader header = RPCHeader();
  header.set_message_type(xtreemfs::pbrpc::RPC_REQUEST);
  header.set_call_id(call_id);
  header.mutable_request_header()->set_interface_id(interface_id);
  header.mutable_request_header()->set_proc_id(proc_id);
  header.mutable_request_header()->mutable_user_creds()->
      MergeFrom(userCreds);
  header.mutable_request_header()->mutable_auth_data()->MergeFrom(auth);

  assert(callback_ != NULL);

  const uint32_t msg_len =
      (request_message == NULL) ? 0 : request_message->ByteSize();

  // Reject invalid messages before allocating or serializing anything.
  if (msg_len > 0 && !request_message->IsInitialized()) {
    string errmsg = string("message is not valid. Not all required "
        "fields have been initialized: ") +
        request_message->InitializationErrorString();
    Logging::log->getLog(xtreemfs::util::LEVEL_ERROR) << errmsg << endl;
    throw std::runtime_error(errmsg);
  }

  this->request_marker_ = new RecordMarker(header.ByteSize(),
                                           msg_len, data_length);
  this->rq_hdr_msg_ = new char[RecordMarker::get_size()
                               + this->request_marker_->header_len()
                               + request_marker_->message_len()];

  // Buffer layout: [record marker][header][message].
  char *hdrPtr = this->rq_hdr_msg_ + RecordMarker::get_size();
  char *msgPtr = hdrPtr + request_marker_->header_len();

  request_marker_->serialize(rq_hdr_msg_);
  header.SerializeToArray(hdrPtr, request_marker_->header_len());
  if (msg_len > 0) {
    request_message->SerializeToArray(msgPtr, request_marker_->message_len());
  }

  this->rq_data_ = request_data;
}
// Frees the request marker, the serialized header/message buffer and the
// response header. The pointers are reset to NULL afterwards, so calling
// this more than once (e.g. explicitly and again from the destructor) is
// safe.
void ClientRequest::deleteInternalBuffers() {
  delete request_marker_;
  request_marker_ = NULL;

  delete[] rq_hdr_msg_;
  rq_hdr_msg_ = NULL;

  delete resp_header_;
  resp_header_ = NULL;
}
// Frees the internal buffers via deleteInternalBuffers().
ClientRequest::~ClientRequest() {
  this->deleteInternalBuffers();
}
// Notifies the callback that this request has completed. Only the first call
// has an effect; later calls return immediately.
void ClientRequest::ExecuteCallback() {
  if (callback_executed_) {
    return;
  }
  callback_executed_ = true;
  callback_->RequestCompleted(this);
}
void ClientRequest::RequestSent() {
time_sent_ = posix_time::microsec_clock::local_time();
}
} // namespace rpc
} // namespace xtreemfs

View File

@@ -0,0 +1,60 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "rpc/record_marker.h"
#ifdef _WIN32
#include <Winsock2.h>
#else
// Linux & co.
#include <arpa/inet.h>
#endif
#include <cstring>
#include <iostream>
#include "util/logging.h"
namespace xtreemfs {
namespace rpc {
using namespace xtreemfs::util;
// Creates a record marker from the three fragment lengths (host byte order).
RecordMarker::RecordMarker(uint32_t header_len,
                           uint32_t message_len,
                           uint32_t data_len)
    : header_len_(header_len),
      message_len_(message_len),
      data_len_(data_len) {}
// Decodes a record marker from |buffer|, which must hold three consecutive
// 32-bit unsigned integers in network byte order: header length, message
// length and data length.
//
// The bytes are copied out with memcpy instead of being read through a
// uint32_t pointer, so |buffer| does not have to be suitably aligned.
RecordMarker::RecordMarker(const char* buffer) {
  uint32_t fields[3];
  std::memcpy(fields, buffer, sizeof(fields));
  this->header_len_ = ntohl(fields[0]);
  this->message_len_ = ntohl(fields[1]);
  this->data_len_ = ntohl(fields[2]);
}
// Writes the record marker to |buffer| as three consecutive 32-bit unsigned
// integers in network byte order: header length, message length and data
// length. |buffer| must provide room for these twelve bytes.
//
// The bytes are copied in with memcpy instead of being written through a
// uint32_t pointer, so |buffer| does not have to be suitably aligned.
void RecordMarker::serialize(char* buffer) const {
  uint32_t fields[3];
  fields[0] = htonl(this->header_len_);
  fields[1] = htonl(this->message_len_);
  fields[2] = htonl(this->data_len_);
  std::memcpy(buffer, fields, sizeof(fields));
}
uint32_t RecordMarker::data_len() const {
return data_len_;
}
uint32_t RecordMarker::message_len() const {
return message_len_;
}
uint32_t RecordMarker::header_len() const {
return header_len_;
}
} // namespace rpc
} // namespace xtreemfs

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "rpc/sync_callback.h"
#include "rpc/client_request.h"
namespace xtreemfs {
namespace rpc {
// Creates a callback for which no response is available yet.
SyncCallbackBase::SyncCallbackBase()
    : request_(NULL) {}
// Deletes the completed request (if any) while holding cond_lock_.
SyncCallbackBase::~SyncCallbackBase() {
  // TODO(mberlin): Is a lock here really needed?!
  boost::lock_guard<boost::mutex> guard(cond_lock_);
  delete request_;
}
// Stores the completed request |rq| and wakes up all threads blocked in
// WaitForResponse().
void SyncCallbackBase::RequestCompleted(ClientRequest* rq) {
  boost::lock_guard<boost::mutex> guard(cond_lock_);
  this->request_ = rq;
  this->response_avail_.notify_all();
}
void SyncCallbackBase::WaitForResponse() {
boost::unique_lock<boost::mutex> lock(cond_lock_);
while (!request_) {
response_avail_.wait(lock);
}
}
bool SyncCallbackBase::HasFinished() {
boost::unique_lock<boost::mutex> lock(cond_lock_);
return (request_ != NULL);
}
bool SyncCallbackBase::HasFailed() {
WaitForResponse();
return (request_->error() != NULL);
}
uint32_t SyncCallbackBase::data_length() {
WaitForResponse();
return (request_->resp_data_len());
}
char* SyncCallbackBase::data() {
WaitForResponse();
return (request_->resp_data());
}
// Waits for the response and returns the error stored in the request.
xtreemfs::pbrpc::RPCHeader::ErrorResponse* SyncCallbackBase::error() {
  this->WaitForResponse();
  return this->request_->error();
}
// Waits for the response and returns the response message of the request.
::google::protobuf::Message* SyncCallbackBase::response() {
  this->WaitForResponse();
  return this->request_->resp_message();
}
// Clears error, response message and response data of the stored request.
// Does nothing if no request has completed yet.
void SyncCallbackBase::DeleteBuffers() {
  if (request_ == NULL) {
    return;
  }
  request_->clear_error();
  request_->clear_resp_message();
  request_->clear_resp_data();
}
} // namespace rpc
} // namespace xtreemfs

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "util/logging.h"
#include <ostream>
#include <boost/thread.hpp>
#include <string>
#include "util/error_log.h"
namespace xtreemfs {
namespace util {
void initialize_error_log(int max_entries) {
ErrorLog::error_log = new ErrorLog(max_entries);
}
// Destroys the global error log and resets ErrorLog::error_log to NULL, so
// that the pointer does not dangle and a repeated shutdown is harmless.
void shutdown_error_log() {
  delete ErrorLog::error_log;
  ErrorLog::error_log = NULL;
}
// Global error log instance; NULL until initialize_error_log() is called.
ErrorLog* ErrorLog::error_log = NULL;
} // namespace util
} // namespace xtreemfs

175
cpp/src/util/logging.cpp Normal file
View File

@@ -0,0 +1,175 @@
/*
* Copyright (c) 2009-2010 by Bjoern Kolbeck, Zuse Institute Berlin
* 2011-2012 by Michael Berlin, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "util/logging.h"
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <ctime>
#endif
#include <boost/thread.hpp>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <string>
using namespace std;
namespace xtreemfs {
namespace util {
// Creates a logger that writes to |stream|. The stream is deleted by the
// destructor.
Logging::Logging(LogLevel level, std::ostream* stream)
    : log_stream_(*stream),
      log_file_stream_(stream),
      level_(level) {}
// Creates a logger that writes to std::cout.
Logging::Logging(LogLevel level)
    : log_stream_(std::cout),
      log_file_stream_(NULL),
      level_(level) {}
// Deletes the log file stream, if this logger was created with one.
Logging::~Logging() {
  // Deleting a NULL pointer is a no-op, so no check is needed.
  delete log_file_stream_;
}
// Writes the prefix of a log line to the log stream and returns the stream,
// so that the caller can append the actual message.
//
// The prefix has the form "[ <level char> | <month>/<day> hh:mm:ss.mmm |
// <thread id> ] ". |file| and |line| are currently not part of the output.
//
// On non-Windows systems the local time is obtained with the reentrant
// localtime_r(), because localtime() returns a pointer to shared static
// storage.
std::ostream& Logging::getLog(LogLevel level, const char* file, int line) {
#ifdef WIN32
  SYSTEMTIME local_now;
  GetLocalTime(&local_now);
#else
  timeval current_time;
  gettimeofday(&current_time, 0);
  struct tm local_tm = {};
  localtime_r(&current_time.tv_sec, &local_tm);
#endif

  log_stream_
      << "[ " << levelToChar(level) << " | "
      // NOTE(mberlin): Disabled output of __FILE__ and __LINE__ since they are
      //                not used in the current (3/2012) code base.
      // << file << ":" << line << " | "

      << setiosflags(ios::dec)
#ifdef WIN32
      << setw(2) << local_now.wMonth << "/" << setw(2) << local_now.wDay << " "
      << setfill('0') << setw(2) << local_now.wHour << ":"
      << setfill('0') << setw(2) << local_now.wMinute << ":"
      << setfill('0') << setw(2) << local_now.wSecond << "."
      << setfill('0') << setw(3) << local_now.wMilliseconds << " | "
#else
      << setw(2) << (local_tm.tm_mon + 1) << "/" << setw(2)
      << local_tm.tm_mday << " "
      << setfill('0') << setw(2) << local_tm.tm_hour << ":"
      << setfill('0') << setw(2) << local_tm.tm_min << ":"
      << setfill('0') << setw(2) << local_tm.tm_sec << "."
      << setfill('0') << setw(3) << (current_time.tv_usec / 1000) << " | "
#endif
      << left << setfill(' ') << setw(14)
      << boost::this_thread::get_id() << " ] "
      // Reset modifiers.
      << setfill(' ') << resetiosflags(ios::hex | ios::left);
  return log_stream_;
}
// Maps a log level to the single character used in the log line prefix.
// For a level that matches none of the known values a message is printed to
// std::cerr and 'U' is returned.
char Logging::levelToChar(LogLevel level) {
  char symbol = '\0';
  switch (level) {
    case LEVEL_EMERG:
      symbol = 'e';
      break;
    case LEVEL_ALERT:
      symbol = 'A';
      break;
    case LEVEL_CRIT:
      symbol = 'C';
      break;
    case LEVEL_ERROR:
      symbol = 'E';
      break;
    case LEVEL_WARN:
      symbol = 'W';
      break;
    case LEVEL_NOTICE:
      symbol = 'N';
      break;
    case LEVEL_INFO:
      symbol = 'I';
      break;
    case LEVEL_DEBUG:
      symbol = 'D';
      break;
  }

  if (symbol == '\0') {
    std::cerr << "Could not determine log level." << std::endl;
    return 'U';  // unknown
  }
  return symbol;
}
// Returns true if messages of |level| are to be logged, i.e. if |level| does
// not exceed the configured level.
bool Logging::loggingActive(LogLevel level) {
  const bool active = (level <= level_);
  return active;
}
// Converts the textual name of a log level (EMERG, ALERT, CRIT, ERR,
// WARNING, NOTICE, INFO, DEBUG; case-sensitive) to the matching LogLevel.
// Returns |defaultLevel| for any other string.
LogLevel stringToLevel(std::string stringLevel, LogLevel defaultLevel) {
  struct NamedLevel {
    const char* name;
    LogLevel level;
  };
  static const NamedLevel kKnownLevels[] = {
    { "EMERG", LEVEL_EMERG },
    { "ALERT", LEVEL_ALERT },
    { "CRIT", LEVEL_CRIT },
    { "ERR", LEVEL_ERROR },
    { "WARNING", LEVEL_WARN },
    { "NOTICE", LEVEL_NOTICE },
    { "INFO", LEVEL_INFO },
    { "DEBUG", LEVEL_DEBUG },
  };
  const std::size_t known_count =
      sizeof(kKnownLevels) / sizeof(kKnownLevels[0]);

  for (std::size_t i = 0; i < known_count; ++i) {
    if (stringLevel == kKnownLevels[i].name) {
      return kKnownLevels[i].level;
    }
  }
  // Return the default.
  return defaultLevel;
}
void initialize_logger(std::string stringLevel,
std::string logfilePath,
LogLevel defaultLevel) {
initialize_logger(stringToLevel(stringLevel, defaultLevel), logfilePath);
}
/**
 * Initializes the global logger. Does nothing if it is already initialized.
 *
 * Log to a file given by logfilePath (opened in append mode). If logfilePath
 * is empty or the file cannot be opened, stdout is used.
 */
void initialize_logger(LogLevel level, std::string logfilePath) {
  // Do not initialize the logging multiple times.
  if (Logging::log) {
    return;
  }

  if (!logfilePath.empty()) {
    ofstream* logfile = new std::ofstream(logfilePath.c_str(),
                                          std::ios_base::out |
                                          std::ios_base::app);
    if (logfile->is_open()) {
      cerr << "Logging to file " << logfilePath.c_str() << "." << endl;
      // The Logging object takes ownership of the stream.
      Logging::log = new Logging(level, logfile);
      return;
    }
    // Nobody owns the unusable stream: free it before falling back.
    delete logfile;
    cerr << "Could not log to file " << logfilePath.c_str()
         << ". Fallback to stdout." << endl;
  }

  // in case of an error, log to stdout
  Logging::log = new Logging(level);
}
/**
 * Initializes the global logger so that it logs to stdout. Does nothing if
 * the logger is already initialized.
 */
void initialize_logger(LogLevel level) {
  // Do not initialize the logging multiple times.
  if (!Logging::log) {
    Logging::log = new Logging(level);
  }
}
void shutdown_logger() {
delete Logging::log;
Logging::log = NULL;
}
// Global logger instance; NULL until one of the initialize_logger()
// functions has been called and again after shutdown_logger().
Logging* Logging::log = NULL;
} // namespace util
} // namespace xtreemfs

View File

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2009 Juan Gonzalez de Benito.
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include <cmath>
#include <cstdlib>
#include <ctime>
#include "util/zipf_generator.h"
namespace xtreemfs {
namespace util {
/**
 * Creates a new ZipfGenerator with the given skewness and seeds rand() with
 * the current time.
 * skew: Desired skewness
 *
 * The size is initially invalid (-1); set_size() has to be called before
 * next() returns usable values. The normalization constant is initialized to
 * 0 so that it never holds an indeterminate value.
 */
ZipfGenerator::ZipfGenerator(const double skew) : skew(skew) {
  srand(static_cast<unsigned>(time(0)));
  size = -1;
  bottom = 0.0;
}
/**
* Returns a number from [0,this.size)
*/
int ZipfGenerator::next() {
int ret_val = -1;
// Size must be set with function set_size() before generating any rank
if (size > 0) {
int index = -1;
double frequency = 0.0f;
double dice = 0.0f;
while (dice >= frequency) {
index = rand() % size; // int in [0,size)
frequency = get_probability(index + 1); // (0 is not allowed for
// probability computation)
dice = static_cast<double>(rand())/RAND_MAX; // double in [0.0,1.0]
}
ret_val = index;
}
return ret_val;
}
/**
 * Returns the probability to choose a given index, computed as
 * (1 / index^skew) divided by the normalization constant calculated in
 * set_size(). Returns -1 for index 0, which is not a valid rank.
 */
double ZipfGenerator::get_probability(const int index) {
  if (index != 0) {
    return (1.0f / pow(index, skew)) / bottom;
  }
  return -1.0f;
}
/**
 * Sets the number of ranks of the generated indexes and recomputes the
 * normalization constant used by get_probability().
 */
void ZipfGenerator::set_size(const int new_size) {
  size = new_size;

  // calculate the generalized harmonic number of order 'size' of 'skew'
  // http://en.wikipedia.org/wiki/Harmonic_number
  bottom = 0.0f;
  int rank = 1;
  while (rank <= size) {
    bottom += (1.0f / pow(rank, skew));
    ++rank;
  }
}
} // namespace util
} // namespace xtreemfs

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,805 @@
/*
* Copyright (c) 2011 by Bjoern Kolbeck, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
#include "xtfsutil/xtfsutil_server.h"
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/thread/mutex.hpp>
#include <cassert>
#include <errno.h>
#include <list>
#ifndef WIN32
#include <sys/fcntl.h>
#endif // !WIN32
#include "json/json.h"
#include "libxtreemfs/client.h"
#include "libxtreemfs/volume.h"
#include "libxtreemfs/xtreemfs_exception.h"
#include "libxtreemfs/helper.h"
#include "util/error_log.h"
#include "util/logging.h"
using namespace std;
using namespace xtreemfs::util;
namespace xtreemfs {
// Creates a server with the given |prefix|. Volume and client are unset
// until set_volume() and set_client() are called.
XtfsUtilServer::XtfsUtilServer(const string& prefix)
    : prefix_(prefix),
      volume_(NULL),
      client_(NULL),
      xtreemfs_policies_prefix_("xtreemfs.policies.") {}
// Deletes all XCtlFile objects stored in xctl_files_.
XtfsUtilServer::~XtfsUtilServer() {
  map<std::string, XCtlFile*>::iterator entry = xctl_files_.begin();
  while (entry != xctl_files_.end()) {
    delete entry->second;
    ++entry;
  }
}
// Sets the volume the operations are executed on. The pointer is stored
// as is.
void XtfsUtilServer::set_volume(Volume* volume) {
  this->volume_ = volume;
}
// Sets the client used by the operations. The pointer is stored as is.
void XtfsUtilServer::set_client(Client* client) {
  this->client_ = client;
}
// Parses the JSON request |input_str|, executes the operation named in its
// "operation" field with the credentials |uc| and stores the JSON response
// as the last result of |file|.
//
// The response is an object with an "error" field if the input is not valid,
// the operation is unknown or the operation throws; otherwise its content is
// filled in by the operation handler.
//
// The "unknown operation" response is produced by the JSON writer, so an
// operation name containing quotes or backslashes is escaped properly.
void XtfsUtilServer::ParseAndExecute(const xtreemfs::pbrpc::UserCredentials& uc,
                                     const std::string& input_str,
                                     XCtlFile* file) {
  if (Logging::log->loggingActive(LEVEL_DEBUG)) {
    Logging::log->getLog(LEVEL_DEBUG) << "xctl op: " << input_str << endl;
  }

  // Parse json input and validate.
  Json::Reader reader;
  Json::Value input;
  if (!reader.parse(input_str, input, false)) {
    file->set_last_result("{ \"error\":\"Input is not valid JSON\" }");
    return;
  }
  if (!input.isObject()
      || !input.isMember("operation")
      || !input["operation"].isString()) {
    file->set_last_result("{ \"error\":\"Input is not valid JSON. "
                          "Expected object with operation field.\" }");
    return;
  }

  string op_name = input["operation"].asString();
  Json::Value result(Json::objectValue);
  try {
    if (op_name == "getErrors") {
      OpGetErrors(uc, input, &result);
    } else if (op_name == "getattr") {
      OpStat(uc, input, &result);
    } else if (op_name == "setDefaultSP") {
      OpSetDefaultSP(uc, input, &result);
    } else if (op_name == "setDefaultRP") {
      OpSetDefaultRP(uc, input, &result);
    } else if (op_name == "setOSP") {
      OpSetOSP(uc, input, &result);
    } else if (op_name == "setRSP") {
      OpSetRSP(uc, input, &result);
    } else if (op_name == "addReplica") {
      OpAddReplica(uc, input, &result);
    } else if (op_name == "removeReplica") {
      OpRemoveReplica(uc, input, &result);
    } else if (op_name == "getSuitableOSDs") {
      OpGetSuitableOSDs(uc, input, &result);
    } else if (op_name == "setPolicyAttr") {
      OpSetPolicyAttr(uc, input, &result);
    } else if (op_name == "listPolicyAttrs") {
      OpListPolicyAttr(uc, input, &result);
    } else if (op_name == "setReplicationPolicy") {
      OpSetReplicationPolicy(uc, input, &result);
    } else if (op_name == "enableDisableSnapshots") {
      OpEnableDisableSnapshots(uc, input, &result);
    } else if (op_name == "listSnapshots") {
      OpListSnapshots(uc, input, &result);
    } else if (op_name == "createDeleteSnapshot") {
      OpCreateDeleteSnapshot(uc, input, &result);
    } else if (op_name == "setRemoveACL") {
      OpSetRemoveACL(uc, input, &result);
    } else if (op_name == "setVolumeQuota") {
      OpSetVolumeQuota(uc, input, &result);
    } else {
      // Serialized by the writer below, which escapes the operation name.
      result["error"] =
          Json::Value("Unknown operation '" + op_name + "'.");
    }
  } catch (const XtreemFSException &e) {
    result["error"] = Json::Value(e.what());
  } catch (const exception &e) {
    result["error"] = Json::Value(string("Unknown error: ") + e.what());
  }

  Json::FastWriter writer;
  file->set_last_result(writer.write(result));
}
// Handler for "getErrors": stores the messages of the global error log as a
// JSON array in (*output)["result"]. |uc| and |input| are not used.
void XtfsUtilServer::OpGetErrors(const xtreemfs::pbrpc::UserCredentials& uc,
                                 const Json::Value& input,
                                 Json::Value* output) {
  const list<string> messages = ErrorLog::error_log->error_messages();

  Json::Value entries(Json::arrayValue);
  for (list<string>::const_iterator message = messages.begin();
       message != messages.end();
       ++message) {
    entries.append(*message);
  }
  (*output)["result"] = entries;
}
// Handler for "getattr": collects the "xtreemfs." extended attributes of
// input["path"] and stores them as a JSON object in (*output)["result"].
//
// Files additionally get "locations", directories "default_sp" and
// "default_rp", the root directory "/" further volume details, and softlinks
// their "link_target". If "path" is missing or not a string, only
// (*output)["error"] is set.
//
// For the volume root, "free_space" is the smaller of the free space left in
// the quota (never below zero) and the free space reported for the OSDs; the
// quota is ignored if it is 0. boost::lexical_cast throws if one of the
// required numeric attributes cannot be parsed.
void XtfsUtilServer::OpStat(const xtreemfs::pbrpc::UserCredentials& uc,
                            const Json::Value& input,
                            Json::Value* output) {
  if (!input.isMember("path")
      || !input["path"].isString()) {
    (*output)["error"] = Json::Value("'path' field is missing.");
    return;
  }
  const string path = input["path"].asString();

  boost::scoped_ptr<xtreemfs::pbrpc::listxattrResponse>
      xattrs(volume_->ListXAttrs(uc, path, false));

  // Keep only the XtreemFS specific attributes.
  map<string, string> xtfs_attrs;
  for (int i = 0; i < xattrs->xattrs_size(); ++i) {
    const xtreemfs::pbrpc::XAttr& xattr = xattrs->xattrs(i);
    if (boost::starts_with(xattr.name(),"xtreemfs.")) {
      xtfs_attrs[xattr.name()] = xattr.value();
    }
  }

  Json::Reader reader;
  Json::Value result(Json::objectValue);
  result["fileId"] = Json::Value(xtfs_attrs["xtreemfs.file_id"]);
  result["url"] = Json::Value(xtfs_attrs["xtreemfs.url"]);
  result["object_type"] = Json::Value(xtfs_attrs["xtreemfs.object_type"]);
  result["group"] = Json::Value(xtfs_attrs["xtreemfs.group"]);
  result["owner"] = Json::Value(xtfs_attrs["xtreemfs.owner"]);

  // Since 1.3.2 MRCs output the ACLs as JSON object.
  Json::Value acl_json;
  if (reader.parse(xtfs_attrs["xtreemfs.acl"], acl_json, false)) {
    result["acl"] = acl_json;
  } else {
    result["acl"] = Json::Value(xtfs_attrs["xtreemfs.acl"]);
  }

  const string object_type = xtfs_attrs["xtreemfs.object_type"];
  if (object_type == "1") {
    // File.
    Json::Value loc_json;
    if (reader.parse(xtfs_attrs["xtreemfs.locations"], loc_json, false)) {
      result["locations"] = loc_json;
    }
  } else if (object_type == "2") {
    // Directory.
    Json::Value sp_json;
    if (reader.parse(xtfs_attrs["xtreemfs.default_sp"], sp_json, false)) {
      result["default_sp"] = sp_json;
    }
    Json::Value rp_json;
    if (reader.parse(xtfs_attrs["xtreemfs.default_rp"], rp_json, false)) {
      result["default_rp"] = rp_json;
    }

    if (path == "/") {
      // Get more volume details.
      const uint64_t quota =
          boost::lexical_cast<uint64_t>(xtfs_attrs["xtreemfs.quota"]);
      const uint64_t used_space =
          boost::lexical_cast<uint64_t>(xtfs_attrs["xtreemfs.used_space"]);

      // Use minimum of free space relative to the quota and free space on
      // osds as free space. All values are unsigned; an exceeded quota is
      // clamped to 0 before the comparison instead of being compared as a
      // negative number.
      bool limited_by_quota = false;
      uint64_t quota_free_space = 0;
      if (quota != 0) {
        quota_free_space = (used_space < quota) ? (quota - used_space) : 0;
        limited_by_quota = quota_free_space <
            boost::lexical_cast<uint64_t>(xtfs_attrs["xtreemfs.free_space"]);
      }
      if (limited_by_quota) {
        result["free_space"] =
            Json::Value(boost::lexical_cast<std::string>(quota_free_space));
      } else {
        result["free_space"] = Json::Value(xtfs_attrs["xtreemfs.free_space"]);
      }

      result["used_space"] = Json::Value(xtfs_attrs["xtreemfs.used_space"]);
      result["ac_policy_id"] =
          Json::Value(xtfs_attrs["xtreemfs.ac_policy_id"]);
      result["osel_policy"] = Json::Value(xtfs_attrs["xtreemfs.osel_policy"]);
      result["rsel_policy"] = Json::Value(xtfs_attrs["xtreemfs.rsel_policy"]);
      result["num_dirs"] = Json::Value(xtfs_attrs["xtreemfs.num_dirs"]);
      result["num_files"] = Json::Value(xtfs_attrs["xtreemfs.num_files"]);
      result["snapshots_enabled"] =
          Json::Value(xtfs_attrs["xtreemfs.snapshots_enabled"]);

      Json::Value usable_osds_json;
      if (reader.parse(xtfs_attrs["xtreemfs.usable_osds"],
                       usable_osds_json,
                       false)) {
        result["usable_osds"] = usable_osds_json;
      }
    }
  } else if (object_type == "3") {
    // Softlink.
    string link_target;
    volume_->ReadLink(uc, path, &link_target);
    result["link_target"] = Json::Value(link_target);
  }

  (*output)["result"] = result;
}
// Handler for "setDefaultSP": writes "pattern", "size" and "width" from
// |input| as a JSON object into the "xtreemfs.default_sp" extended attribute
// of input["path"] and stores an empty object in (*output)["result"].
//
// If one of the fields "path", "pattern" (strings), "width" or "size"
// (integers) is missing or has the wrong type, only (*output)["error"] is
// set.
void XtfsUtilServer::OpSetDefaultSP(const xtreemfs::pbrpc::UserCredentials& uc,
                                    const Json::Value& input,
                                    Json::Value* output) {
  if (!input.isMember("path")
      || !input["path"].isString()
      || !input.isMember("pattern")
      || !input["pattern"].isString()
      || !input.isMember("width")
      || !input["width"].isInt()
      || !input.isMember("size")
      || !input["size"].isInt()) {
    // The message lists exactly the fields that are validated above.
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, pattern, "
                                     "width, size");
    return;
  }
  const string path = input["path"].asString();

  Json::Value xattr_value(Json::objectValue);
  xattr_value["pattern"] = input["pattern"];
  xattr_value["size"] = input["size"];
  xattr_value["width"] = input["width"];

  Json::FastWriter writer;
  volume_->SetXAttr(uc,
                    path,
                    "xtreemfs.default_sp",
                    writer.write(xattr_value),
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);
  (*output)["result"] = Json::Value(Json::objectValue);
}
// Stores a new default replication policy in the "xtreemfs.default_rp"
// extended attribute of the entry at "path".
//
// Required input fields: "path" (string), "replication-factor" (int),
// "update-policy" (string) and "replication-flags" (int). If one of them is
// missing or has the wrong type, (*output)["error"] is set and no attribute is
// written. After the attribute was written, (*output)["result"] is set to an
// empty JSON object. Errors raised by volume_->SetXAttr() are not handled here.
void XtfsUtilServer::OpSetDefaultRP(const xtreemfs::pbrpc::UserCredentials& uc,
                                    const Json::Value& input,
                                    Json::Value* output) {
  if (!input.isMember("path")
      || !input["path"].isString()
      || !input.isMember("replication-factor")
      || !input["replication-factor"].isInt()
      || !input.isMember("update-policy")
      || !input["update-policy"].isString()
      || !input.isMember("replication-flags")
      || !input["replication-flags"].isInt()) {
    // Use the real field names so the caller can map the message to its input.
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, "
                                     "replication-factor, update-policy, "
                                     "replication-flags");
    return;
  }

  const string path = input["path"].asString();

  // The xattr value is the JSON serialization of the three policy fields.
  Json::Value xattr_value(Json::objectValue);
  xattr_value["replication-factor"] = input["replication-factor"];
  xattr_value["update-policy"] = input["update-policy"];
  xattr_value["replication-flags"] = input["replication-flags"];

  Json::FastWriter writer;
  volume_->SetXAttr(uc,
                    path,
                    "xtreemfs.default_rp",
                    writer.write(xattr_value),
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  (*output)["result"] = Json::Value(Json::objectValue);
}
// Writes the string given as "policy" into the "xtreemfs.osel_policy"
// extended attribute of "path" (presumably the OSD selection policy).
//
// Both "path" and "policy" have to be present as strings; otherwise only
// (*output)["error"] is set. After the attribute was written,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpSetOSP(const xtreemfs::pbrpc::UserCredentials& uc,
                              const Json::Value& input,
                              Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("policy") && input["policy"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, policy");
    return;
  }

  const string target_path = input["path"].asString();
  const string policy_value = input["policy"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.osel_policy",
                    policy_value,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Writes the string given as "policy" into the "xtreemfs.rsel_policy"
// extended attribute of "path" (presumably the replica selection policy).
//
// Both "path" and "policy" have to be present as strings; otherwise only
// (*output)["error"] is set. After the attribute was written,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpSetRSP(const xtreemfs::pbrpc::UserCredentials& uc,
                              const Json::Value& input,
                              Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("policy") && input["policy"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, policy");
    return;
  }

  const string target_path = input["path"].asString();
  const string policy_value = input["policy"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.rsel_policy",
                    policy_value,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Sets a single policy attribute: the extended attribute named
// xtreemfs_policies_prefix_ + input["attribute"] of "path" is set to
// input["value"].
//
// "path", "attribute" and "value" have to be present as strings; otherwise
// only (*output)["error"] is set. After the attribute was written,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpSetPolicyAttr(const xtreemfs::pbrpc::UserCredentials& uc,
                                     const Json::Value& input,
                                     Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("attribute") && input["attribute"].isString() &&
      input.isMember("value") && input["value"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, attribute,"
                                     " value.");
    return;
  }

  const string target_path = input["path"].asString();
  const string xattr_name =
      xtreemfs_policies_prefix_ + input["attribute"].asString();
  const string xattr_value = input["value"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    xattr_name,
                    xattr_value,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Lists the policy attributes of "path".
//
// All extended attributes of "path" are fetched and those whose name starts
// with xtreemfs_policies_prefix_ are copied into (*output)["result"], keyed by
// the attribute name without that prefix. If "path" is missing or not a
// string, only (*output)["error"] is set.
void XtfsUtilServer::OpListPolicyAttr(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path.");
    return;
  }

  const string target_path = input["path"].asString();
  boost::scoped_ptr<xtreemfs::pbrpc::listxattrResponse>
      response(volume_->ListXAttrs(uc, target_path, false));

  (*output)["result"] = Json::Value(Json::objectValue);

  const int xattr_count = response->xattrs_size();
  for (int index = 0; index < xattr_count; ++index) {
    const std::string& full_name = response->xattrs(index).name();
    if (!boost::starts_with(full_name, xtreemfs_policies_prefix_)) {
      // Not a policy attribute.
      continue;
    }

    // Strip the policies prefix from the XAttr key.
    const std::string short_name =
        full_name.substr(xtreemfs_policies_prefix_.length());
    (*output)["result"][short_name] = response->xattrs(index).value();
  }
}
// Changes the replication update policy of "path" by writing "policy" into
// the "xtreemfs.set_repl_update_policy" extended attribute.
//
// "path" and "policy" have to be present as strings, and "policy" has to be
// one of "", "ronly", "WaR1" or "WqRq"; otherwise only (*output)["error"] is
// set. After the attribute was written, (*output)["result"] becomes an empty
// JSON object.
void XtfsUtilServer::OpSetReplicationPolicy(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("policy") && input["policy"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, policy");
    return;
  }

  const string policy_name = input["policy"].asString();
  const bool is_read_only_or_none = policy_name == "ronly" || policy_name == "";
  const bool is_known_policy =
      is_read_only_or_none || policy_name == "WqRq" || policy_name == "WaR1";
  if (!is_known_policy) {
    (*output)["error"] = Json::Value("Policy must be one of the following: "
                                     "<empty string>, ronly, WaR1, WqRq");
    return;
  }

  const string target_path = input["path"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.set_repl_update_policy",
                    policy_name,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);
  (*output)["result"] = Json::Value(Json::objectValue);

  if (!is_read_only_or_none) {
    return;
  }

  // The actual permissions of the file probably changed: fetch the attributes
  // again to update the cache. This is best effort, failures are ignored.
  try {
    xtreemfs::pbrpc::Stat refreshed_stat;
    volume_->GetAttr(uc, target_path, true, &refreshed_stat);
  } catch (const exception&) {
    // Deliberately ignored.
  }
}
// Adds a replica of the file "path" on the OSD given as "osd".
//
// Required input fields: "path" (string), "osd" (string) and
// "replication-flags" (int). If "osd" equals "AUTO" (case-insensitive), the
// first OSD returned by volume_->GetSuitableOSDs() is used instead.
// The new replica gets a RAID0 striping policy of width 1 whose stripe size
// is taken from the first existing replica of the file.
//
// On failure (*output)["error"] is set. After the replica was added,
// (*output)["result"]["osd"] holds the name of the OSD that was used.
void XtfsUtilServer::OpAddReplica(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("osd") && input["osd"].isString() &&
      input.isMember("replication-flags") &&
      input["replication-flags"].isInt();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, osd, "
                                     "replication-flags");
    return;
  }

  const int replication_flags = input["replication-flags"].asInt();
  const string file_path = input["path"].asString();
  string target_osd = input["osd"].asString();

  if (boost::to_upper_copy(target_osd) == "AUTO") {
    // Let the volume pick one suitable OSD for us.
    list<string> candidates;
    volume_->GetSuitableOSDs(uc, file_path, 1, &candidates);
    if (candidates.empty()) {
      (*output)["error"] = "No suitable OSD available for new replica.";
      return;
    }
    target_osd = candidates.front();
  }

  // The stripe size must be identical for all replicas, so it is read from
  // the first replica in the file's locations list.
  string locations_json;
  volume_->GetXAttr(uc, file_path, "xtreemfs.locations", &locations_json);

  Json::Reader json_reader;
  Json::Value xloc;
  const bool parsed = json_reader.parse(locations_json, xloc, false);
  if (!parsed) {
    (*output)["error"] = "Cannot read locations list for file. Invalid JSON.";
    return;
  }
  if (xloc["replicas"].size() == 0) {
    (*output)["error"] = "Cannot add replica for a non-assigned file.";
    return;
  }
  const int stripe_size =
      xloc["replicas"][0]["striping-policy"]["size"].asInt();

  xtreemfs::pbrpc::Replica new_replica;
  new_replica.add_osd_uuids(target_osd);
  new_replica.set_replication_flags(replication_flags);
  new_replica.mutable_striping_policy()->set_width(1);
  new_replica.mutable_striping_policy()->set_type(
      xtreemfs::pbrpc::STRIPING_POLICY_RAID0);
  new_replica.mutable_striping_policy()->set_stripe_size(stripe_size);

  volume_->AddReplica(uc, file_path, new_replica);

  (*output)["result"] = Json::Value(Json::objectValue);
  (*output)["result"]["osd"] = target_osd;
}
// Removes the replica of the file "path" that is stored on the OSD "osd".
//
// "path" and "osd" have to be present as strings; otherwise only
// (*output)["error"] is set. After volume_->RemoveReplica() returned,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpRemoveReplica(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("osd") && input["osd"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, osd");
    return;
  }

  const string replica_osd = input["osd"].asString();
  const string file_path = input["path"].asString();
  volume_->RemoveReplica(uc, file_path, replica_osd);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Returns up to 10 OSDs which volume_->GetSuitableOSDs() reports for "path".
//
// The OSDs are written to the JSON array (*output)["result"]["osds"]. Each
// entry is "<uuid> (<address>)" if the UUID could be resolved via
// client_->UUIDToAddress(), or just "<uuid>" otherwise. If "path" is missing
// or not a string, only (*output)["error"] is set.
void XtfsUtilServer::OpGetSuitableOSDs(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path");
    return;
  }

  const string file_path = input["path"].asString();
  list<string> osd_uuids;
  volume_->GetSuitableOSDs(uc, file_path, 10, &osd_uuids);

  (*output)["result"] = Json::Value(Json::objectValue);
  (*output)["result"]["osds"] = Json::Value(Json::arrayValue);
  Json::Value& osd_entries = (*output)["result"]["osds"];

  list<string>::iterator current = osd_uuids.begin();
  while (current != osd_uuids.end()) {
    string entry = *current;
    try {
      // Try to resolve the UUID to hostname and port.
      entry += " (" + client_->UUIDToAddress(*current) + ")";
    } catch (const XtreemFSException&) {
      // The address could not be obtained: report the bare UUID.
    }
    osd_entries.append(entry);
    ++current;
  }
}
// Writes the string given as "snapshots_enabled" into the
// "xtreemfs.snapshots_enabled" extended attribute of "path".
//
// "path" and "snapshots_enabled" have to be present as strings; otherwise
// only (*output)["error"] is set. After the attribute was written,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpEnableDisableSnapshots(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("snapshots_enabled") &&
      input["snapshots_enabled"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value(
        "One of the following fields is missing or has an invalid value:"
        " path, snapshots_enabled.");
    return;
  }

  const string target_path = input["path"].asString();
  const string enabled_value = input["snapshots_enabled"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.snapshots_enabled",
                    enabled_value,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Reads the "xtreemfs.snapshots" extended attribute of "path" and returns it
// as (*output)["result"]["list_snapshots"].
//
// If the attribute value is valid JSON, the parsed value is returned;
// otherwise the raw string is returned. If "path" is missing or not a string,
// only (*output)["error"] is set.
void XtfsUtilServer::OpListSnapshots(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path.");
    return;
  }

  const string target_path = input["path"].asString();
  string raw_snapshots;
  volume_->GetXAttr(uc, target_path, "xtreemfs.snapshots", &raw_snapshots);

  (*output)["result"] = Json::Value(Json::objectValue);

  // Since 1.3.2 MRCs output the list of snapshots as JSON list.
  Json::Reader json_reader;
  Json::Value parsed_snapshots;
  const bool is_json = json_reader.parse(raw_snapshots, parsed_snapshots, false);
  if (!is_json) {
    (*output)["result"]["list_snapshots"] = Json::Value(raw_snapshots);
    return;
  }
  (*output)["result"]["list_snapshots"] = parsed_snapshots;
}
// Writes the string given as "snapshots" into the "xtreemfs.snapshots"
// extended attribute of "path".
//
// "path" and "snapshots" have to be present as strings; otherwise only
// (*output)["error"] is set. After the attribute was written,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpCreateDeleteSnapshot(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("snapshots") && input["snapshots"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, snapshots.");
    return;
  }

  const string target_path = input["path"].asString();
  const string snapshot_command = input["snapshots"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.snapshots",
                    snapshot_command,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Writes the string given as "acl" into the "xtreemfs.acl" extended attribute
// of "path".
//
// "path" and "acl" have to be present as strings; otherwise only
// (*output)["error"] is set. After the attribute was written,
// (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpSetRemoveACL(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("acl") && input["acl"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, acl.");
    return;
  }

  const string target_path = input["path"].asString();
  const string acl_command = input["acl"].asString();
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.acl",
                    acl_command,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  Json::Value empty_result(Json::objectValue);
  (*output)["result"] = empty_result;
}
// Sets the "xtreemfs.quota" extended attribute of "path".
//
// The string "quota" is converted with parseByteNumber() and the resulting
// number is written in decimal notation. A conversion result of -1 is
// reported as an invalid quota, any other negative result as a negative
// quota; in both cases only (*output)["error"] is set. The same holds if
// "path" or "quota" is missing or not a string. After the attribute was
// written, (*output)["result"] becomes an empty JSON object.
void XtfsUtilServer::OpSetVolumeQuota(
    const xtreemfs::pbrpc::UserCredentials& uc,
    const Json::Value& input,
    Json::Value* output) {
  const bool request_is_valid =
      input.isMember("path") && input["path"].isString() &&
      input.isMember("quota") && input["quota"].isString();
  if (!request_is_valid) {
    (*output)["error"] = Json::Value("One of the following fields is missing or"
                                     " has an invalid value: path, quota.");
    return;
  }

  const string target_path = input["path"].asString();
  const string quota_text = input["quota"].asString();
  const long quota = parseByteNumber(quota_text);

  if (quota == -1) {
    (*output)["error"] = Json::Value(quota_text + " is not a valid quota.");
    return;
  }
  if (quota < 0) {
    (*output)["error"] = "Quota has to be greater or equal zero (was set to: "
        + boost::lexical_cast<std::string>(quota) + ")";
    return;
  }

  const std::string quota_value = boost::lexical_cast<std::string>(quota);
  volume_->SetXAttr(uc,
                    target_path,
                    "xtreemfs.quota",
                    quota_value,
                    xtreemfs::pbrpc::XATTR_FLAGS_REPLACE);

  (*output)["result"] = Json::Value(Json::objectValue);
}
// Returns true if "path" starts with prefix_, i.e. names a control file.
// On Apple platforms a path starting with "/._" followed by prefix_ without
// its first character is accepted as well (presumably the "._" companion
// files created by macOS - TODO confirm).
bool XtfsUtilServer::checkXctlFile(const std::string& path) {
#ifdef __APPLE__
  const std::string companion_prefix = "/._" + prefix_.substr(1);
  if (boost::starts_with(path, companion_prefix)) {
    return true;
  }
#endif
  return boost::starts_with(path, prefix_);
}
// Looks up the control file registered under "path" while holding
// xctl_files_mutex_.
//
// - An existing entry is returned only if it is owned by (uid, gid),
//   otherwise NULL is returned.
// - If no entry exists and "create" is true, a new XCtlFile owned by
//   (uid, gid) is registered under "path" and returned.
// - If no entry exists and "create" is false, NULL is returned.
XCtlFile* XtfsUtilServer::FindFile(uid_t uid,
                                   gid_t gid,
                                   const std::string& path,
                                   bool create) {
  boost::mutex::scoped_lock lock(xctl_files_mutex_);

  map<std::string, XCtlFile*>::iterator entry = xctl_files_.find(path);
  if (entry != xctl_files_.end()) {
    XCtlFile* existing = entry->second;
    if (!existing->is_owner(uid, gid)) {
      return NULL;
    }
    return existing;
  }

  if (!create) {
    return NULL;
  }

  XCtlFile* created = new XCtlFile();
  created->set_user(uid, gid);
  xctl_files_[path] = created;
  return created;
}
int XtfsUtilServer::create(uid_t uid,
gid_t gid,
const std::string& path) {
XCtlFile* file = FindFile(uid, gid, path, true);
if (!file) {
// A file with this name exists but belongs to another user.
return -1 * EEXIST;
}
if (file->in_use()) {
return -1 * EAGAIN;
}
return 0;
}
// Reads from the last result stored in the control file "path".
//
// Up to "size" bytes of the result, starting at byte "offset", are copied
// into "buf". This allows a caller to fetch a result which is larger than its
// buffer with several consecutive reads.
//
// Returns the number of bytes copied, 0 if "offset" is at or beyond the end
// of the result, -ENOENT if (uid, gid) has no control file "path" and
// -EINVAL if "offset" is negative.
int XtfsUtilServer::read(uid_t uid,
                         gid_t gid,
                         const std::string& path,
                         char* buf,
                         size_t size,
                         off_t offset) {
  XCtlFile* file = FindFile(uid, gid, path, false);
  if (!file) {
    return -1 * ENOENT;
  }
  if (offset < 0) {
    return -1 * EINVAL;
  }

  const size_t length = file->last_result().size();
  // Compare in a wide unsigned type first: off_t may be wider than size_t.
  if (static_cast<unsigned long long>(offset) >= length) {
    // End of the result reached.
    return 0;
  }

  const size_t start = static_cast<size_t>(offset);
  const size_t remaining = length - start;
  const size_t count = size < remaining ? size : remaining;
  memcpy(buf, file->last_result().c_str() + start, count);
  return static_cast<int>(count);
}
// Executes the request contained in buf[0, size) on the control file "path".
//
// The control file is created for (uid, gid) if it does not exist yet. While
// ParseAndExecute() runs, the file is marked as in use.
//
// Returns "size" after the request was processed, -EAGAIN if the file is
// currently in use and -EACCES if the file exists but belongs to another
// user.
int XtfsUtilServer::write(uid_t uid,
                          gid_t gid,
                          const xtreemfs::pbrpc::UserCredentials& uc,
                          const std::string& path,
                          const char *buf,
                          size_t size) {
  XCtlFile* file = FindFile(uid, gid, path, true);
  if (!file) {
    // With create == true, FindFile() returns NULL only if the file exists
    // and is owned by a different user. Report that to the caller instead of
    // aborting (or dereferencing NULL in builds without assertions).
    return -1 * EACCES;
  }
  if (file->in_use()) {
    return -1 * EAGAIN;
  }

  file->set_in_use(true);
  string input_str(buf, size);
  ParseAndExecute(uc, input_str, file);
  file->set_in_use(false);

  return size;
}
// TODO(mberlin): Fix for WIN32.
int XtfsUtilServer::getattr(uid_t uid,
gid_t gid,
const std::string& path,
struct stat* st_buf) {
XCtlFile* file = FindFile(uid, gid, path, false);
if (!file) {
return -1 * ENOENT;
}
#ifdef __linux
st_buf->st_atim.tv_sec = 0;
st_buf->st_atim.tv_nsec = 0;
st_buf->st_ctim.tv_sec = 0;
st_buf->st_ctim.tv_nsec = 0;
st_buf->st_mtim.tv_sec = 0;
st_buf->st_mtim.tv_nsec = 0;
#elif __APPLE__
st_buf->st_atimespec.tv_sec = 0;
st_buf->st_atimespec.tv_nsec = 0;
st_buf->st_ctimespec.tv_sec = 0;
st_buf->st_ctimespec.tv_nsec = 0;
st_buf->st_mtimespec.tv_sec = 0;
st_buf->st_mtimespec.tv_nsec = 0;
#endif
#ifndef WIN32
st_buf->st_blksize = 1024;
st_buf->st_blocks = 0;
st_buf->st_dev = 0;
st_buf->st_gid = file->get_gid();
st_buf->st_ino = 1;
st_buf->st_mode = S_IFREG | S_IWUSR | S_IRUSR;
st_buf->st_nlink = 1;
st_buf->st_rdev = 0;
st_buf->st_uid = file->get_uid();
st_buf->st_size = file->last_result().size();
#endif // !WIN32
return 0;
}
int XtfsUtilServer::unlink(uid_t uid,
gid_t gid,
const std::string& path) {
XCtlFile* file = FindFile(uid, gid, path, false);
if (!file) {
return -1 * ENOENT;
}
delete xctl_files_[path];
xctl_files_.erase(path);
return 0;
}
} // namespace xtreemfs