New upstream version 8.1.0
This commit is contained in:
231
meta/source/toolkit/BuddyCommTk.cpp
Normal file
231
meta/source/toolkit/BuddyCommTk.cpp
Normal file
@@ -0,0 +1,231 @@
|
||||
#include <program/Program.h>
|
||||
#include <components/buddyresyncer/BuddyResyncer.h>
|
||||
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
|
||||
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
|
||||
#include <common/toolkit/PreallocatedFile.h>
|
||||
|
||||
#include "BuddyCommTk.h"
|
||||
|
||||
// TODO: file handling here is duplicated from storage::StorageTarget, unify the two at some point
|
||||
|
||||
#define BUDDY_NEEDS_RESYNC_FILENAME ".buddyneedsresync"
|
||||
|
||||
namespace {
|
||||
|
||||
// Encoding of the buddy resync state kept in the one-byte BUDDY_NEEDS_RESYNC_FILENAME file.
// Bit 0 marks a state that mgmt has not acknowledged yet, bit 1 marks a required resync.
enum {
   // individual flag bits
   BUDDY_RESYNC_UNACKED_FLAG  = 1 << 0,
   BUDDY_RESYNC_REQUIRED_FLAG = 1 << 1,

   // the four states that can be built from the two bits
   BUDDY_RESYNC_NOT_REQUIRED         = 0,
   BUDDY_RESYNC_NOT_REQUIRED_UNACKED = BUDDY_RESYNC_UNACKED_FLAG,
   BUDDY_RESYNC_REQUIRED             = BUDDY_RESYNC_REQUIRED_FLAG,
   BUDDY_RESYNC_REQUIRED_UNACKED     = BUDDY_RESYNC_UNACKED_FLAG | BUDDY_RESYNC_REQUIRED_FLAG,
};
|
||||
|
||||
// Lock around the resync state: taken for writing by setBuddyNeedsResync() and
// retrySetBuddyNeedsResyncComm(), and for reading by getBuddyNeedsResync().
RWLock buddyNeedsResyncLock;
|
||||
// File holding the resync state as a single uint8_t (see the BUDDY_RESYNC_* values); the object is
// created in BuddyCommTk::prepareBuddyNeedsResyncState().
std::unique_ptr<PreallocatedFile<uint8_t>> buddyNeedsResyncFile;
|
||||
// Handle of the most recently enqueued retry timer entry, if any; setBuddyNeedsResync() cancels it
// before sending a new state.
boost::optional<TimerQueue::EntryHandle> setBuddyNeedsResyncEntry;
|
||||
|
||||
// Forward declaration: retrySetBuddyNeedsResyncComm() and setBuddyNeedsResync() call this before
// its definition further down in this file.
bool setBuddyNeedsResyncComm(Node& mgmtNode, const MirrorBuddyGroupMapper& bgm, TimerQueue& timerQ,
|
||||
NumNodeID localNodeID);
|
||||
|
||||
/**
 * Timer callback: takes buddyNeedsResyncLock for writing and runs another attempt to send the
 * persisted resync state to mgmt.
 *
 * The result of the attempt is ignored here; setBuddyNeedsResyncComm() itself enqueues the next
 * retry when mgmt does not respond.
 */
void retrySetBuddyNeedsResyncComm(Node& mgmtNode, const MirrorBuddyGroupMapper& bgm,
   TimerQueue& timerQ, const NumNodeID localNodeID)
{
   const RWLockGuard writeGuard(buddyNeedsResyncLock, SafeRWLock_WRITE);

   (void) setBuddyNeedsResyncComm(mgmtNode, bgm, timerQ, localNodeID);
}
|
||||
|
||||
/**
 * Persists a new "buddy needs resync" state as unacknowledged and tries to report it to mgmt.
 *
 * @param mgmtNode management node that is informed about the state
 * @param bgm buddy group mapper, passed on to setBuddyNeedsResyncComm()
 * @param timerQ queue used for retries when mgmt cannot be reached
 * @param localNodeID numeric ID of the local node
 * @param needsResync true to mark the buddy as needing a resync, false to mark it as good
 */
void setBuddyNeedsResync(Node& mgmtNode, const MirrorBuddyGroupMapper& bgm, TimerQueue& timerQ,
   const NumNodeID localNodeID, const bool needsResync)
{
   const RWLockGuard writeGuard(buddyNeedsResyncLock, SafeRWLock_WRITE);

   const auto previous = buddyNeedsResyncFile->read().get_value_or(BUDDY_RESYNC_NOT_REQUIRED);
   const auto requested = needsResync
      ? BUDDY_RESYNC_REQUIRED_UNACKED
      : BUDDY_RESYNC_NOT_REQUIRED_UNACKED;

   const bool requiredBefore = (previous & BUDDY_RESYNC_REQUIRED_FLAG) != 0;
   const bool requiredAfter = (requested & BUDDY_RESYNC_REQUIRED_FLAG) != 0;

   // A needs-resync request that is already recorded (acked or not) is not sent again: retrying
   // right after a failed communication attempt is unlikely to succeed. Requests for "needs no
   // resync" are never skipped, because externally started resyncs do not touch the
   // buddyneedsresync file but still use this function to report that the buddy is good again.
   if (needsResync && requiredBefore == requiredAfter)
      return;

   // a message goes to mgmt below anyway, so a pending retry is obsolete.
   if (setBuddyNeedsResyncEntry)
      setBuddyNeedsResyncEntry->cancel();

   buddyNeedsResyncFile->write(requested);

   const bool delivered = setBuddyNeedsResyncComm(mgmtNode, bgm, timerQ, localNodeID);
   if (!delivered)
   {
      LOG(GENERAL, CRITICAL, "Could not reach mgmt for state update, will retry.",
            ("buddyNeedsResync", needsResync));
   }
}
|
||||
|
||||
bool getBuddyNeedsResync()
|
||||
{
|
||||
const RWLockGuard lock(buddyNeedsResyncLock, SafeRWLock_READ);
|
||||
|
||||
const auto state = buddyNeedsResyncFile->read().get_value_or(BUDDY_RESYNC_NOT_REQUIRED);
|
||||
return state & BUDDY_RESYNC_REQUIRED_FLAG;
|
||||
}
|
||||
|
||||
/**
 * Reports the persisted buddy resync state to the management node if it is still unacknowledged.
 *
 * Both callers in this file invoke this while holding buddyNeedsResyncLock for writing.
 *
 * @param mgmtNode management node that receives the SetTargetConsistencyStatesMsg
 * @param bgm mapper used to look up the buddy of localNodeID
 * @param timerQ queue on which a retry is enqueued if mgmt does not respond
 * @param localNodeID numeric ID of the local node
 * @return false if no response was received from mgmt (a retry is enqueued with a 5 second
 *    delay); true in all other cases, including when mgmt rejected the state
 */
bool setBuddyNeedsResyncComm(Node& mgmtNode, const MirrorBuddyGroupMapper& bgm, TimerQueue& timerQ,
   const NumNodeID localNodeID)
{
   // this is a timer callback. as such we must be prepared to deal with the fact that we were
   // cancelled *after* we were dequeued and started executing, but were blocked on the lock in
   // retrySetBuddyNeedsResyncComm. always reading the current state and sending that fixes this:
   // if the state is not unacked we can return without doing anything, and if it is nobody can
   // change it while we are using it.
   const auto state = buddyNeedsResyncFile->read().get_value_or(BUDDY_RESYNC_NOT_REQUIRED);
   const bool needsResync = state & BUDDY_RESYNC_REQUIRED_FLAG;

   if (!(state & BUDDY_RESYNC_UNACKED_FLAG))
      return true;

   const TargetConsistencyState stateToSet = needsResync
      ? TargetConsistencyState_NEEDS_RESYNC
      : TargetConsistencyState_GOOD;

   // initialized defensively in case getBuddyTargetID leaves the flag untouched
   // NOTE(review): confirm whether getBuddyTargetID always writes the out-parameter
   bool currentIsPrimary = false;
   const uint16_t buddyTargetID = bgm.getBuddyTargetID(localNodeID.val(), &currentIsPrimary);

   // until mgmt handles resync decision, refuse to set a primary to needs-resync locally.
   // (if we are not the primary, the pending state is dropped without contacting mgmt.)
   if (!currentIsPrimary)
   {
      buddyNeedsResyncFile->write(BUDDY_RESYNC_NOT_REQUIRED);
      return true;
   }

   UInt16List targetIDList(1, buddyTargetID);
   UInt8List stateList(1, stateToSet);

   SetTargetConsistencyStatesMsg msg(NODETYPE_Meta, &targetIDList, &stateList, false);

   const auto respMsg = MessagingTk::requestResponse(mgmtNode, msg,
         NETMSGTYPE_SetTargetConsistencyStatesResp);

   if (!respMsg)
   {
      // no answer from mgmt: keep the state unacked and try again in 5 seconds.
      setBuddyNeedsResyncEntry = timerQ.enqueue(std::chrono::seconds(5), [&, localNodeID] {
         retrySetBuddyNeedsResyncComm(mgmtNode, bgm, timerQ, localNodeID);
      });
      return false;
   }

   auto* respMsgCast = static_cast<SetTargetConsistencyStatesRespMsg*>(respMsg.get());

   if (respMsgCast->getValue() != FhgfsOpsErr_SUCCESS)
   {
      LOG(GENERAL, CRITICAL, "Management node did not accept target states.", buddyTargetID,
            needsResync);
      // mgmt rejected the state: drop it locally instead of retrying.
      buddyNeedsResyncFile->write(BUDDY_RESYNC_NOT_REQUIRED);
      return true;
   }

   // mgmt accepted the state: persist it as acknowledged.
   buddyNeedsResyncFile->write(state & ~BUDDY_RESYNC_UNACKED_FLAG);
   if (state & BUDDY_RESYNC_REQUIRED_FLAG)
      LOG(GENERAL, CRITICAL, "Marked secondary buddy for needed resync.",
            ("primary node", localNodeID.val()));
   return true;
}
|
||||
|
||||
}
|
||||
|
||||
namespace BuddyCommTk
|
||||
{
|
||||
|
||||
/**
 * Creates the state file object for BUDDY_NEEDS_RESYNC_FILENAME and, if the stored state is still
 * unacknowledged, enqueues an immediate (0 second delay) attempt to report it to mgmt.
 *
 * Note: the enqueued callback captures mgmtNode, bgm and timerQ by reference.
 */
void prepareBuddyNeedsResyncState(Node& mgmtNode, const MirrorBuddyGroupMapper& bgm,
   TimerQueue& timerQ, const NumNodeID localNodeID)
{
   buddyNeedsResyncFile = std::make_unique<PreallocatedFile<uint8_t>>(
         BUDDY_NEEDS_RESYNC_FILENAME, S_IRUSR | S_IWUSR);

   const auto persisted = buddyNeedsResyncFile->read().get_value_or(0);
   const bool ackPending = (persisted & BUDDY_RESYNC_UNACKED_FLAG) != 0;

   if (!ackPending)
      return;

   setBuddyNeedsResyncEntry = timerQ.enqueue(std::chrono::seconds(0), [&, localNodeID] {
      retrySetBuddyNeedsResyncComm(mgmtNode, bgm, timerQ, localNodeID);
   });
}
|
||||
|
||||
void checkBuddyNeedsResync()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
MirrorBuddyGroupMapper* metaBuddyGroups = app->getMetaBuddyGroupMapper();
|
||||
TargetStateStore* metaNodeStates = app->getMetaStateStore();
|
||||
InternodeSyncer* internodeSyncer = app->getInternodeSyncer();
|
||||
BuddyResyncer* buddyResyncer = app->getBuddyResyncer();
|
||||
|
||||
NumNodeID localID = app->getLocalNodeNumID();
|
||||
bool isPrimary;
|
||||
NumNodeID buddyID = NumNodeID(metaBuddyGroups->getBuddyTargetID(localID.val(), &isPrimary) );
|
||||
|
||||
if (isPrimary) // Only do the check if we are the primary.
|
||||
{
|
||||
// check if the secondary is set to needs-resync by the mgmtd.
|
||||
TargetConsistencyState consistencyState = internodeSyncer->getNodeConsistencyState();
|
||||
|
||||
// If our own state is not good, don't start resync (wait until InternodeSyncer sets us
|
||||
// good again).
|
||||
if (consistencyState != TargetConsistencyState_GOOD)
|
||||
{
|
||||
LOG_DEBUG(__func__, Log_DEBUG,
|
||||
"Local node state is not good, won't check buddy state.");
|
||||
return;
|
||||
}
|
||||
|
||||
CombinedTargetState buddyState;
|
||||
if (!metaNodeStates->getState(buddyID.val(), buddyState) )
|
||||
{
|
||||
LOG_DEBUG(__func__, Log_DEBUG, "Buddy state is invalid for node ID "
|
||||
+ buddyID.str() + ".");
|
||||
return;
|
||||
}
|
||||
|
||||
if (buddyState == CombinedTargetState(TargetReachabilityState_ONLINE,
|
||||
TargetConsistencyState_NEEDS_RESYNC) )
|
||||
{
|
||||
FhgfsOpsErr resyncRes = buddyResyncer->startResync();
|
||||
|
||||
if (resyncRes == FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
LOG(MIRRORING, WARNING,
|
||||
"Starting buddy resync job.", ("Buddy node ID", buddyID.val()));
|
||||
}
|
||||
else if (resyncRes == FhgfsOpsErr_INUSE)
|
||||
{
|
||||
LOG(MIRRORING, WARNING,
|
||||
"Resync job currently running.", ("Buddy node ID", buddyID.val()));
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(MIRRORING, WARNING,
|
||||
"Starting buddy resync job failed.", ("Buddy node ID", buddyID.val()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void setBuddyNeedsResync(const std::string& path, bool needsResync)
|
||||
{
|
||||
auto* const app = Program::getApp();
|
||||
|
||||
::setBuddyNeedsResync(*app->getMgmtNodes()->referenceFirstNode(),
|
||||
*app->getMetaBuddyGroupMapper(), *app->getTimerQueue(),
|
||||
app->getLocalNode().getNumID(), needsResync);
|
||||
}
|
||||
|
||||
/**
 * @return the persisted "buddy needs resync" flag, as reported by the file-local
 *    getBuddyNeedsResync()
 */
bool getBuddyNeedsResync()
{
   const bool needsResync = ::getBuddyNeedsResync();
   return needsResync;
}
|
||||
};
|
||||
|
||||
25
meta/source/toolkit/BuddyCommTk.h
Normal file
25
meta/source/toolkit/BuddyCommTk.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/nodes/NumNodeID.h>
|
||||
|
||||
class MirrorBuddyGroupMapper;
|
||||
class TimerQueue;
|
||||
|
||||
/**
|
||||
* This contains all the functions regarding the communication with the mirror buddy.
|
||||
* In the storage server, these mostly live in the StorageTargets class, but because the metadata
|
||||
* server only has a single metadata target at the moment, we group them here.
|
||||
*/
|
||||
namespace BuddyCommTk
|
||||
{
|
||||
// Sets up the buddy-needs-resync state file; if the stored state has not been acknowledged by
// mgmt yet, an attempt to report it is enqueued on timerQ.
void prepareBuddyNeedsResyncState(Node& mgmtNode, const MirrorBuddyGroupMapper& bgm,
|
||||
TimerQueue& timerQ, NumNodeID localNodeID);
|
||||
|
||||
// On the primary: starts a buddy resync when the buddy is online and marked as needs-resync.
void checkBuddyNeedsResync();
|
||||
// Persists the needs-resync state and reports it to mgmt; the implementation does not use path.
void setBuddyNeedsResync(const std::string& path, bool needsResync);
|
||||
// Returns whether the persisted state marks the buddy as needing a resync.
bool getBuddyNeedsResync();
|
||||
};
|
||||
|
||||
152
meta/source/toolkit/StorageTkEx.cpp
Normal file
152
meta/source/toolkit/StorageTkEx.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
#include <program/Program.h>
|
||||
#include "StorageTkEx.h"
|
||||
|
||||
#include <sys/xattr.h>
|
||||
|
||||
#define STORAGETK_FORMAT_XATTR "xattr"
|
||||
|
||||
|
||||
/**
|
||||
* Note: Creates the file only if it does not exist yet.
|
||||
*
|
||||
* @return true if format file was created or existed already
|
||||
*/
|
||||
bool StorageTkEx::createStorageFormatFile(const std::string pathStr)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
Config* cfg = app->getConfig();
|
||||
|
||||
StringMap formatProperties;
|
||||
|
||||
formatProperties[STORAGETK_FORMAT_XATTR] = cfg->getStoreUseExtendedAttribs() ? "true" : "false";
|
||||
|
||||
return StorageTk::createStorageFormatFile(pathStr, STORAGETK_FORMAT_CURRENT_VERSION,
|
||||
&formatProperties);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compatibility and validity check of storage format file contents.
|
||||
*
|
||||
* @param pathStr path to the main storage working directory (not including a filename)
|
||||
* @throws exception if format file was not valid (eg didn't exist or contained wrong version).
|
||||
*/
|
||||
void StorageTkEx::checkStorageFormatFile(const std::string pathStr)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
Config* cfg = app->getConfig();
|
||||
|
||||
StringMap formatProperties;
|
||||
StringMapIter formatIter;
|
||||
|
||||
formatProperties = StorageTk::loadAndUpdateStorageFormatFile(pathStr,
|
||||
STORAGETK_FORMAT_MIN_VERSION, STORAGETK_FORMAT_CURRENT_VERSION);
|
||||
|
||||
formatIter = formatProperties.find(STORAGETK_FORMAT_XATTR);
|
||||
if(formatIter == formatProperties.end() )
|
||||
{
|
||||
throw InvalidConfigException(std::string("Property missing from storage format file: ") +
|
||||
STORAGETK_FORMAT_XATTR " (dir: " + pathStr + ")");
|
||||
}
|
||||
|
||||
if(cfg->getStoreUseExtendedAttribs() != StringTk::strToBool(formatIter->second) )
|
||||
{
|
||||
throw InvalidConfigException("Mismatch of extended attributes settings in storage format file"
|
||||
" and daemon config file.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: intended to be used with fsck only.
|
||||
* No locks at all at the moment
|
||||
*/
|
||||
FhgfsOpsErr StorageTkEx::getContDirIDsIncremental(unsigned hashDirNum, bool buddyMirrored,
|
||||
int64_t lastOffset, unsigned maxOutEntries, StringList* outContDirIDs, int64_t* outNewOffset)
|
||||
{
|
||||
const char* logContext = "StorageTkEx (get cont dir ids inc)";
|
||||
App* app = Program::getApp();
|
||||
|
||||
FhgfsOpsErr retVal = FhgfsOpsErr_INTERNAL;
|
||||
unsigned numEntries = 0;
|
||||
struct dirent* dirEntry = NULL;
|
||||
|
||||
unsigned firstLevelHashDir;
|
||||
unsigned secondLevelHashDir;
|
||||
StorageTk::splitHashDirs(hashDirNum, &firstLevelHashDir, &secondLevelHashDir);
|
||||
|
||||
const Path* dentriesPath =
|
||||
buddyMirrored ? app->getBuddyMirrorDentriesPath() : app->getDentriesPath();
|
||||
|
||||
std::string path = StorageTkEx::getMetaDentriesHashDir(dentriesPath->str(),
|
||||
firstLevelHashDir, secondLevelHashDir);
|
||||
|
||||
DIR* dirHandle = opendir(path.c_str() );
|
||||
if(!dirHandle)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("Unable to open dentries directory: ") +
|
||||
path + ". SysErr: " + System::getErrString() );
|
||||
|
||||
goto return_res;
|
||||
}
|
||||
|
||||
|
||||
errno = 0; // recommended by posix (readdir(3p) )
|
||||
|
||||
// seek to offset
|
||||
seekdir(dirHandle, lastOffset); // (seekdir has no return value)
|
||||
|
||||
// the actual entry reading
|
||||
while ( (numEntries < maxOutEntries) && (dirEntry = StorageTk::readdirFiltered(dirHandle)) )
|
||||
{
|
||||
std::string dirName = dirEntry->d_name;
|
||||
std::string dirID = dirName.substr(0, dirName.length() );
|
||||
|
||||
*outNewOffset = dirEntry->d_off;
|
||||
|
||||
// skip root dir if this is not the root MDS
|
||||
NumNodeID rootNodeNumID = Program::getApp()->getMetaRoot().getID();
|
||||
NumNodeID localNodeNumID = buddyMirrored
|
||||
? NumNodeID(Program::getApp()->getMetaBuddyGroupMapper()->getLocalGroupID())
|
||||
: Program::getApp()->getLocalNode().getNumID();
|
||||
if (unlikely ( (dirID.compare(META_ROOTDIR_ID_STR) == 0) &&
|
||||
(localNodeNumID != rootNodeNumID) ))
|
||||
continue;
|
||||
|
||||
outContDirIDs->push_back(dirID);
|
||||
numEntries++;
|
||||
}
|
||||
|
||||
if(!dirEntry && errno)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("Unable to fetch dentry from: ") +
|
||||
path + ". SysErr: " + System::getErrString() );
|
||||
}
|
||||
else
|
||||
{ // all entries read
|
||||
retVal = FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
closedir(dirHandle);
|
||||
|
||||
return_res:
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: intended to be used with fsck only
|
||||
*/
|
||||
bool StorageTkEx::getNextContDirID(unsigned hashDirNum, bool buddyMirrored, int64_t lastOffset,
|
||||
std::string* outID, int64_t* outNewOffset)
|
||||
{
|
||||
*outID = "";
|
||||
StringList outIDs;
|
||||
FhgfsOpsErr retVal = getContDirIDsIncremental(hashDirNum, buddyMirrored, lastOffset, 1, &outIDs,
|
||||
outNewOffset);
|
||||
if ( ( outIDs.empty() ) || ( retVal != FhgfsOpsErr_SUCCESS ) )
|
||||
return false;
|
||||
else
|
||||
{
|
||||
*outID = outIDs.front();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
60
meta/source/toolkit/StorageTkEx.h
Normal file
60
meta/source/toolkit/StorageTkEx.h
Normal file
@@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#include <app/config/Config.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/storage/Path.h>
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <common/toolkit/MetaStorageTk.h>
|
||||
#include <common/toolkit/StorageTk.h>
|
||||
|
||||
#include <dirent.h>
|
||||
|
||||
|
||||
/*
|
||||
* Note: Some inliners are in Commons MetaStoreTk::
|
||||
*/
|
||||
|
||||
#define STORAGETK_FORMAT_MIN_VERSION 3
|
||||
#define STORAGETK_FORMAT_CURRENT_VERSION 4
|
||||
|
||||
// forward declarations
|
||||
class DirInode;
|
||||
class FileInode;
|
||||
|
||||
class StorageTkEx
|
||||
{
|
||||
public:
|
||||
static bool createStorageFormatFile(const std::string pathStr);
|
||||
static void checkStorageFormatFile(const std::string pathStr);
|
||||
|
||||
static FhgfsOpsErr getContDirIDsIncremental(unsigned hashDirNum, bool buddyMirrored,
|
||||
int64_t lastOffset, unsigned maxOutEntries, StringList* outContDirIDs,
|
||||
int64_t* outNewOffset);
|
||||
static bool getNextContDirID(unsigned hashDirNum, bool buddyMirrored, int64_t lastOffset,
|
||||
std::string* outID, int64_t* outNewOffset);
|
||||
|
||||
private:
|
||||
StorageTkEx()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
|
||||
// inliners
|
||||
|
||||
static std::string getMetaInodeHashDir(const std::string entriesPath,
|
||||
unsigned firstLevelhashDirNum, unsigned secondLevelhashDirNum)
|
||||
{
|
||||
return entriesPath + "/" + StringTk::uintToHexStr(firstLevelhashDirNum) + "/"
|
||||
+ StringTk::uintToHexStr(secondLevelhashDirNum);
|
||||
}
|
||||
|
||||
static std::string getMetaDentriesHashDir(const std::string structurePath,
|
||||
unsigned firstLevelhashDirNum, unsigned secondLevelhashDirNum)
|
||||
{
|
||||
return structurePath + "/" + StringTk::uintToHexStr(firstLevelhashDirNum) + "/"
|
||||
+ StringTk::uintToHexStr(secondLevelhashDirNum);
|
||||
}
|
||||
};
|
||||
|
||||
208
meta/source/toolkit/XAttrTk.cpp
Normal file
208
meta/source/toolkit/XAttrTk.cpp
Normal file
@@ -0,0 +1,208 @@
|
||||
#include "XAttrTk.h"
|
||||
|
||||
#include <app/App.h>
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include <sys/xattr.h>
|
||||
#include <cerrno>
|
||||
|
||||
namespace XAttrTk
|
||||
{
|
||||
|
||||
// Name prefix of user xattrs: setUserXAttr()/removeUserXAttr() prepend it to the given name, and
// isMetaAttr() treats every attribute name that does not start with it as a metadata attribute.
const std::string UserXAttrPrefix("user.bgXA.");
|
||||
|
||||
|
||||
std::pair<FhgfsOpsErr, std::vector<std::string>> listXAttrs(const std::string& path)
|
||||
{
|
||||
ssize_t size = ::listxattr(path.c_str(), NULL, 0); // get size of raw list
|
||||
|
||||
if(size < 0)
|
||||
{
|
||||
LOG(GENERAL, ERR, "listxattr failed", path, sysErr);
|
||||
return {FhgfsOpsErr_INTERNAL, {}};
|
||||
}
|
||||
|
||||
std::unique_ptr<char[]> xAttrRawList(new (std::nothrow) char[size]);
|
||||
|
||||
if (!xAttrRawList)
|
||||
return {FhgfsOpsErr_INTERNAL, {}};
|
||||
|
||||
size = ::listxattr(path.c_str(), xAttrRawList.get(), size); // get actual raw list
|
||||
|
||||
if (size >= 0)
|
||||
{
|
||||
std::vector<std::string> names;
|
||||
|
||||
StringTk::explode(std::string(xAttrRawList.get(), size), '\0', &names);
|
||||
return {FhgfsOpsErr_SUCCESS, std::move(names)};
|
||||
}
|
||||
|
||||
int err = errno;
|
||||
|
||||
switch (err)
|
||||
{
|
||||
case ERANGE:
|
||||
case E2BIG:
|
||||
return {FhgfsOpsErr_RANGE, {}};
|
||||
|
||||
case ENOENT:
|
||||
return {FhgfsOpsErr_PATHNOTEXISTS, {}};
|
||||
|
||||
default: // don't forward other errors to the client, but log them on the server
|
||||
LOG(GENERAL, ERR, "listxattr failed", path, sysErr);
|
||||
return {FhgfsOpsErr_INTERNAL, {}};
|
||||
}
|
||||
}
|
||||
|
||||
std::tuple<FhgfsOpsErr, std::vector<char>, ssize_t> getXAttr(const std::string& path,
|
||||
const std::string& name, size_t maxSize)
|
||||
{
|
||||
std::vector<char> result(maxSize, 0);
|
||||
|
||||
ssize_t size = getxattr(path.c_str(), name.c_str(), &result.front(), result.size());
|
||||
|
||||
if (size >= 0)
|
||||
{
|
||||
result.resize(std::min<size_t>(size, maxSize));
|
||||
return std::make_tuple(FhgfsOpsErr_SUCCESS, std::move(result), size);
|
||||
}
|
||||
|
||||
switch (errno)
|
||||
{
|
||||
case ENODATA:
|
||||
return std::make_tuple(FhgfsOpsErr_NODATA, std::vector<char>(), ssize_t(0));
|
||||
|
||||
case ERANGE:
|
||||
case E2BIG:
|
||||
return std::make_tuple(FhgfsOpsErr_RANGE, std::vector<char>(), ssize_t(0));
|
||||
|
||||
case ENOENT:
|
||||
return std::make_tuple(FhgfsOpsErr_PATHNOTEXISTS, std::vector<char>(), ssize_t(0));
|
||||
|
||||
default: // don't forward other errors to the client, but log them on the server
|
||||
LOG(GENERAL, ERR, "getxattr failed", path, name, sysErr);
|
||||
return std::make_tuple(FhgfsOpsErr_INTERNAL, std::vector<char>(), ssize_t(0));
|
||||
}
|
||||
}
|
||||
|
||||
void sanitizeForUser(std::vector<std::string>& names)
|
||||
{
|
||||
removeMetadataAttrs(names);
|
||||
|
||||
for (auto it = names.begin(); it != names.end(); ++it)
|
||||
it->erase(0, UserXAttrPrefix.size());
|
||||
}
|
||||
|
||||
static bool isMetaAttr(const std::string& attrName)
|
||||
{
|
||||
return attrName.compare(0, UserXAttrPrefix.size(), UserXAttrPrefix) != 0;
|
||||
}
|
||||
|
||||
void removeMetadataAttrs(std::vector<std::string>& names)
|
||||
{
|
||||
names.erase(
|
||||
std::remove_if(names.begin(), names.end(), isMetaAttr),
|
||||
names.end());
|
||||
}
|
||||
|
||||
/**
 * Sets a user xattr; UserXAttrPrefix is prepended to name.
 *
 * If the config limits the xattr list length, the attribute is first set with XATTR_REPLACE added
 * to flags. Only if that fails with ENODATA (and the caller did not ask for XATTR_REPLACE) is a
 * new attribute created, after checking that the name list stays within XATTR_LIST_MAX.
 *
 * @param flags flags as for setxattr (XATTR_CREATE / XATTR_REPLACE)
 * @return FhgfsOpsErr_SUCCESS on success; FhgfsOpsErr_NOSPACE if creating the attribute would
 *    exceed XATTR_LIST_MAX; otherwise the mapped errno of the failed syscall
 */
FhgfsOpsErr setUserXAttr(const std::string& path, const std::string& name, const void* value,
   size_t size, int flags)
{
   const bool limitXAttrListLength = Program::getApp()->getConfig()->getLimitXAttrListLength();
   const std::string fullName = UserXAttrPrefix + name;

   int res = setxattr(path.c_str(), fullName.c_str(), value, size,
      limitXAttrListLength ? (flags | XATTR_REPLACE) : flags);
   int err = (res < 0) ? errno : 0; // capture right away, later calls may modify errno

   // if xattr list length is limited and we were not able to replace the attribute, we have to
   // create a new one. (the user may specify XATTR_REPLACE as well, so that has to be checked for)
   // if we have to create a new attribute, we must ensure that the xattr list length after the
   // creation of the attribute does not exceed XATTR_LIST_MAX.
   // use a large hash of mutexes to exclude concurrent setxattr operations on the same path.
   // ideally we would want to use one mutex per worker thread, and ensure that each path has its
   // own mutex, but that's not possible. using 1024 mutexes (more than one per worker, in the
   // default configuration) and hashed pathnames is pretty much the best we can do here.
   if (res < 0 && limitXAttrListLength && err == ENODATA && !(flags & XATTR_REPLACE))
   {
      static std::array<Mutex, 1024> mutexHash;

      Mutex& mtx = mutexHash[std::hash<std::string>()(path) % mutexHash.size()];

      std::lock_guard<Mutex> lock(mtx);

      const ssize_t listRes = ::listxattr(path.c_str(), NULL, 0);

      if (listRes < 0)
      {
         // the list length could not be determined, so the limit cannot be enforced: report
         // the listxattr error instead of creating the attribute unchecked.
         res = -1;
         err = errno;
      }
      else if (static_cast<size_t>(listRes) + fullName.size() + 1 > XATTR_LIST_MAX)
      {
         return FhgfsOpsErr_NOSPACE;
      }
      else
      {
         res = setxattr(path.c_str(), fullName.c_str(), value, size, flags);
         err = (res < 0) ? errno : 0;
      }
   }

   if (res == 0)
      return FhgfsOpsErr_SUCCESS;

   switch (err)
   {
      case EEXIST:
         return FhgfsOpsErr_EXISTS;

      case ENODATA:
         return FhgfsOpsErr_NODATA;

      case ERANGE:
      case E2BIG:
         return FhgfsOpsErr_RANGE;

      case ENOENT:
         return FhgfsOpsErr_PATHNOTEXISTS;

      case ENOSPC:
         return FhgfsOpsErr_NOSPACE;

      default: // don't forward other errors to the client, but log them on the server
         errno = err; // the log statement below reports the system error of the failed call
         LOG(GENERAL, ERR, "failed to set xattr", path, name, sysErr);
         return FhgfsOpsErr_INTERNAL;
   }
}
|
||||
|
||||
/**
 * Removes a user xattr; UserXAttrPrefix is prepended to name.
 *
 * @return FhgfsOpsErr_SUCCESS, or the mapped errno of removexattr (unmapped errors are logged
 *    and reported as FhgfsOpsErr_INTERNAL)
 */
FhgfsOpsErr removeUserXAttr(const std::string& path, const std::string& name)
{
   const std::string fullName = UserXAttrPrefix + name;

   if (removexattr(path.c_str(), fullName.c_str()) == 0)
      return FhgfsOpsErr_SUCCESS;

   const int removeErr = errno;

   if (removeErr == ENODATA)
      return FhgfsOpsErr_NODATA;

   if (removeErr == ERANGE || removeErr == E2BIG)
      return FhgfsOpsErr_RANGE;

   if (removeErr == ENOENT)
      return FhgfsOpsErr_PATHNOTEXISTS;

   // don't forward other errors to the client, but log them on the server
   LOG(GENERAL, ERR, "failed to remove xattr", path, name, sysErr);
   return FhgfsOpsErr_INTERNAL;
}
|
||||
|
||||
std::pair<FhgfsOpsErr, std::vector<std::string>> listUserXAttrs(const std::string& path)
|
||||
{
|
||||
auto listRes = listXAttrs(path);
|
||||
if (listRes.first == FhgfsOpsErr_SUCCESS)
|
||||
sanitizeForUser(listRes.second);
|
||||
|
||||
return listRes;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
33
meta/source/toolkit/XAttrTk.h
Normal file
33
meta/source/toolkit/XAttrTk.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/storage/StorageErrors.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace XAttrTk
|
||||
{
|
||||
extern const std::string UserXAttrPrefix;
|
||||
|
||||
std::pair<FhgfsOpsErr, std::vector<std::string>> listXAttrs(const std::string& path);
|
||||
|
||||
std::tuple<FhgfsOpsErr, std::vector<char>, ssize_t> getXAttr(const std::string& path,
|
||||
const std::string& name, size_t maxSize);
|
||||
|
||||
void sanitizeForUser(std::vector<std::string>& names);
|
||||
|
||||
void removeMetadataAttrs(std::vector<std::string>& names);
|
||||
|
||||
FhgfsOpsErr setUserXAttr(const std::string& path, const std::string& name, const void* value,
|
||||
size_t size, int flags);
|
||||
|
||||
FhgfsOpsErr removeUserXAttr(const std::string& path, const std::string& name);
|
||||
|
||||
inline std::tuple<FhgfsOpsErr, std::vector<char>, ssize_t> getUserXAttr(const std::string& path,
|
||||
const std::string& name, size_t maxSize)
|
||||
{
|
||||
return getXAttr(path, UserXAttrPrefix + name, maxSize);
|
||||
}
|
||||
|
||||
std::pair<FhgfsOpsErr, std::vector<std::string>> listUserXAttrs(const std::string& path);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user