New upstream version 8.1.0

commit c891bb7105 by geos_one, 2025-08-10 01:34:16 +02:00
4398 changed files with 838833 additions and 0 deletions

storage/source/app/App.cpp (new file, 1530 lines)

File diff suppressed because it is too large

storage/source/app/App.h (new file, 424 lines)

@@ -0,0 +1,424 @@
#pragma once
#include <app/config/Config.h>
#include <common/app/log/LogContext.h>
#include <common/app/log/Logger.h>
#include <common/app/AbstractApp.h>
#include <common/components/streamlistenerv2/ConnAcceptor.h>
#include <common/components/streamlistenerv2/StreamListenerV2.h>
#include <common/components/worker/queue/MultiWorkQueue.h>
#include <common/components/worker/Worker.h>
#include <common/components/TimerQueue.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/nodes/TargetStateStore.h>
#include <common/storage/Path.h>
#include <common/storage/Storagedata.h>
#include <common/toolkit/AcknowledgmentStore.h>
#include <common/storage/quota/ExceededQuotaStore.h>
#include <common/toolkit/NetFilter.h>
#include <common/Common.h>
#include <components/benchmarker/StorageBenchOperator.h>
#include <components/buddyresyncer/BuddyResyncer.h>
#include <components/chunkfetcher/ChunkFetcher.h>
#include <components/DatagramListener.h>
#include <components/InternodeSyncer.h>
#include <components/StorageStatsCollector.h>
#include <net/message/NetMessageFactory.h>
#include <nodes/StorageNodeOpStats.h>
#include <session/SessionStore.h>
#include <storage/ChunkLockStore.h>
#include <storage/ChunkStore.h>
#include <storage/SyncedStoragePaths.h>
#include <storage/StorageTargets.h>
#include <toolkit/QuotaTk.h>
#ifndef BEEGFS_VERSION
#error BEEGFS_VERSION undefined
#endif
// program return codes
#define APPCODE_NO_ERROR 0
#define APPCODE_INVALID_CONFIG 1
#define APPCODE_INITIALIZATION_ERROR 2
#define APPCODE_RUNTIME_ERROR 3
typedef std::list<Worker*> WorkerList;
typedef WorkerList::iterator WorkerListIter;
typedef std::vector<StreamListenerV2*> StreamLisVec;
typedef StreamLisVec::iterator StreamLisVecIter;
// forward declarations
class LogContext;
class App : public AbstractApp
{
public:
App(int argc, char** argv);
virtual ~App();
virtual void run() override;
virtual void stopComponents() override;
virtual void handleComponentException(std::exception& e) override;
virtual void handleNetworkInterfaceFailure(const std::string& devname) override;
void handleNetworkInterfacesChanged(NicAddressList nicList);
private:
int appResult;
int argc;
char** argv;
Config* cfg;
LogContext* log;
std::list<std::string> allowedInterfaces;
LockFD pidFileLockFD;
std::vector<LockFD> storageTargetLocks;
NetFilter* netFilter; // empty filter means "all nets allowed"
NetFilter* tcpOnlyFilter; // for IPs that allow only plain TCP (no RDMA etc)
std::shared_ptr<Node> localNode;
NodeStoreServers* mgmtNodes;
NodeStoreServers* metaNodes; // needed for backward communication introduced with GAM integration
NodeStoreServers* storageNodes;
TargetMapper* targetMapper;
MirrorBuddyGroupMapper* mirrorBuddyGroupMapper; // maps targets to mirrorBuddyGroups
TargetStateStore* targetStateStore; // map storage targets to a state
MultiWorkQueueMap workQueueMap; // maps targetIDs to WorkQueues
SessionStore* sessions;
StorageNodeOpStats* nodeOperationStats; // file system operation statistics
AcknowledgmentStore* ackStore;
NetMessageFactory* netMessageFactory;
StorageTargets* storageTargets; // target IDs and corresponding storage paths
SyncedStoragePaths* syncedStoragePaths; // serializes access to paths (=> entryIDs)
StorageBenchOperator* storageBenchOperator; // benchmark for the storage
DatagramListener* dgramListener;
ConnAcceptor* connAcceptor;
StatsCollector* statsCollector;
InternodeSyncer* internodeSyncer;
TimerQueue* timerQueue;
ChunkFetcher* chunkFetcher;
unsigned numStreamListeners; // value copied from cfg (for performance)
StreamLisVec streamLisVec;
WorkerList workerList;
bool workersRunning;
Mutex mutexWorkersRunning;
ChunkStore* chunkDirStore;
unsigned nextNumaBindTarget; // the numa node to which we will bind the next component thread
ExceededQuotaPerTarget exceededQuotaStores;
BuddyResyncer* buddyResyncer;
ChunkLockStore* chunkLockStore;
std::unique_ptr<StoragePoolStore> storagePoolStore;
void* dlOpenHandleLibZfs; // handle of the libzfs from dlopen
bool libZfsErrorReported;
void runNormal();
void streamListenersInit();
void streamListenersStart();
void streamListenersStop();
void streamListenersDelete();
void streamListenersJoin();
void workersInit();
void workersStart();
void workersStop();
void workersDelete();
void workersJoin();
void initLogging();
void initDataObjects();
void initBasicNetwork();
void initLocalNodeIDs(NumNodeID& outLocalNodeNumID);
void initLocalNode(NumNodeID localNodeNumID);
void initLocalNodeNumIDFile(NumNodeID localNodeNumID);
void preinitStorage();
void checkTargetsUUIDs();
void initStorage();
void initPostTargetRegistration();
void initComponents();
void startComponents();
void joinComponents();
bool waitForMgmtNode();
bool preregisterNode(NumNodeID& outLocalNodeNumID);
boost::optional<std::map<uint16_t, std::unique_ptr<StorageTarget>>> preregisterTargets(
const NumNodeID localNodeNumID);
bool preregisterTarget(Node& mgmtNode, std::string targetID, uint16_t targetNumID,
uint16_t* outNewTargetNumID);
bool registerAndDownloadMgmtInfo();
void logInfos();
void setUmask();
void daemonize();
void registerSignalHandler();
static void signalHandler(int sig);
bool restoreSessions();
bool storeSessions();
bool deleteSessionFiles();
bool openLibZfs();
bool closeLibZfs();
public:
/**
* Get one of the available stream listeners based on the socket file descriptor number.
* This is to load-balance the sockets over all available stream listeners and ensure that
* sockets are not bouncing between different stream listeners.
*
* Note that IB connections eat two fd numbers, so 2 and multiples of 2 might not be a good
* value for number of stream listeners.
*/
virtual StreamListenerV2* getStreamListenerByFD(int fd) override
{
return streamLisVec[fd % numStreamListeners];
}
// getters & setters
virtual const ICommonConfig* getCommonConfig() const override
{
return cfg;
}
virtual const NetFilter* getNetFilter() const override
{
return netFilter;
}
virtual const NetFilter* getTcpOnlyFilter() const override
{
return tcpOnlyFilter;
}
virtual const AbstractNetMessageFactory* getNetMessageFactory() const override
{
return netMessageFactory;
}
AcknowledgmentStore* getAckStore() const
{
return ackStore;
}
Config* getConfig() const
{
return cfg;
}
void updateLocalNicList(NicAddressList& localNicList);
Node& getLocalNode() const
{
return *localNode;
}
NodeStoreServers* getMgmtNodes() const
{
return mgmtNodes;
}
NodeStoreServers* getMetaNodes() const
{
return metaNodes;
}
NodeStoreServers* getStorageNodes() const
{
return storageNodes;
}
TargetMapper* getTargetMapper() const
{
return targetMapper;
}
MirrorBuddyGroupMapper* getMirrorBuddyGroupMapper() const
{
return mirrorBuddyGroupMapper;
}
TargetStateStore* getTargetStateStore() const
{
return targetStateStore;
}
MultiWorkQueue* getWorkQueue(uint16_t targetID) const
{
MultiWorkQueueMapCIter iter = workQueueMap.find(targetID);
if(iter != workQueueMap.end() )
return iter->second;
/* note: it's not unusual to not find given targetID, e.g.
- when per-target queues are disabled
- or when server restarted without one of its targets (and clients don't know that)
- or if client couldn't provide targetID because it's not a target message */
return workQueueMap.begin()->second;
}
MultiWorkQueueMap* getWorkQueueMap()
{
return &workQueueMap;
}
SessionStore* getSessions() const
{
return sessions;
}
StorageNodeOpStats* getNodeOpStats() const
{
return nodeOperationStats;
}
StorageTargets* getStorageTargets() const
{
return storageTargets;
}
SyncedStoragePaths* getSyncedStoragePaths() const
{
return syncedStoragePaths;
}
StorageBenchOperator* getStorageBenchOperator() const
{
return this->storageBenchOperator;
}
DatagramListener* getDatagramListener() const
{
return dgramListener;
}
const StreamLisVec* getStreamListenerVec() const
{
return &streamLisVec;
}
StatsCollector* getStatsCollector() const
{
return statsCollector;
}
InternodeSyncer* getInternodeSyncer() const
{
return internodeSyncer;
}
TimerQueue* getTimerQueue() const
{
return timerQueue;
}
int getAppResult() const
{
return appResult;
}
bool getWorkersRunning()
{
const std::lock_guard<Mutex> lock(mutexWorkersRunning);
return this->workersRunning;
}
ChunkStore* getChunkDirStore() const
{
return this->chunkDirStore;
}
ChunkFetcher* getChunkFetcher() const
{
return this->chunkFetcher;
}
const ExceededQuotaPerTarget* getExceededQuotaStores() const
{
return &exceededQuotaStores;
}
BuddyResyncer* getBuddyResyncer() const
{
return this->buddyResyncer;
}
ChunkLockStore* getChunkLockStore() const
{
return chunkLockStore;
}
WorkerList* getWorkers()
{
return &workerList;
}
StoragePoolStore* getStoragePoolStore() const
{
return storagePoolStore.get();
}
void setLibZfsErrorReported(bool isReported)
{
libZfsErrorReported = isReported;
}
void* getDlOpenHandleLibZfs()
{
if(dlOpenHandleLibZfs)
return dlOpenHandleLibZfs;
else
if(cfg->getQuotaDisableZfsSupport() )
{
if(!libZfsErrorReported)
{
LOG(QUOTA, ERR, "Quota support for ZFS is disabled.");
libZfsErrorReported = true;
}
}
else
if(!libZfsErrorReported)
openLibZfs();
return dlOpenHandleLibZfs;
}
bool isDlOpenHandleLibZfsValid()
{
if(dlOpenHandleLibZfs)
return true;
return false;
}
void findAllowedInterfaces(NicAddressList& outList) const;
void findAllowedRDMAInterfaces(NicAddressList& outList) const;
};
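
A small illustrative program (not part of the sources) that works through the getStreamListenerByFD() note above: when every connection occupies two consecutive fd numbers, an even listener count makes the fd-modulo dispatch collapse onto a single listener.

// Illustrative sketch only: shows why "fd % numStreamListeners" can become unbalanced when
// each connection consumes two consecutive fd numbers (as noted for IB connections above)
// and the number of listeners is even.
#include <cstdio>

static int pickListener(int fd, unsigned numListeners)
{
   return fd % numListeners; // same rule as App::getStreamListenerByFD()
}

int main()
{
   const unsigned numListeners = 2;

   // if every connection occupies fds (n, n+1), the "primary" fds are 10, 12, 14, ...
   for (int primaryFD = 10; primaryFD <= 14; primaryFD += 2)
      std::printf("fd %d -> listener %d\n", primaryFD, pickListener(primaryFD, numListeners));

   // output: all three connections land on listener 0 while listener 1 stays idle
   return 0;
}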


@@ -0,0 +1,256 @@
#include <common/toolkit/StringTk.h>
#include <common/toolkit/UnitTk.h>
#include "Config.h"
#define CONFIG_DEFAULT_CFGFILENAME "/etc/beegfs/beegfs-storage.conf"
#define CONFIG_STORAGETARGETS_DELIMITER ','
Config::Config(int argc, char** argv) :
AbstractConfig(argc, argv)
{
initConfig(argc, argv, true);
}
/**
* Sets the default values for each configurable in the configMap.
*
* @param addDashes currently unused
*/
void Config::loadDefaults(bool addDashes)
{
AbstractConfig::loadDefaults();
// re-definitions
configMapRedefine("cfgFile", "");
// own definitions
configMapRedefine("connInterfacesFile", "");
configMapRedefine("connInterfacesList", "");
configMapRedefine("storeStorageDirectory", "");
configMapRedefine("storeFsUUID", "");
configMapRedefine("storeAllowFirstRunInit", "true");
configMapRedefine("tuneNumStreamListeners", "1");
configMapRedefine("tuneNumWorkers", "8");
configMapRedefine("tuneWorkerBufSize", "4m");
configMapRedefine("tuneProcessFDLimit", "50000");
configMapRedefine("tuneWorkerNumaAffinity", "false");
configMapRedefine("tuneListenerNumaAffinity", "false");
configMapRedefine("tuneListenerPrioShift", "-1");
configMapRedefine("tuneBindToNumaZone", "");
configMapRedefine("tuneFileReadSize", "32k");
configMapRedefine("tuneFileReadAheadTriggerSize", "4m");
configMapRedefine("tuneFileReadAheadSize", "0");
configMapRedefine("tuneFileWriteSize", "64k");
configMapRedefine("tuneFileWriteSyncSize", "0");
configMapRedefine("tuneUsePerUserMsgQueues", "false");
configMapRedefine("tuneDirCacheLimit", "1024");
configMapRedefine("tuneEarlyStat", "false");
configMapRedefine("tuneNumResyncSlaves", "12");
configMapRedefine("tuneNumResyncGatherSlaves", "6");
configMapRedefine("tuneUseAggressiveStreamPoll", "false");
configMapRedefine("tuneUsePerTargetWorkers", "true");
configMapRedefine("quotaEnableEnforcement", "false");
configMapRedefine("quotaDisableZfsSupport", "false");
configMapRedefine("sysResyncSafetyThresholdMins", "10");
configMapRedefine("sysTargetOfflineTimeoutSecs", "180");
configMapRedefine("runDaemonized", "false");
configMapRedefine("pidFile", "");
}
/**
* @param addDashes currently unused
*/
void Config::applyConfigMap(bool enableException, bool addDashes)
{
AbstractConfig::applyConfigMap(false);
for (StringMapIter iter = configMap.begin(); iter != configMap.end();)
{
bool unknownElement = false;
if (iter->first == std::string("logType"))
{
if (iter->second == "syslog")
{
logType = LogType_SYSLOG;
}
else if (iter->second == "logfile")
{
logType = LogType_LOGFILE;
}
else
{
throw InvalidConfigException("The value of config argument logType is invalid.");
}
}
else if (iter->first == std::string("connInterfacesFile"))
connInterfacesFile = iter->second;
else if (iter->first == std::string("connInterfacesList"))
connInterfacesList = iter->second;
else if (iter->first == std::string("storeStorageDirectory"))
{
storageDirectories.clear();
std::list<std::string> split;
StringTk::explode(iter->second, CONFIG_STORAGETARGETS_DELIMITER, &split);
std::transform(
split.begin(), split.end(),
std::back_inserter(storageDirectories),
[] (const std::string& p) {
return Path(StringTk::trim(p));
});
storageDirectories.remove_if(std::mem_fn(&Path::empty));
}
else if (iter->first == std::string("storeFsUUID"))
{
storeFsUUID.clear();
std::list<std::string> split;
StringTk::explode(iter->second, CONFIG_STORAGETARGETS_DELIMITER, &split);
std::transform(
split.begin(), split.end(),
std::back_inserter(storeFsUUID),
[] (const std::string& p) {
return StringTk::trim(p);
});
storeFsUUID.remove_if(std::mem_fn(&std::string::empty));
}
else if (iter->first == std::string("storeAllowFirstRunInit"))
storeAllowFirstRunInit = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneNumStreamListeners"))
tuneNumStreamListeners = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneNumWorkers"))
tuneNumWorkers = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneWorkerBufSize"))
tuneWorkerBufSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneProcessFDLimit"))
tuneProcessFDLimit = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneWorkerNumaAffinity"))
tuneWorkerNumaAffinity = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneListenerNumaAffinity"))
tuneListenerNumaAffinity = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneBindToNumaZone"))
{
if (iter->second.empty()) // not defined => disable
tuneBindToNumaZone = -1; // -1 means disable binding
else
tuneBindToNumaZone = StringTk::strToInt(iter->second);
}
else if (iter->first == std::string("tuneListenerPrioShift"))
tuneListenerPrioShift = StringTk::strToInt(iter->second);
else if (iter->first == std::string("tuneFileReadSize"))
tuneFileReadSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneFileReadAheadTriggerSize"))
tuneFileReadAheadTriggerSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneFileReadAheadSize"))
tuneFileReadAheadSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneFileWriteSize"))
tuneFileWriteSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneFileWriteSyncSize"))
tuneFileWriteSyncSize = UnitTk::strHumanToInt64(iter->second);
else if (iter->first == std::string("tuneUsePerUserMsgQueues"))
tuneUsePerUserMsgQueues = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneDirCacheLimit"))
tuneDirCacheLimit = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneEarlyStat"))
this->tuneEarlyStat = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneNumResyncGatherSlaves"))
this->tuneNumResyncGatherSlaves = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneNumResyncSlaves"))
this->tuneNumResyncSlaves = StringTk::strToUInt(iter->second);
else if (iter->first == std::string("tuneUseAggressiveStreamPoll"))
tuneUseAggressiveStreamPoll = StringTk::strToBool(iter->second);
else if (iter->first == std::string("tuneUsePerTargetWorkers"))
tuneUsePerTargetWorkers = StringTk::strToBool(iter->second);
else if (iter->first == std::string("quotaEnableEnforcement"))
quotaEnableEnforcement = StringTk::strToBool(iter->second);
else if (iter->first == std::string("quotaDisableZfsSupport"))
quotaDisableZfsSupport = StringTk::strToBool(iter->second);
else if (iter->first == std::string("sysResyncSafetyThresholdMins"))
sysResyncSafetyThresholdMins = StringTk::strToInt64(iter->second);
else if (iter->first == std::string("sysTargetOfflineTimeoutSecs"))
{
sysTargetOfflineTimeoutSecs = StringTk::strToUInt(iter->second);
if (sysTargetOfflineTimeoutSecs < 30)
{
throw InvalidConfigException("Invalid sysTargetOfflineTimeoutSecs value "
+ iter->second + " (must be at least 30)");
}
}
else if (iter->first == std::string("runDaemonized"))
runDaemonized = StringTk::strToBool(iter->second);
else if (iter->first == std::string("pidFile"))
pidFile = iter->second;
else
{
// unknown element occurred
unknownElement = true;
if (enableException)
{
throw InvalidConfigException("The config argument '" + iter->first + "' is invalid");
}
}
if (unknownElement)
{
// just skip the unknown element
iter++;
}
else
{
// remove this element from the map
iter = eraseFromConfigMap(iter);
}
}
}
void Config::initImplicitVals()
{
// tuneFileReadAheadTriggerSize (should be ">= tuneFileReadAheadSize")
if(tuneFileReadAheadTriggerSize < tuneFileReadAheadSize)
tuneFileReadAheadTriggerSize = tuneFileReadAheadSize;
// connInterfacesList(/File)
AbstractConfig::initInterfacesList(connInterfacesFile, connInterfacesList);
AbstractConfig::initSocketBufferSizes();
// check if sync_file_range was enabled on a distro that doesn't support it
#ifndef CONFIG_DISTRO_HAS_SYNC_FILE_RANGE
if(tuneFileWriteSyncSize)
{
throw InvalidConfigException(
"Config option is not supported for this distribution: 'tuneFileWriteSyncSize'");
}
#endif
// connAuthHash
AbstractConfig::initConnAuthHash(connAuthFile, &connAuthHash);
}
std::string Config::createDefaultCfgFilename() const
{
struct stat statBuf;
const int statRes = stat(CONFIG_DEFAULT_CFGFILENAME, &statBuf);
if(!statRes && S_ISREG(statBuf.st_mode) )
return CONFIG_DEFAULT_CFGFILENAME; // there appears to be a config file
return ""; // no default file otherwise
}
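
The storeStorageDirectory / storeFsUUID handling in applyConfigMap() above splits a comma-separated value, trims each entry and drops empty ones. A self-contained sketch of the same pattern using only the standard library; the explode()/trim() helpers below are simplified stand-ins for the StringTk functions, not the real implementations.

#include <algorithm>
#include <list>
#include <sstream>
#include <string>

// simplified stand-in for StringTk::explode()
static std::list<std::string> explode(const std::string& s, char delimiter)
{
   std::list<std::string> parts;
   std::stringstream stream(s);
   std::string part;

   while (std::getline(stream, part, delimiter))
      parts.push_back(part);

   return parts;
}

// simplified stand-in for StringTk::trim()
static std::string trim(const std::string& s)
{
   const std::string::size_type first = s.find_first_not_of(" \t");
   if (first == std::string::npos)
      return "";

   const std::string::size_type last = s.find_last_not_of(" \t");
   return s.substr(first, last - first + 1);
}

// "/data/target1, /data/target2,," -> { "/data/target1", "/data/target2" }
std::list<std::string> parseStorageDirectories(const std::string& configValue)
{
   std::list<std::string> dirs = explode(configValue, ',');

   std::transform(dirs.begin(), dirs.end(), dirs.begin(), trim);
   dirs.remove_if([] (const std::string& dir) { return dir.empty(); });

   return dirs;
}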


@@ -0,0 +1,229 @@
#pragma once
#include <common/app/config/AbstractConfig.h>
/**
* Find out whether this distro has sync_file_range() support (added in linux-2.6.17, glibc 2.6).
* Note: The problem is that RHEL 5 defines SYNC_FILE_RANGE_WRITE but uses glibc 2.5, which has no
* sync_file_range() support, so the linker complains about an undefined reference.
*/
#ifdef __GNUC__
#include <features.h>
#include <fcntl.h>
#if __GLIBC_PREREQ(2, 6) && defined(SYNC_FILE_RANGE_WRITE)
#define CONFIG_DISTRO_HAS_SYNC_FILE_RANGE
#endif
#endif
class Config : public AbstractConfig
{
public:
Config(int argc, char** argv);
private:
// configurables
std::string connInterfacesFile; // implicitly generates connInterfacesList
std::string connInterfacesList; // comma-separated list
std::list<Path> storageDirectories;
std::list<std::string> storeFsUUID;
bool storeAllowFirstRunInit;
unsigned tuneNumStreamListeners;
unsigned tuneNumWorkers;
unsigned tuneWorkerBufSize;
unsigned tuneProcessFDLimit; // 0 means "don't touch limit"
bool tuneWorkerNumaAffinity;
bool tuneListenerNumaAffinity;
int tuneBindToNumaZone; // bind all threads to this zone, -1 means no binding
int tuneListenerPrioShift;
ssize_t tuneFileReadSize;
ssize_t tuneFileReadAheadTriggerSize; // after how much seq read to start read-ahead
ssize_t tuneFileReadAheadSize; // read-ahead with posix_fadvise(..., POSIX_FADV_WILLNEED)
ssize_t tuneFileWriteSize;
ssize_t tuneFileWriteSyncSize; // after how much per-session written data to call sync_file_range()
bool tuneUsePerUserMsgQueues; // true to use UserWorkContainer for MultiWorkQueue
unsigned tuneDirCacheLimit;
bool tuneEarlyStat; // stat the chunk file before closing it
unsigned tuneNumResyncGatherSlaves;
unsigned tuneNumResyncSlaves;
bool tuneUseAggressiveStreamPoll; // true to not sleep on epoll in streamlisv2
bool tuneUsePerTargetWorkers; // true to have tuneNumWorkers separate for each target
bool quotaEnableEnforcement;
bool quotaDisableZfsSupport;
int64_t sysResyncSafetyThresholdMins; // minutes to add to last buddy comm timestamp
unsigned sysTargetOfflineTimeoutSecs;
bool runDaemonized;
std::string pidFile;
// internals
virtual void loadDefaults(bool addDashes) override;
virtual void applyConfigMap(bool enableException, bool addDashes) override;
virtual void initImplicitVals() override;
std::string createDefaultCfgFilename() const;
public:
// getters & setters
const std::string& getConnInterfacesList() const
{
return connInterfacesList;
}
const std::list<Path>& getStorageDirectories() const { return storageDirectories; }
const std::list<std::string>& getStoreFsUUID() const
{
return storeFsUUID;
}
bool getStoreAllowFirstRunInit() const
{
return storeAllowFirstRunInit;
}
unsigned getTuneNumStreamListeners() const
{
return tuneNumStreamListeners;
}
unsigned getTuneNumWorkers() const
{
return tuneNumWorkers;
}
unsigned getTuneWorkerBufSize() const
{
return tuneWorkerBufSize;
}
unsigned getTuneProcessFDLimit() const
{
return tuneProcessFDLimit;
}
bool getTuneWorkerNumaAffinity() const
{
return tuneWorkerNumaAffinity;
}
bool getTuneListenerNumaAffinity() const
{
return tuneListenerNumaAffinity;
}
int getTuneBindToNumaZone() const
{
return tuneBindToNumaZone;
}
int getTuneListenerPrioShift() const
{
return tuneListenerPrioShift;
}
ssize_t getTuneFileReadSize() const
{
return tuneFileReadSize;
}
ssize_t getTuneFileReadAheadTriggerSize() const
{
return tuneFileReadAheadTriggerSize;
}
ssize_t getTuneFileReadAheadSize() const
{
return tuneFileReadAheadSize;
}
ssize_t getTuneFileWriteSize() const
{
return tuneFileWriteSize;
}
ssize_t getTuneFileWriteSyncSize() const
{
return this->tuneFileWriteSyncSize;
}
bool getTuneUsePerUserMsgQueues() const
{
return tuneUsePerUserMsgQueues;
}
bool getRunDaemonized() const
{
return runDaemonized;
}
const std::string& getPIDFile() const
{
return pidFile;
}
unsigned getTuneDirCacheLimit() const
{
return tuneDirCacheLimit;
}
bool getTuneEarlyStat() const
{
return this->tuneEarlyStat;
}
bool getQuotaEnableEnforcement() const
{
return quotaEnableEnforcement;
}
void setQuotaEnableEnforcement(bool doQuotaEnforcement)
{
quotaEnableEnforcement = doQuotaEnforcement;
}
bool getQuotaDisableZfsSupport() const
{
return quotaDisableZfsSupport;
}
unsigned getTuneNumResyncGatherSlaves() const
{
return tuneNumResyncGatherSlaves;
}
unsigned getTuneNumResyncSlaves() const
{
return tuneNumResyncSlaves;
}
bool getTuneUseAggressiveStreamPoll() const
{
return tuneUseAggressiveStreamPoll;
}
bool getTuneUsePerTargetWorkers() const
{
return tuneUsePerTargetWorkers;
}
int64_t getSysResyncSafetyThresholdMins() const
{
return sysResyncSafetyThresholdMins;
}
unsigned getSysTargetOfflineTimeoutSecs() const
{
return sysTargetOfflineTimeoutSecs;
}
};
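
tuneFileWriteSyncSize asks the server to push written data to disk via sync_file_range() once a per-session byte threshold is reached, and Config::initImplicitVals() (above) rejects the option where CONFIG_DISTRO_HAS_SYNC_FILE_RANGE is not defined. A hedged, standalone sketch of that pattern; the write path, function and variable names are illustrative and not the actual BeeGFS chunk I/O code.

#include <fcntl.h> // glibc exposes sync_file_range() with _GNU_SOURCE (predefined by g++)
#include <unistd.h>
#include <cstdint>
#include <cstddef>

// Illustrative only: write one block and kick off asynchronous write-out of the accumulated
// range once at least syncSize bytes are pending, mirroring the tuneFileWriteSyncSize idea.
void writeWithPeriodicSync(int fd, const char* buf, size_t len, off_t offset,
   int64_t syncSize, int64_t& unsyncedBytes, off_t& unsyncedOffset)
{
   ssize_t writeRes = pwrite(fd, buf, len, offset);
   if (writeRes <= 0)
      return; // error handling omitted in this sketch

   unsyncedBytes += writeRes;

#ifdef CONFIG_DISTRO_HAS_SYNC_FILE_RANGE
   if (syncSize && (unsyncedBytes >= syncSize))
   {
      // start asynchronous write-out of the accumulated dirty range
      sync_file_range(fd, unsyncedOffset, unsyncedBytes, SYNC_FILE_RANGE_WRITE);

      unsyncedOffset += unsyncedBytes;
      unsyncedBytes = 0;
   }
#endif
}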


@@ -0,0 +1,61 @@
#include "DatagramListener.h"
#include <common/net/message/NetMessageTypes.h>
DatagramListener::DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
AcknowledgmentStore* ackStore, unsigned short udpPort, bool restrictOutboundInterfaces):
AbstractDatagramListener("DGramLis", netFilter, localNicList, ackStore, udpPort,
restrictOutboundInterfaces)
{
}
DatagramListener::~DatagramListener()
{
}
void DatagramListener::handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg)
{
HighResolutionStats stats; // currently ignored
std::shared_ptr<StandardSocket> sock = findSenderSock(fromAddr->sin_addr);
if (sock == nullptr)
{
log.log(Log_WARNING, "Could not handle incoming message: no socket");
return;
}
NetMessage::ResponseContext rctx(fromAddr, sock.get(), sendBuf, DGRAMMGR_SENDBUF_SIZE, &stats);
const auto messageType = netMessageTypeToStr(msg->getMsgType());
switch(msg->getMsgType() )
{
// valid messages within this context
case NETMSGTYPE_Ack:
case NETMSGTYPE_Dummy:
case NETMSGTYPE_HeartbeatRequest:
case NETMSGTYPE_Heartbeat:
case NETMSGTYPE_MapTargets:
case NETMSGTYPE_PublishCapacities:
case NETMSGTYPE_RemoveNode:
case NETMSGTYPE_RefreshStoragePools:
case NETMSGTYPE_RefreshTargetStates:
case NETMSGTYPE_SetMirrorBuddyGroup:
{
if(!msg->processIncoming(rctx) )
{
LOG(GENERAL, WARNING,
"Problem encountered during handling of incoming message.", messageType);
}
} break;
default:
{ // valid, but not within this context
log.logErr(
"Received a message that is invalid within the current context "
"from: " + Socket::ipaddrToStr(fromAddr->sin_addr) + "; "
"type: " + messageType );
} break;
};
}


@@ -0,0 +1,20 @@
#pragma once
#include <common/components/AbstractDatagramListener.h>
class DatagramListener : public AbstractDatagramListener
{
public:
DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
AcknowledgmentStore* ackStore, unsigned short udpPort,
bool restrictOutboundInterfaces);
virtual ~DatagramListener();
protected:
virtual void handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg);
private:
};

File diff suppressed because it is too large


@@ -0,0 +1,144 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/components/ComponentInitException.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/threading/PThread.h>
#include <common/Common.h>
class StorageTarget;
class InternodeSyncer : public PThread
{
public:
InternodeSyncer();
virtual ~InternodeSyncer();
static bool downloadAndSyncTargetStates(UInt16List& outTargetIDs,
UInt8List& outReachabilityStates, UInt8List& outConsistencyStates);
static bool downloadAndSyncNodes();
static bool downloadAndSyncTargetMappings();
static bool downloadAndSyncMirrorBuddyGroups();
static bool downloadAndSyncStoragePools();
static bool downloadAllExceededQuotaLists(
const std::map<uint16_t, std::unique_ptr<StorageTarget>>& targets);
static bool downloadExceededQuotaList(uint16_t targetId, QuotaDataType idType,
QuotaLimitType exType, UIntList* outIDList, FhgfsOpsErr& error);
static void syncClientSessions(const std::vector<NodeHandle>& clientsList);
void publishTargetState(uint16_t targetID, TargetConsistencyState targetState);
bool publishLocalTargetStateChanges(const TargetStateMap& oldStates,
const TargetStateMap& changes);
static bool registerNode(AbstractDatagramListener* dgramLis);
static bool registerTargetMappings();
static void requestBuddyTargetStates();
private:
LogContext log;
Mutex forceTargetStatesUpdateMutex;
Mutex forcePublishCapacitiesMutex;
Mutex forceStoragePoolsUpdateMutex;
Mutex forceCheckNetworkMutex;
bool forceTargetStatesUpdate; // true to force update of target states
bool forcePublishCapacities; // true to force publishing target capacities
bool forceStoragePoolsUpdate; // true to force update of storage pools
bool forceCheckNetwork; // true to force update of network interfaces
virtual void run();
void syncLoop();
// returns true if the local interfaces have changed
bool checkNetwork();
void dropIdleConns();
unsigned dropIdleConnsByStore(NodeStoreServers* nodes);
void updateTargetStatesAndBuddyGroups();
void publishTargetCapacities();
void forceMgmtdPoolsRefresh();
static void printSyncNodesResults(NodeType nodeType, NumNodeIDList* addedNodes,
NumNodeIDList* removedNodes);
bool publishTargetStateChanges(UInt16List& targetIDs, UInt8List& oldStates,
UInt8List& newStates);
static bool downloadAllExceededQuotaLists(uint16_t targetId);
public:
// inliners
void setForceTargetStatesUpdate()
{
std::lock_guard<Mutex> safeLock(forceTargetStatesUpdateMutex);
this->forceTargetStatesUpdate = true;
}
void setForcePublishCapacities()
{
std::lock_guard<Mutex> safeLock(forcePublishCapacitiesMutex);
this->forcePublishCapacities = true;
}
void setForceStoragePoolsUpdate()
{
std::lock_guard<Mutex> lock(forceStoragePoolsUpdateMutex);
forceStoragePoolsUpdate = true;
}
void setForceCheckNetwork()
{
std::lock_guard<Mutex> lock(forceCheckNetworkMutex);
forceCheckNetwork = true;
}
private:
// inliners
bool getAndResetForceTargetStatesUpdate()
{
std::lock_guard<Mutex> safeLock(forceTargetStatesUpdateMutex);
bool retVal = this->forceTargetStatesUpdate;
this->forceTargetStatesUpdate = false;
return retVal;
}
bool getAndResetForcePublishCapacities()
{
std::lock_guard<Mutex> safeLock(forcePublishCapacitiesMutex);
bool retVal = this->forcePublishCapacities;
this->forcePublishCapacities = false;
return retVal;
}
bool getAndResetForceStoragePoolsUpdate()
{
std::lock_guard<Mutex> lock(forceStoragePoolsUpdateMutex);
bool retVal = forceStoragePoolsUpdate;
forceStoragePoolsUpdate = false;
return retVal;
}
bool getAndResetForceCheckNetwork()
{
std::lock_guard<Mutex> lock(forceCheckNetworkMutex);
bool retVal = forceCheckNetwork;
forceCheckNetwork = false;
return retVal;
}
};
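
The force* flags above implement a simple request/consume handshake: any component sets a flag under its mutex, and the sync loop picks the request up exactly once via the matching getAndReset*() helper. A hedged usage sketch; the caller function and the loop excerpt are illustrative, only the accessors and member names come from the headers shown in this commit.

// Illustrative caller, e.g. after a local target state change was detected:
void requestImmediateTargetStateSync(App* app)
{
   app->getInternodeSyncer()->setForceTargetStatesUpdate();
}

// Inside InternodeSyncer::syncLoop(), something along these lines would consume the request
// (sketch only; the real loop also evaluates timers and the other force flags):
//
//    if (getAndResetForceTargetStatesUpdate())
//       updateTargetStatesAndBuddyGroups();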


@@ -0,0 +1,47 @@
#include <app/App.h>
#include <program/Program.h>
#include "StorageStatsCollector.h"
/**
* Note: Unlike the common StatsCollector::collectStats(), this method can handle multiple work
* queues.
*/
void StorageStatsCollector::collectStats()
{
App* app = Program::getApp();
MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();
HighResolutionStats newStats;
const std::lock_guard<Mutex> lock(mutex);
// get stats from first queue as basis
MultiWorkQueueMapIter iter = workQueueMap->begin();
iter->second->getAndResetStats(&newStats);
// add the stat values from following queues
iter++;
for( ; iter != workQueueMap->end(); iter++)
{
HighResolutionStats currentStats;
iter->second->getAndResetStats(&currentStats);
HighResolutionStatsTk::addHighResRawStats(currentStats, newStats);
HighResolutionStatsTk::addHighResIncStats(currentStats, newStats);
}
// set current stats time
newStats.rawVals.statsTimeMS = TimeAbs().getTimeMS();
// take care of max history length
if(statsList.size() == historyLength)
statsList.pop_back();
// push new stats to front
statsList.push_front(newStats);
}


@@ -0,0 +1,25 @@
#pragma once
#include <common/components/StatsCollector.h>
/**
* The common StatsCollector cannot handle multiple work queues, so this derived class overrides
* the collectStats() method accordingly.
*/
class StorageStatsCollector : public StatsCollector
{
public:
StorageStatsCollector(unsigned collectIntervalMS, unsigned historyLength):
StatsCollector(NULL, collectIntervalMS, historyLength)
{
// nothing to be done here
}
virtual ~StorageStatsCollector() {}
protected:
virtual void collectStats();
};


@@ -0,0 +1,38 @@
#include "StorageBenchOperator.h"
int StorageBenchOperator::initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize,
int64_t size, int threads, bool odirect, StorageBenchType type)
{
return this->slave.initAndStartStorageBench(targetIDs, blocksize, size, threads, odirect, type);
}
int StorageBenchOperator::cleanup(UInt16List* targetIDs)
{
return this->slave.cleanup(targetIDs);
}
int StorageBenchOperator::stopBenchmark()
{
return this->slave.stopBenchmark();
}
StorageBenchStatus StorageBenchOperator::getStatusWithResults(UInt16List* targetIDs,
StorageBenchResultsMap* outResults)
{
return this->slave.getStatusWithResults(targetIDs, outResults);
}
void StorageBenchOperator::shutdownBenchmark()
{
this->slave.shutdownBenchmark();
}
void StorageBenchOperator::waitForShutdownBenchmark()
{
this->slave.waitForShutdownBenchmark();
}


@@ -0,0 +1,45 @@
#pragma once
#include "StorageBenchSlave.h"
class StorageBenchOperator
{
public:
StorageBenchOperator() {}
int initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
int threads, bool odirect, StorageBenchType type);
int cleanup(UInt16List* targetIDs);
int stopBenchmark();
StorageBenchStatus getStatusWithResults(UInt16List* targetIDs,
StorageBenchResultsMap* outResults);
void shutdownBenchmark();
void waitForShutdownBenchmark();
private:
StorageBenchSlave slave;
protected:
public:
// inliners
StorageBenchStatus getStatus()
{
return this->slave.getStatus();
}
StorageBenchType getType()
{
return this->slave.getType();
}
int getLastRunErrorCode()
{
return this->slave.getLastRunErrorCode();
}
};
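
A hedged sketch of how a caller might drive the operator end to end: start a write benchmark, poll until the status leaves the active states, then print the per-target results. The polling interval and output formatting are illustrative; the types, macros and calls are the ones declared above, and StorageBenchResultsMap is assumed to map target IDs to the kB/s values computed by StorageBenchSlave::getResult().

#include <cstdio>
#include <unistd.h> // sleep() for the illustrative polling loop
#include "StorageBenchOperator.h"

// Illustrative driver, not taken from the BeeGFS sources.
void runWriteBenchmark(StorageBenchOperator& benchOperator, UInt16List& targetIDs)
{
   // 4 MiB blocks, 1 GiB per simulated client, 4 clients per target, no O_DIRECT
   int initRes = benchOperator.initAndStartStorageBench(&targetIDs,
      4 * 1024 * 1024, 1024LL * 1024 * 1024, 4, false, StorageBenchType_WRITE);

   if (initRes != STORAGEBENCH_ERROR_NO_ERROR)
      return; // starting the benchmark failed

   StorageBenchResultsMap results;

   // poll until the benchmark is neither running nor finishing/stopping anymore
   while (STORAGEBENCHSTATUS_IS_ACTIVE(
      benchOperator.getStatusWithResults(&targetIDs, &results)))
      sleep(1);

   for (const auto& result : results)
      std::printf("target %u: %lld kB/s\n",
         (unsigned)result.first, (long long)result.second);
}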


@@ -0,0 +1,832 @@
#include <common/system/System.h>
#include <common/toolkit/StorageTk.h>
#include <common/toolkit/StringTk.h>
#include <components/worker/StorageBenchWork.h>
#include <program/Program.h>
#include "StorageBenchSlave.h"
#include <mutex>
#define STORAGEBENCH_STORAGE_SUBDIR_NAME "benchmark"
#define STORAGEBENCH_READ_PIPE_TIMEOUT_MS 2000
/*
* Initializes and starts the storage benchmark with the given parameters.
*
* @param targetIDs a list with the targetIDs which the benchmark tests
* @param blocksize the blocksize for the benchmark
* @param size the size for the benchmark
* @param threads the number of threads (simulated clients) for the benchmark
* @param type the type of the benchmark
* @return the error code; 0 if the benchmark was initialized successfully (STORAGEBENCH_ERROR..)
*
*/
int StorageBenchSlave::initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize,
int64_t size, int threads, bool odirect, StorageBenchType type)
{
const char* logContext = "Storage Benchmark (init)";
int lastError = STORAGEBENCH_ERROR_NO_ERROR;
int retVal = STORAGEBENCH_ERROR_NO_ERROR;
this->resetSelfTerminate();
const std::lock_guard<Mutex> lock(statusMutex);
if (STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
{
LogContext(logContext).logErr(
std::string("Benchmark is already running. It's not possible to start a benchmark if a"
"benchmark is running."));
retVal = STORAGEBENCH_ERROR_RUNTIME_IS_RUNNING;
}
else
{
retVal = initStorageBench(targetIDs, blocksize, size, threads, odirect, type);
}
if(retVal == STORAGEBENCH_ERROR_NO_ERROR)
{
if (this->status != StorageBenchStatus_INITIALISED)
{
LogContext(logContext).logErr(
std::string("Benchmark not correctly initialized."));
this->lastRunErrorCode = STORAGEBENCH_ERROR_UNINITIALIZED;
this->status = StorageBenchStatus_ERROR;
retVal = STORAGEBENCH_ERROR_UNINITIALIZED;
}
else
{
try
{
this->start();
this->status = StorageBenchStatus_RUNNING;
lastError = this->lastRunErrorCode;
}
catch(PThreadCreateException& e)
{
LogContext(logContext).logErr(std::string("Unable to start thread: ") + e.what() );
this->status = StorageBenchStatus_ERROR;
lastError = this->lastRunErrorCode;
}
}
}
if(lastError != STORAGEBENCH_ERROR_NO_ERROR)
{
retVal = lastError;
}
return retVal;
}
/*
* Initializes the storage benchmark with the given parameters.
*
* @param targetIDs a list with the targetIDs which the benchmark tests
* @param blocksize the blocksize for the benchmark
* @param size the size for the benchmark
* @param threads the number of threads (simulated clients) for the benchmark
* @param type the type of the benchmark
* @return the error code; 0 if the benchmark was initialized successfully (STORAGEBENCH_ERROR..)
*
*/
int StorageBenchSlave::initStorageBench(UInt16List* targetIDs, int64_t blocksize,
int64_t size, int threads, bool odirect, StorageBenchType type)
{
const char* logContext = "Storage Benchmark (init)";
LogContext(logContext).log(Log_DEBUG, "Initializing benchmark ...");
this->benchType = type;
this->targetIDs = new auto(*targetIDs);
this->blocksize = blocksize;
this->size = size;
this->numThreads = threads;
this->odirect = odirect;
this->numThreadsDone = 0;
initThreadData();
if (!initTransferData())
{
this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_TRANSFER_DATA;
this->status = StorageBenchStatus_ERROR;
return STORAGEBENCH_ERROR_INIT_TRANSFER_DATA;
}
if (this->benchType == StorageBenchType_READ)
{
if (!checkReadData())
{
LogContext(logContext).logErr(
std::string("No (or not enough) data for read benchmark available. "
"Start a write benchmark with the same size parameter before the read benchmark.") );
this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_READ_DATA;
this->status = StorageBenchStatus_ERROR;
return STORAGEBENCH_ERROR_INIT_READ_DATA;
}
}
else
if (this->benchType == StorageBenchType_WRITE)
{
if (!createBenchmarkFolder() )
{
LogContext(logContext).logErr(
std::string("Couldn't create the benchmark folder."));
this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_CREATE_BENCH_FOLDER;
this->status = StorageBenchStatus_ERROR;
return STORAGEBENCH_ERROR_INIT_CREATE_BENCH_FOLDER;
}
}
else
{
LogContext(logContext).logErr(std::string(
"Unknown benchmark type: " + StringTk::uintToStr(this->benchType) ) );
return STORAGEBENCH_ERROR_INITIALIZATION_ERROR;
}
this->lastRunErrorCode = STORAGEBENCH_ERROR_NO_ERROR;
this->status = StorageBenchStatus_INITIALISED;
LogContext(logContext).log(Log_DEBUG, std::string("Benchmark initialized."));
return STORAGEBENCH_ERROR_NO_ERROR;
}
/*
* Initializes the data which will be written to disk; the transfer buffer is equal in size
* to the blocksize and is filled with random characters.
*
* @return true if the random data was initialized,
* false if an error occurred
*
*/
bool StorageBenchSlave::initTransferData()
{
const char* logContext = "Storage Benchmark (init buf)";
LogContext(logContext).log(Log_DEBUG, std::string("Initializing random data..."));
void* rawTransferData;
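// note: 4 KiB-aligned allocation, because the benchmark files may be opened with O_DIRECT
// (see openFiles() below) and O_DIRECT requires suitably aligned I/O buffers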
if (posix_memalign(&rawTransferData, 4096, blocksize) != 0)
return false;
transferData.reset(static_cast<char*>(rawTransferData));
Random randomizer = Random();
for (int64_t counter = 0; counter < this->blocksize; counter++)
{
this->transferData[counter] = randomizer.getNextInt();
}
LogContext(logContext).log(Log_DEBUG, std::string("Random data initialized."));
return true;
}
/*
* frees the transfer data
*/
void StorageBenchSlave::freeTransferData()
{
transferData.reset();
}
/*
* Initializes the information about the threads.
*
*/
void StorageBenchSlave::initThreadData()
{
const char* logContext = "Storage Benchmark (init)";
LogContext(logContext).log(Log_DEBUG, std::string("Initializing thread data..."));
this->threadData.clear();
int allThreadCounter = 0;
for (UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
{
for (int threadCount = 0; threadCount < this->numThreads; threadCount++)
{
StorageBenchThreadData data;
data.targetID = *iter;
data.targetThreadID = threadCount;
data.engagedSize = 0;
data.fileDescriptor = 0;
data.neededTime = 0;
this->threadData[allThreadCounter] = data;
allThreadCounter++;
}
}
LogContext(logContext).log(Log_DEBUG, "Thread data initialized.");
}
/*
* Runs the benchmark (either a read or a write benchmark).
*
*/
void StorageBenchSlave::run()
{
const char* logContext = "Storage Benchmark (run)";
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark started..."));
App* app = Program::getApp();
bool openRes = openFiles();
if (openRes)
{
this->startTime.setToNow();
// add a work package into the worker queue for every thread
for(StorageBenchThreadDataMapIter iter = threadData.begin();
iter != threadData.end();
iter++)
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Add work for target: ") +
StringTk::uintToStr(iter->second.targetID) );
LOG_DEBUG(logContext, Log_DEBUG, std::string("- threadID: ") +
StringTk::intToStr(iter->first) );
LOG_DEBUG(logContext, Log_DEBUG, std::string("- type: ") +
StringTk::intToStr(this->benchType) );
StorageBenchWork* work = new StorageBenchWork(iter->second.targetID, iter->first,
iter->second.fileDescriptor, this->benchType, getNextPackageSize(iter->first),
this->threadCommunication, this->transferData.get());
app->getWorkQueue(iter->second.targetID)->addIndirectWork(work);
}
while(getStatus() == StorageBenchStatus_RUNNING)
{
int threadID = 0;
if (this->threadCommunication->waitForIncomingData(STORAGEBENCH_READ_PIPE_TIMEOUT_MS))
{
this->threadCommunication->getReadFD()->readExact(&threadID, sizeof(int));
}
else
{
threadID = STORAGEBENCH_ERROR_COM_TIMEOUT;
}
if (this->getSelfTerminate())
{
LogContext(logContext).logErr(std::string("Abort benchmark."));
this->lastRunErrorCode = STORAGEBENCH_ERROR_ABORT_BENCHMARK;
setStatus(StorageBenchStatus_STOPPING);
if (threadID != STORAGEBENCH_ERROR_COM_TIMEOUT)
{
this->threadData[threadID].neededTime = this->startTime.elapsedMS();
this->numThreadsDone++;
}
break;
}
else
if (threadID == STORAGEBENCH_ERROR_WORKER_ERROR)
{
LogContext(logContext).logErr(std::string("I/O operation on disk failed."));
this->lastRunErrorCode = STORAGEBENCH_ERROR_WORKER_ERROR;
setStatus(StorageBenchStatus_STOPPING);
// increment the done counter, because the thread which sent this error no longer has a
// work package in the workers' queue, but the responses from the other threads still
// need to be collected
this->numThreadsDone++;
break;
}
else
if (threadID == STORAGEBENCH_ERROR_COM_TIMEOUT)
{
continue;
}
else
if ( (threadID < -1) || ( ( (unsigned)threadID) >= this->threadData.size() ) )
{ // error if the worker reports an unknown threadID
std::string errorMessage("Unknown thread ID: " + StringTk::intToStr(threadID) + "; "
"map size: " + StringTk::uintToStr(this->threadData.size() ) );
LogContext(logContext).logErr(errorMessage);
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_ERROR;
setStatus(StorageBenchStatus_STOPPING);
// increment the done counter, because the thread which sent this error no longer has a
// work package in the workers' queue, but the responses from the other threads still
// need to be collected
this->numThreadsDone++;
break;
}
StorageBenchThreadData* currentData = &this->threadData[threadID];
int64_t workSize = getNextPackageSize(threadID);
// add a new work package to the workers' queue for the reported thread, but only if the
// remaining data size for the thread is greater than 0
if (workSize != 0)
{
StorageBenchWork* work = new StorageBenchWork(currentData->targetID, threadID,
currentData->fileDescriptor, this->benchType, workSize, this->threadCommunication,
this->transferData.get());
app->getWorkQueue(currentData->targetID)->addIndirectWork(work);
}
else
{
// the thread has finished its work
currentData->neededTime = this->startTime.elapsedMS();
this->numThreadsDone++;
}
if (this->numThreadsDone >= this->threadData.size())
{
setStatus(StorageBenchStatus_FINISHING);
}
}
// collect all responses from the workers
while ( (this->numThreadsDone < this->threadData.size()) && app->getWorkersRunning() )
{
int threadID = 0;
if (this->threadCommunication->waitForIncomingData(STORAGEBENCH_READ_PIPE_TIMEOUT_MS))
{
this->threadCommunication->getReadFD()->readExact(&threadID, sizeof(int));
}
else
{
continue;
}
LOG_DEBUG(logContext, Log_DEBUG, std::string("Collect response from worker."));
if(threadID >= 0)
this->threadData[threadID].neededTime = this->startTime.elapsedMS();
this->numThreadsDone++;
}
// all workers finished/stopped ==> close all files
closeFiles();
freeTransferData();
// all threads have finished the work or the benchmark was stopped, set new status
if (this->getStatus() == StorageBenchStatus_FINISHING)
{
this->setStatus(StorageBenchStatus_FINISHED);
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark finished."));
}
else
if (this->getStatus() == StorageBenchStatus_STOPPING)
{
if (this->lastRunErrorCode != STORAGEBENCH_ERROR_NO_ERROR)
{
this->setStatus(StorageBenchStatus_ERROR);
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark stopped with errors."));
}
else
{
this->setStatus(StorageBenchStatus_STOPPED);
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark stopped."));
}
}
}
else
{
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_OPEN_FILES;
setStatus(StorageBenchStatus_ERROR);
}
}
/*
* Checks the size of the benchmark files; they must be big enough for the read benchmark.
*
* @return true if enough data for a read benchmark exists,
* false if the files are too small or an error occurred
*
*/
bool StorageBenchSlave::checkReadData()
{
const char* logContext = "Storage Benchmark (check)";
for (StorageBenchThreadDataMapIter iter = threadData.begin();
iter != threadData.end(); iter++)
{
auto* const target = Program::getApp()->getStorageTargets()->getTarget(iter->second.targetID);
if (!target)
{
LogContext(logContext).logErr(std::string("TargetID unknown."));
return false;
}
std::string path = target->getPath().str();
path = path + "/" + STORAGEBENCH_STORAGE_SUBDIR_NAME + "/" +
StringTk::uintToStr(iter->second.targetThreadID);
int error = -1;
struct stat fileStat;
error = stat(path.c_str(), &fileStat);
if (error != -1)
{
if (fileStat.st_size < this->size)
{
LogContext(logContext).logErr(std::string("Existing benchmark file too small. "
"Requested file size: " + StringTk::int64ToStr(this->size) + " "
"File size: " + StringTk::intToStr(fileStat.st_size)));
return false;
}
}
else
{
LogContext(logContext).logErr(std::string("Couldn't stat() benchmark file. SysErr: ") +
System::getErrString() );
return false;
}
}
return true;
}
/*
* Creates the benchmark folder in each storage target folder.
*
* @return true if all benchmark folders were created,
* false if an error occurred
*
*/
bool StorageBenchSlave::createBenchmarkFolder()
{
const char* logContext = "Storage Benchmark (mkdir)";
for(UInt16ListIter iter = this->targetIDs->begin(); iter != this->targetIDs->end(); iter++)
{
auto* const target = Program::getApp()->getStorageTargets()->getTarget(*iter);
if (!target)
{
LogContext(logContext).logErr("TargetID unknown: " + StringTk::uintToStr(*iter) );
return false;
}
Path currentPath(target->getPath() / STORAGEBENCH_STORAGE_SUBDIR_NAME);
if(!StorageTk::createPathOnDisk(currentPath, false))
{
LogContext(logContext).logErr(
std::string("Unable to create benchmark directory: " + currentPath.str() ) );
return false;
}
}
return true;
}
/*
* opens all needed files for the benchmark. This method will be executed at the start
* of the benchmark
*
* @return true if all files are opened,
* false if an error occurred
*
*/
bool StorageBenchSlave::openFiles()
{
const char* logContext = "Storage Benchmark (open)";
mode_t openMode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
for(StorageBenchThreadDataMapIter iter = threadData.begin();
iter != threadData.end();
iter++)
{
auto* const target = Program::getApp()->getStorageTargets()->getTarget(iter->second.targetID);
if (!target)
{
LogContext(logContext).logErr(
"TargetID unknown: " + StringTk::uintToStr(iter->second.targetID) );
return false;
}
std::string path = target->getPath().str();
path = path + "/" STORAGEBENCH_STORAGE_SUBDIR_NAME "/" +
StringTk::uintToStr(iter->second.targetThreadID);
int fileDescriptor = -1;
// open file
int directFlag = this->odirect ? O_DIRECT : 0;
if(this->benchType == StorageBenchType_READ)
fileDescriptor = open(path.c_str(), O_RDONLY | directFlag);
else
fileDescriptor = open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC | directFlag, openMode);
if (fileDescriptor != -1)
iter->second.fileDescriptor = fileDescriptor;
else
{ // open failed
LogContext(logContext).logErr("Couldn't open benchmark file: " + path + "; "
"SysErr: " + System::getErrString() );
return false;
}
}
return true;
}
bool StorageBenchSlave::closeFiles()
{
const char* logContext = "Storage Benchmark (close)";
bool retVal = true;
for(StorageBenchThreadDataMapIter iter = threadData.begin();
iter != threadData.end();
iter++)
{
int tmpRetVal = close(iter->second.fileDescriptor);
if (tmpRetVal != 0)
{
int closeErrno = errno;
auto* const target = Program::getApp()->getStorageTargets()->getTarget(
iter->second.targetID);
if (!target)
{
LogContext(logContext).logErr(
"TargetID unknown: " + StringTk::uintToStr(iter->second.targetID) );
return false;
}
std::string path = target->getPath().str();
path = path + "/" + STORAGEBENCH_STORAGE_SUBDIR_NAME + "/" +
StringTk::uintToStr(iter->second.targetThreadID);
LogContext(logContext).logErr("Couldn't close file: " + path + "; "
"SysErr: " + System::getErrString(closeErrno) );
retVal = false;
}
}
return retVal;
}
/*
* Calculates the size (in bytes) of the data which the worker will transfer to or from disk with
* the next work package for the given thread.
*
* @param threadID the threadID
* @return the size of the data for the next work package in bytes;
* 0 means the given thread has processed all of its data
*
*/
int64_t StorageBenchSlave::getNextPackageSize(int threadID)
{
int64_t retVal = BEEGFS_MIN(this->blocksize,
this->size - this->threadData[threadID].engagedSize);
this->threadData[threadID].engagedSize += retVal;
return retVal;
}
/*
* calculates the throughput (kB/s) of the given target
*
* @param targetID the targetID
* @return the throughput of the given target in kilobytes per second
*
*/
int64_t StorageBenchSlave::getResult(uint16_t targetID)
{
int64_t size = 0;
int64_t time = 0;
for(StorageBenchThreadDataMapIter iter = this->threadData.begin();
iter != this->threadData.end();
iter++)
{
if (iter->second.targetID == targetID)
{
// summarize the size of the different threads which worked on a target
size += iter->second.engagedSize;
// search the thread with the longest runtime
if (time < this->threadData[iter->first].neededTime)
time = this->threadData[iter->first].neededTime;
}
}
// if the threads have not finished yet, use the time elapsed so far
if (time == 0)
time = this->startTime.elapsedMS();
// if no results available return zero
if ( (size == 0) || (time == 0) )
return 0;
// input: size in bytes, time in milliseconds,
// output: in kilobytes per second
return ( (size * 1000) / (time * 1024) );
}
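// Worked example for the getResult() formula above (illustrative numbers): if the threads wrote a
// total of size = 1073741824 bytes (1 GiB) to a target and the slowest thread needed
// time = 4000 ms, the result is (1073741824 * 1000) / (4000 * 1024) = 262144 kB/s,
// i.e. 256 MiB/s for that target.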
/*
* calculates the throughput (kB/s) of the given targets
*
* @param targetIDs the list of targetIDs
* @param outResults an initialized map for the results, which contains the results after
* execution of the method
*
*/
void StorageBenchSlave::getResults(UInt16List* targetIDs, StorageBenchResultsMap* outResults)
{
for (UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
{
(*outResults)[*iter] = getResult(*iter);
}
}
/*
* calculates the throughput (kB/s) of all targets
*
* @param outResults an initialized map for the results, which contains the results after
* execution of the method
*
*/
void StorageBenchSlave::getAllResults(StorageBenchResultsMap* outResults)
{
for (UInt16ListIter iter = this->targetIDs->begin(); iter != this->targetIDs->end(); iter++)
{
(*outResults)[*iter] = getResult(*iter);
}
}
/*
* calculates the throughput (kB/s) of the given targets and returns the status of the benchmark
*
* @param targetIDs the list of targetIDs
* @param outResults an initialized map for the results, which contains the results after
* execution of the method
* @return the status of the benchmark
*
*/
StorageBenchStatus StorageBenchSlave::getStatusWithResults(UInt16List* targetIDs,
StorageBenchResultsMap* outResults)
{
getResults(targetIDs, outResults);
return getStatus();
}
/*
* stop the benchmark
*
* @return the error code, 0 if the benchmark will stop (STORAGEBENCH_ERROR..)
*
*/
int StorageBenchSlave::stopBenchmark()
{
const std::lock_guard<Mutex> lock(statusMutex);
if (this->status == StorageBenchStatus_RUNNING)
{
this->status = StorageBenchStatus_STOPPING;
return STORAGEBENCH_ERROR_NO_ERROR;
}
else
if(this->status == StorageBenchStatus_FINISHING || this->status == StorageBenchStatus_STOPPING)
{
return STORAGEBENCH_ERROR_NO_ERROR;
}
return STORAGEBENCH_ERROR_NO_ERROR;
}
/*
* deletes all files in the benchmark folder of the given targets
*
* @param targetIDs the list of targetIDs which will be cleaned
* @return the error code, 0 if the cleanup was successful (STORAGEBENCH_ERROR..)
*
*/
int StorageBenchSlave::cleanup(UInt16List* targetIDs)
{
const std::lock_guard<Mutex> lock(statusMutex);
const char* logContext = "Storage Benchmark (cleanup)";
//cleanup only possible if no benchmark is running
if (STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
{
LogContext(logContext).logErr("Cleanup not possible benchmark is running");
return STORAGEBENCH_ERROR_RUNTIME_CLEANUP_JOB_ACTIVE;
}
for(UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
{
auto* const target = Program::getApp()->getStorageTargets()->getTarget(*iter);
if (!target)
{
LogContext(logContext).logErr(std::string("TargetID unknown."));
return STORAGEBENCH_ERROR_RUNTIME_UNKNOWN_TARGET;
}
std::string path = target->getPath().str();
path.append("/");
path.append(STORAGEBENCH_STORAGE_SUBDIR_NAME);
path.append("/");
DIR* dir = opendir(path.c_str());
if (dir == NULL)
{
int openDirErrno = errno;
int errRetVal;
if (openDirErrno == ENOENT)
{ // benchmark directory doesn't exist, no benchmark data for cleanup
errRetVal = STORAGEBENCH_ERROR_NO_ERROR;
}
else
{
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
errRetVal = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
LogContext(logContext).logErr("Unable to delete files in benchmark directory: " + path +
"; failed with SysErr: " + System::getErrString(errno));
}
return errRetVal;
}
struct dirent* dirEntry = StorageTk::readdirFiltered(dir);
while (dirEntry)
{
struct stat statData;
std::string filePath(path + dirEntry->d_name);
int retVal = stat(filePath.c_str(), &statData);
if ((retVal == 0) && (S_ISREG(statData.st_mode)) )
{
int error = unlink(filePath.c_str());
if(error != 0)
{
LogContext(logContext).logErr(
std::string("Unable to delete files in benchmark directory: "
+ path));
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
closedir(dir);
return STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
}
}
else
if(!S_ISREG(statData.st_mode))
LogContext(logContext).logErr("Unable to delete files in benchmark directory: " +
path + " It's not a regular file.");
else
LogContext(logContext).logErr("Unable to delete files in benchmark directory: " + path);
dirEntry = StorageTk::readdirFiltered(dir);
}
closedir(dir);
}
return STORAGEBENCH_ERROR_NO_ERROR;
}
/*
* Aborts the benchmark; used when SIGINT is received.
*
*/
void StorageBenchSlave::shutdownBenchmark()
{
this->selfTerminate();
}
void StorageBenchSlave::waitForShutdownBenchmark()
{
const std::lock_guard<Mutex> lock(statusMutex);
while(STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
{
this->statusChangeCond.wait(&this->statusMutex);
}
}


@@ -0,0 +1,145 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/benchmark/StorageBench.h>
#include <common/threading/Condition.h>
#include <common/threading/PThread.h>
#include <common/toolkit/Pipe.h>
#include <common/toolkit/TimeFine.h>
#include <common/Common.h>
#include <mutex>
// struct with the information about a thread which simulates a client
struct StorageBenchThreadData
{
uint16_t targetID;
int targetThreadID;
int64_t engagedSize; // amount of data which was submitted for write/read
int fileDescriptor;
int64_t neededTime;
};
// deleter functor for transferData
struct TransferDataDeleter {
void operator()(char* transferData) { free(transferData); }
};
// map with the information about the threads; key: virtual threadID, value: information about that thread
typedef std::map<int, StorageBenchThreadData> StorageBenchThreadDataMap;
typedef StorageBenchThreadDataMap::iterator StorageBenchThreadDataMapIter;
typedef StorageBenchThreadDataMap::const_iterator StorageBenchThreadDataMapCIter;
typedef StorageBenchThreadDataMap::value_type StorageBenchThreadDataMapVal;
class StorageBenchSlave : public PThread
{
public:
StorageBenchSlave()
: PThread("StorageBenchSlave"),
threadCommunication(new Pipe(false, false) ),
log("Storage Benchmark"),
lastRunErrorCode(STORAGEBENCH_ERROR_NO_ERROR),
status(StorageBenchStatus_UNINITIALIZED),
benchType(StorageBenchType_NONE),
blocksize(1), // useless defaults
size(1), // useless defaults
numThreads(1), // useless defaults
numThreadsDone(0),
targetIDs(NULL),
transferData(nullptr)
{ }
virtual ~StorageBenchSlave()
{
SAFE_DELETE(this->threadCommunication);
SAFE_DELETE(this->targetIDs);
}
int initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
int threads, bool odirect, StorageBenchType type);
int cleanup(UInt16List* targetIDs);
int stopBenchmark();
StorageBenchStatus getStatusWithResults(UInt16List* targetIDs,
StorageBenchResultsMap* outResults);
void shutdownBenchmark();
void waitForShutdownBenchmark();
protected:
private:
Pipe* threadCommunication;
Mutex statusMutex;
Condition statusChangeCond;
LogContext log;
int lastRunErrorCode; // STORAGEBENCH_ERROR_...
StorageBenchStatus status;
StorageBenchType benchType;
int64_t blocksize;
int64_t size;
int numThreads;
bool odirect;
unsigned int numThreadsDone;
UInt16List* targetIDs;
StorageBenchThreadDataMap threadData;
std::unique_ptr<char[], TransferDataDeleter> transferData;
TimeFine startTime;
virtual void run();
int initStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
int threads, bool odirect, StorageBenchType type);
bool initTransferData(void);
void initThreadData();
void freeTransferData();
bool checkReadData(void);
bool createBenchmarkFolder(void);
bool openFiles(void);
bool closeFiles(void);
int64_t getNextPackageSize(int threadID);
int64_t getResult(uint16_t targetID);
void getResults(UInt16List* targetIDs, StorageBenchResultsMap* outResults);
void getAllResults(StorageBenchResultsMap* outResults);
void setStatus(StorageBenchStatus newStatus)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->status = newStatus;
this->statusChangeCond.broadcast();
}
public:
//public inliners
int getLastRunErrorCode()
{
return this->lastRunErrorCode;
}
StorageBenchStatus getStatus()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->status;
}
StorageBenchType getType()
{
return this->benchType;
}
UInt16List* getTargetIDs()
{
return this->targetIDs;
}
};
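/* Usage sketch (illustration only, not part of the original sources; assumes a StorageBenchSlave
 * instance "benchSlave" that was started via initAndStartStorageBench()). It shows how the
 * shutdown request pairs with the status condition variable used above:
 *
 *    benchSlave->shutdownBenchmark();        // request termination of the benchmark threads
 *    benchSlave->waitForShutdownBenchmark(); // block until the status leaves the active states
 *    if (benchSlave->getLastRunErrorCode() != STORAGEBENCH_ERROR_NO_ERROR)
 *       ; // handle the error of the last run (STORAGEBENCH_ERROR_...)
 */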

View File

@@ -0,0 +1,745 @@
#include <program/Program.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/toolkit/StringTk.h>
#include "BuddyResyncJob.h"
#include <boost/lexical_cast.hpp>
#define BUDDYRESYNCJOB_MAXDIRWALKDEPTH 2
BuddyResyncJob::BuddyResyncJob(uint16_t targetID) :
PThread("BuddyResyncJob_" + StringTk::uintToStr(targetID)),
targetID(targetID),
status(BuddyResyncJobState_NOTSTARTED),
startTime(0), endTime(0)
{
App* app = Program::getApp();
unsigned numGatherSlaves = app->getConfig()->getTuneNumResyncGatherSlaves();
unsigned numSyncSlavesTotal = app->getConfig()->getTuneNumResyncSlaves();
unsigned numFileSyncSlaves = BEEGFS_MAX((numSyncSlavesTotal / 2), 1);
unsigned numDirSyncSlaves = BEEGFS_MAX((numSyncSlavesTotal / 2), 1);
// prepare slaves (vectors) and result vector
gatherSlaveVec.resize(numGatherSlaves);
fileSyncSlaveVec.resize(numFileSyncSlaves);
dirSyncSlaveVec.resize(numDirSyncSlaves);
}
BuddyResyncJob::~BuddyResyncJob()
{
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
SAFE_DELETE(slave);
}
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
SAFE_DELETE(slave);
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
SAFE_DELETE(slave);
}
}
void BuddyResyncJob::run()
{
// make sure only one job at a time can run!
{
std::lock_guard<Mutex> mutexLock(statusMutex);
if (status == BuddyResyncJobState_RUNNING)
{
LogContext(__func__).logErr("Refusing to run same BuddyResyncJob twice!");
return;
}
else
{
status = BuddyResyncJobState_RUNNING;
startTime = time(NULL);
endTime = 0;
}
}
App* app = Program::getApp();
StorageTargets* storageTargets = app->getStorageTargets();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
WorkerList* workerList = app->getWorkers();
bool startGatherSlavesRes;
bool startSyncSlavesRes;
std::string targetPath;
std::string chunksPath;
bool buddyCommIsOverride = false; // treat errors during lastbuddycomm read as "0, no override"
int64_t lastBuddyCommTimeSecs;
int64_t lastBuddyCommSafetyThresholdSecs;
bool checkTopLevelDirRes;
bool walkRes;
auto& target = *storageTargets->getTargets().at(targetID);
shallAbort.setZero();
targetWasOffline = false;
// delete sync candidates and gather queue; just in case there was something from a previous run
syncCandidates.clear();
gatherSlavesWorkQueue.clear();
target.setBuddyResyncInProgress(true);
LogContext(__func__).log(Log_NOTICE,
"Started resync of targetID " + StringTk::uintToStr(targetID));
// before starting the threads make sure every worker knows about the resync (the current work
// package must be finished), for that we use a dummy package
Mutex mutex;
Condition counterIncrementedCond;
SynchronizedCounter numReadyWorkers;
size_t numWorkers = workerList->size();
for (WorkerListIter iter = workerList->begin(); iter != workerList->end(); iter++)
{
Worker* worker = *iter;
PersonalWorkQueue* personalQueue = worker->getPersonalWorkQueue();
MultiWorkQueue* workQueue = worker->getWorkQueue();
IncSyncedCounterWork* incCounterWork = new IncSyncedCounterWork(&numReadyWorkers);
workQueue->addPersonalWork(incCounterWork, personalQueue);
}
numReadyWorkers.waitForCount(numWorkers);
// notify the buddy that the resync started and wait for confirmation
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto buddyNode = storageNodes->referenceNode(buddyNodeID);
StorageResyncStartedMsg storageResyncStartedMsg(buddyTargetID);
const auto respMsg = MessagingTk::requestResponse(*buddyNode, storageResyncStartedMsg,
NETMSGTYPE_StorageResyncStartedResp);
std::pair<bool, std::chrono::system_clock::time_point> lastBuddyComm;
if (!respMsg)
{
LOG(MIRRORING, ERR, "Unable to notify buddy about resync attempt. Resync will not start.",
targetID, buddyTargetID);
setStatus(BuddyResyncJobState_FAILURE);
goto cleanup;
}
startGatherSlavesRes = startGatherSlaves(target);
if (!startGatherSlavesRes)
{
setStatus(BuddyResyncJobState_FAILURE);
goto cleanup;
}
startSyncSlavesRes = startSyncSlaves();
if (!startSyncSlavesRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
goto cleanup;
}
numDirsDiscovered.setZero();
numDirsMatched.setZero();
// walk over the directories until we reach a certain level and then pass the directories to
// gather slaves to parallelize it
targetPath = target.getPath().str();
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
lastBuddyComm = target.getLastBuddyComm();
buddyCommIsOverride = lastBuddyComm.first;
lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
lastBuddyCommSafetyThresholdSecs = app->getConfig()->getSysResyncSafetyThresholdMins()*60;
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
lastBuddyCommTimeSecs = 0;
else
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
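// worked example (illustration only): with sysResyncSafetyThresholdMins = 10 and a last buddy
// communication at t = 100000s, the threshold is 600s, so everything modified after
// 100000 - 600 = 99400s counts as a resync candidate; with a threshold of 0 and no override,
// lastBuddyCommTimeSecs stays 0 and every directory matches.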
checkTopLevelDirRes = checkTopLevelDir(chunksPath, lastBuddyCommTimeSecs);
if (!checkTopLevelDirRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
// terminate sync slaves
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->selfTerminate();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->selfTerminate();
goto cleanup;
}
walkRes = walkDirs(chunksPath, "", 0, lastBuddyCommTimeSecs);
if (!walkRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
// terminate sync slaves
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->selfTerminate();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->selfTerminate();
goto cleanup;
}
// all directories are read => tell the gather slaves to stop when the work queue is empty and
// wait for all of them to stop
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
gatherSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
gatherSlaveVec[i]->setOnlyTerminateIfIdle(false);
gatherSlaveVec[i]->selfTerminate();
}
joinGatherSlaves();
// gather slaves have finished => tell sync slaves to stop when work packages are empty and wait
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
fileSyncSlaveVec[i]->selfTerminate();
}
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
dirSyncSlaveVec[i]->selfTerminate();
}
joinSyncSlaves();
cleanup:
// wait for gather slaves to stop
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin();
iter != gatherSlaveVec.end(); iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
if(slave)
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
}
bool syncErrors = false;
// wait for sync slaves to stop and record whether any errors occurred
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
if(slave)
{
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
if (slave->getErrorCount() != 0)
syncErrors = true;
}
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
if(slave)
{
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
if (slave->getErrorCount() != 0)
syncErrors = true;
}
}
if (getStatus() == BuddyResyncJobState_RUNNING) // status not set to anything special
{ // (e.g. FAILURE)
if (shallAbort.read() != 0) // job aborted?
{
setStatus(BuddyResyncJobState_INTERRUPTED);
informBuddy();
}
else if (syncErrors || targetWasOffline.read()) // any sync errors or success?
{
// we must set the buddy BAD if it has been offline during any period of time during which
// the resync was also running. we implicitly do this during resync proper, since resync
// slaves abort with errors if the target is offline. if the target goes offline *after*
// the last proper resync message has been sent and comes *back* before we try to inform
// it, we will never detect that it has been offline at all. concurrently executing
// messages (e.g. TruncFile) may run between our opportunities to detect the offline state
// and may fail to forward their actions *even though they should forward*. this would
// lead to an inconsistent secondary. since the target has gone offline, the only
// reasonable course of action is to fail to resync entirely.
setStatus(BuddyResyncJobState_ERRORS);
informBuddy();
}
else
{
setStatus(BuddyResyncJobState_SUCCESS);
// unset timestamp override file if an override was set
target.setLastBuddyComm(std::chrono::system_clock::from_time_t(0), true);
// the target may still have gone offline after the previous check "syncErrors || targetWasOffline".
// any message that has tried to forward itself in the intervening time will have seen the
// offline state, but will have been unable to set the buddy to needs-resync because it
// still *is* needs-resync. the resync itself has been perfectly successful, but we have
// to start another one anyway once the target comes back to ensure that no information
// was lost.
target.setBuddyNeedsResync(targetWasOffline.read());
informBuddy();
if (targetWasOffline.read())
LOG(MIRRORING, WARNING,
"Resync successful, but target went offline during finalization. "
"Setting target to needs-resync again.", targetID);
}
}
target.setBuddyResyncInProgress(false);
endTime = time(NULL);
}
void BuddyResyncJob::abort()
{
shallAbort.set(1); // tell the file walk in this class to abort
// set onlyTerminateIfIdle on the slaves to false; they will then be stopped by the main loop
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
// stop sync slaves
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
}
bool BuddyResyncJob::startGatherSlaves(const StorageTarget& target)
{
// create gather slaves if they don't exist yet and start them
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
{
if(!gatherSlaveVec[i])
gatherSlaveVec[i] = new BuddyResyncerGatherSlave(target, &syncCandidates,
&gatherSlavesWorkQueue, i);
try
{
gatherSlaveVec[i]->resetSelfTerminate();
gatherSlaveVec[i]->start();
gatherSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
return false;
}
}
return true;
}
bool BuddyResyncJob::startSyncSlaves()
{
// create sync slaves and start them
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
if(!fileSyncSlaveVec[i])
fileSyncSlaveVec[i] = new BuddyResyncerFileSyncSlave(targetID, &syncCandidates, i);
try
{
fileSyncSlaveVec[i]->resetSelfTerminate();
fileSyncSlaveVec[i]->start();
fileSyncSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
// stop already started sync slaves
for(size_t j = 0; j < i; j++)
fileSyncSlaveVec[j]->selfTerminate();
return false;
}
}
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
if(!dirSyncSlaveVec[i])
dirSyncSlaveVec[i] = new BuddyResyncerDirSyncSlave(targetID, &syncCandidates, i);
try
{
dirSyncSlaveVec[i]->resetSelfTerminate();
dirSyncSlaveVec[i]->start();
dirSyncSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
// stop already started sync slaves
for (size_t j = 0; j < fileSyncSlaveVec.size(); j++)
fileSyncSlaveVec[j]->selfTerminate();
for (size_t j = 0; j < i; j++)
dirSyncSlaveVec[j]->selfTerminate();
return false;
}
}
return true;
}
void BuddyResyncJob::joinGatherSlaves()
{
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->join();
}
void BuddyResyncJob::joinSyncSlaves()
{
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->join();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->join();
}
void BuddyResyncJob::getJobStats(StorageBuddyResyncJobStatistics& outStats)
{
uint64_t discoveredFiles = 0;
uint64_t matchedFiles = 0;
uint64_t discoveredDirs = numDirsDiscovered.read();
uint64_t matchedDirs = numDirsMatched.read();
uint64_t syncedFiles = 0;
uint64_t syncedDirs = 0;
uint64_t errorFiles = 0;
uint64_t errorDirs = 0;
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
{
BuddyResyncerGatherSlave* slave = gatherSlaveVec[i];
if(slave)
{
uint64_t tmpDiscoveredFiles = 0;
uint64_t tmpMatchedFiles = 0;
uint64_t tmpDiscoveredDirs = 0;
uint64_t tmpMatchedDirs = 0;
slave->getCounters(tmpDiscoveredFiles, tmpMatchedFiles, tmpDiscoveredDirs, tmpMatchedDirs);
discoveredFiles += tmpDiscoveredFiles;
matchedFiles += tmpMatchedFiles;
discoveredDirs += tmpDiscoveredDirs;
matchedDirs += tmpMatchedDirs;
}
}
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
BuddyResyncerFileSyncSlave* slave = fileSyncSlaveVec[i];
if(slave)
{
syncedFiles += slave->getNumChunksSynced();
errorFiles += slave->getErrorCount();
}
}
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
BuddyResyncerDirSyncSlave* slave = dirSyncSlaveVec[i];
if (slave)
{
syncedDirs += slave->getNumDirsSynced();
discoveredDirs += slave->getNumAdditionalDirsMatched();
matchedDirs += slave->getNumAdditionalDirsMatched();
errorDirs += slave->getErrorCount();
}
}
outStats = StorageBuddyResyncJobStatistics(status, startTime, endTime, discoveredFiles,
discoveredDirs, matchedFiles, matchedDirs, syncedFiles, syncedDirs, errorFiles, errorDirs);
}
void BuddyResyncJob::informBuddy()
{
App* app = Program::getApp();
NodeStore* storageNodes = app->getStorageNodes();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
TargetMapper* targetMapper = app->getTargetMapper();
BuddyResyncJobState status = getStatus();
TargetConsistencyState newTargetState;
if ( (status == BuddyResyncJobState_ERRORS) || (status == BuddyResyncJobState_INTERRUPTED))
newTargetState = TargetConsistencyState_BAD;
else
if (status == BuddyResyncJobState_SUCCESS)
newTargetState = TargetConsistencyState_GOOD;
else
{
LogContext(__func__).log(Log_NOTICE, "Refusing to set a state for buddy target, because "
"resync status isn't well-defined. "
"localTargetID: " + StringTk::uintToStr(targetID) + "; "
"resyncState: " + StringTk::intToStr(status));
return;
}
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto storageNode = storageNodes->referenceNode(buddyNodeID);
if (!storageNode)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. TargetID: " + StringTk::uintToStr(targetID)
+ "; buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; buddyNodeID: "
+ buddyNodeID.str() + "; error: unknown storage node");
return;
}
SetTargetConsistencyStatesRespMsg* respMsgCast;
FhgfsOpsErr result;
UInt16List targetIDs;
UInt8List states;
targetIDs.push_back(buddyTargetID);
states.push_back(newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Storage, &targetIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*storageNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"buddyNodeID: " + buddyNodeID.str() + "; "
"error: Communication error");
return;
}
respMsgCast = (SetTargetConsistencyStatesRespMsg*) respMsg.get();
result = respMsgCast->getResult();
if(result != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).logErr(
"Error while informing buddy about finished resync. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"buddyNodeID: " + buddyNodeID.str() + "; "
"error: " + boost::lexical_cast<std::string>(result));
}
}
/*
* check the CONFIG_BUDDYMIRROR_SUBDIR_NAME directory
*/
bool BuddyResyncJob::checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs)
{
struct stat statBuf;
int statRes = stat(path.c_str(), &statBuf);
if(statRes != 0)
{
LogContext(__func__).log(Log_WARNING,
"Couldn't stat chunks directory; resync job can't run. targetID: "
+ StringTk::uintToStr(targetID) + "; path: " + path
+ "; Error: " + System::getErrString(errno));
return false;
}
numDirsDiscovered.increase();
int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
if(dirMTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate("", targetID);
syncCandidates.add(candidate, this);
numDirsMatched.increase();
}
return true;
}
/*
* recursively walk through the buddy mirror directory until a depth of BUDDYRESYNCJOB_MAXDIRWALKDEPTH is
* reached; everything with a greater depth gets passed to the GatherSlaves to work on it in
* parallel
*/
bool BuddyResyncJob::walkDirs(std::string chunksPath, std::string relPath, int level,
int64_t lastBuddyCommTimeSecs)
{
bool retVal = true;
DIR* dirHandle;
struct dirent* dirEntry;
dirHandle = opendir(std::string(chunksPath + "/" + relPath).c_str());
if(!dirHandle)
{
LogContext(__func__).logErr("Unable to open path. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"Rel. path: " + relPath + "; "
"Error: " + System::getErrString(errno) );
return false;
}
while ((dirEntry = StorageTk::readdirFiltered(dirHandle)) != NULL)
{
if(shallAbort.read() != 0)
break;
// get stat info
std::string currentRelPath;
if(unlikely(relPath.empty()))
currentRelPath = dirEntry->d_name;
else
currentRelPath = relPath + "/" + dirEntry->d_name;
std::string currentFullPath = chunksPath + "/" + currentRelPath;
struct stat statBuf;
int statRes = stat(currentFullPath.c_str(), &statBuf);
if(statRes != 0)
{
LogContext(__func__).log(Log_WARNING,
"Couldn't stat directory, which was discovered previously. Resync job might not be "
"complete. targetID " + StringTk::uintToStr(targetID) + "; "
"Rel. path: " + relPath + "; "
"Error: " + System::getErrString(errno));
retVal = false;
break; // => one error aborts it all
}
if(S_ISDIR(statBuf.st_mode))
{
// if level of dir is smaller than max, take care of it and recurse into it
if(level < BUDDYRESYNCJOB_MAXDIRWALKDEPTH)
{
numDirsDiscovered.increase();
int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
if(dirMTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate(currentRelPath, targetID);
syncCandidates.add(candidate, this);
numDirsMatched.increase();
}
bool walkRes = walkDirs(chunksPath, currentRelPath, level+1, lastBuddyCommTimeSecs);
if (!walkRes)
retVal = false;
}
else
// otherwise pass it to the slaves; NOTE: gather slave takes full path
gatherSlavesWorkQueue.add(currentFullPath, this);
}
else
{
LOG_DEBUG(__func__, Log_WARNING, "Found a file in directory structure");
}
}
if(!dirEntry && errno) // error occurred
{
LogContext(__func__).logErr(
"Unable to read all directories; chunksPath: " + chunksPath + "; relativePath: " + relPath
+ "; SysErr: " + System::getErrString(errno));
retVal = false;
}
int closedirRes = closedir(dirHandle);
if (closedirRes != 0)
LOG_DEBUG(__func__, Log_WARNING,
"Unable to open path. targetID " + StringTk::uintToStr(targetID) + "; Rel. path: "
+ relPath + "; Error: " + System::getErrString(errno));
return retVal;
}

View File

@@ -0,0 +1,90 @@
#pragma once
#include <common/storage/mirroring/BuddyResyncJobStatistics.h>
#include <components/buddyresyncer/BuddyResyncerDirSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerFileSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerGatherSlave.h>
#define GATHERSLAVEQUEUE_MAXSIZE 5000
class BuddyResyncJob : public PThread
{
friend class GenericDebugMsgEx;
public:
BuddyResyncJob(uint16_t targetID);
virtual ~BuddyResyncJob();
virtual void run();
void abort();
void getJobStats(StorageBuddyResyncJobStatistics& outStats);
private:
uint16_t targetID;
Mutex statusMutex;
BuddyResyncJobState status;
int64_t startTime;
int64_t endTime;
ChunkSyncCandidateStore syncCandidates;
BuddyResyncerGatherSlaveWorkQueue gatherSlavesWorkQueue;
BuddyResyncerGatherSlaveVec gatherSlaveVec;
BuddyResyncerFileSyncSlaveVec fileSyncSlaveVec;
BuddyResyncerDirSyncSlaveVec dirSyncSlaveVec;
// this thread walks over the top dir structures itself, so we need to track that
AtomicUInt64 numDirsDiscovered;
AtomicUInt64 numDirsMatched;
AtomicInt16 shallAbort; // quasi-boolean
AtomicInt16 targetWasOffline;
bool checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs);
bool walkDirs(std::string chunksPath, std::string relPath, int level,
int64_t lastBuddyCommTimeSecs);
bool startGatherSlaves(const StorageTarget& target);
bool startSyncSlaves();
void joinGatherSlaves();
void joinSyncSlaves();
public:
uint16_t getTargetID() const
{
return targetID;
}
BuddyResyncJobState getStatus()
{
std::lock_guard<Mutex> mutexLock(statusMutex);
return status;
}
bool isRunning()
{
std::lock_guard<Mutex> mutexLock(statusMutex);
return status == BuddyResyncJobState_RUNNING;
}
void setTargetOffline()
{
targetWasOffline.set(1);
}
private:
void setStatus(BuddyResyncJobState status)
{
std::lock_guard<Mutex> mutexLock(statusMutex);
this->status = status;
}
void informBuddy();
};
typedef std::map<uint16_t, BuddyResyncJob*> BuddyResyncJobMap; //mapping: targetID, job
typedef BuddyResyncJobMap::iterator BuddyResyncJobMapIter;

View File

@@ -0,0 +1,40 @@
#include <program/Program.h>
#include "BuddyResyncer.h"
BuddyResyncer::~BuddyResyncer()
{
// delete remaining jobs
for (BuddyResyncJobMapIter iter = resyncJobMap.begin(); iter != resyncJobMap.end(); iter++)
{
BuddyResyncJob* job = iter->second;
if( job->isRunning() )
{
job->abort();
job->join();
}
SAFE_DELETE(job);
}
}
/**
* @return FhgfsOpsErr_SUCCESS if everything was successfully started, FhgfsOpsErr_INUSE if already
* running
*/
FhgfsOpsErr BuddyResyncer::startResync(uint16_t targetID)
{
bool isNewJob;
// try to add a resync job; if one already exists for this target, we get the existing one back
BuddyResyncJob* resyncJob = addResyncJob(targetID, isNewJob);
// Job already exists *and* is already running:
if (!isNewJob && resyncJob->isRunning() )
return FhgfsOpsErr_INUSE;
// job is ready and not running
resyncJob->start();
return FhgfsOpsErr_SUCCESS;
}
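/* Usage sketch (illustration only, not part of the original sources; assumes the caller holds a
 * pointer "buddyResyncer" to the app's BuddyResyncer instance):
 *
 *    FhgfsOpsErr startRes = buddyResyncer->startResync(targetID);
 *    if (startRes == FhgfsOpsErr_INUSE)
 *       ; // a resync for this target is already running
 *    else
 *    {
 *       StorageBuddyResyncJobStatistics stats;
 *       buddyResyncer->getResyncJob(targetID)->getJobStats(stats); // poll progress
 *    }
 */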

View File

@@ -0,0 +1,59 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncJob.h>
#include <mutex>
/**
* This is not a component that represents a separate thread by itself. Instead, it is the
* controlling frontend for slave threads, which are started and stopped on request (i.e. it is not
* automatically started when the app is started).
*
* Callers should only use methods in this controlling frontend and not access the slave's methods
* directly.
*/
class BuddyResyncer
{
public:
~BuddyResyncer();
FhgfsOpsErr startResync(uint16_t targetID);
private:
BuddyResyncJobMap resyncJobMap;
Mutex resyncJobMapMutex;
public:
BuddyResyncJob* getResyncJob(uint16_t targetID)
{
std::lock_guard<Mutex> mutexLock(resyncJobMapMutex);
BuddyResyncJobMapIter iter = resyncJobMap.find(targetID);
if (iter != resyncJobMap.end())
return iter->second;
else
return NULL;
}
private:
BuddyResyncJob* addResyncJob(uint16_t targetID, bool& outIsNew)
{
std::lock_guard<Mutex> mutexLock(resyncJobMapMutex);
BuddyResyncJobMapIter iter = resyncJobMap.find(targetID);
if (iter != resyncJobMap.end())
{
outIsNew = false;
return iter->second;
}
else
{
BuddyResyncJob* job = new BuddyResyncJob(targetID);
resyncJobMap.insert(BuddyResyncJobMap::value_type(targetID, job) );
outIsNew = true;
return job;
}
}
};

View File

@@ -0,0 +1,395 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "BuddyResyncerDirSyncSlave.h"
#include <boost/lexical_cast.hpp>
#define CHECK_AT_ONCE 50
BuddyResyncerDirSyncSlave::BuddyResyncerDirSyncSlave(uint16_t targetID,
ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
PThread("BuddyResyncerDirSyncSlave_" + StringTk::uintToStr(targetID) + "-"
+ StringTk::uintToStr(slaveID))
{
this->isRunning = false;
this->targetID = targetID;
this->syncCandidates = syncCandidates;
}
BuddyResyncerDirSyncSlave::~BuddyResyncerDirSyncSlave()
{
}
/**
* This is a component, which is started through its control frontend on-demand at
* runtime and terminates when it's done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerDirSyncSlave::run()
{
setIsRunning(true);
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
numAdditionalDirsMatched.setZero();
numDirsSynced.setZero();
errorCount.setZero();
syncLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerDirSyncSlave::syncLoop()
{
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
while (! getSelfTerminateNotIdle())
{
if((syncCandidates->isDirsEmpty()) && (getSelfTerminate()))
break;
ChunkSyncCandidateDir candidate;
syncCandidates->fetch(candidate, this);
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
continue;
std::string relativePath = candidate.getRelativePath();
uint16_t localTargetID = candidate.getTargetID();
// get buddy targetID
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
// perform sync
FhgfsOpsErr resyncRes = doSync(relativePath, localTargetID, buddyTargetID);
if (resyncRes == FhgfsOpsErr_SUCCESS)
numDirsSynced.increase();
else if (resyncRes != FhgfsOpsErr_INTERRUPTED)
errorCount.increase(); // increment error count if an error occurred; note: if the slaves
// were interrupted from the outside (e.g. ctl) this is not an error
}
}
FhgfsOpsErr BuddyResyncerDirSyncSlave::doSync(const std::string& dirPath, uint16_t localTargetID,
uint16_t buddyTargetID)
{
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
App* app = Program::getApp();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
// try to find the node with the buddyTargetID
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto node = storageNodes->referenceNode(buddyNodeID);
if(!node)
{
LogContext(__func__).logErr(
"Storage node does not exist; nodeID " + buddyNodeID.str());
return FhgfsOpsErr_UNKNOWNNODE;
}
int64_t offset = 0;
unsigned entriesFetched;
do
{
int64_t newOffset;
StringList names;
IntList entryTypes;
FhgfsOpsErr listRes = getBuddyDirContents(*node, dirPath, buddyTargetID, offset, names,
entryTypes, newOffset);
if(listRes != FhgfsOpsErr_SUCCESS)
{
retVal = listRes;
break;
}
offset = newOffset;
entriesFetched = names.size();
// match locally
FhgfsOpsErr findRes = findChunks(localTargetID, dirPath, names, entryTypes);
if(findRes != FhgfsOpsErr_SUCCESS)
{
retVal = findRes;
break;
}
// delete the remaining chunks/dirs on the buddy
StringList rmPaths;
for (StringListIter iter = names.begin(); iter != names.end(); iter++)
{
std::string path = dirPath + "/" + *iter;
rmPaths.push_back(path);
}
FhgfsOpsErr rmRes = removeBuddyChunkPaths(*node, localTargetID, buddyTargetID, rmPaths);
if (rmRes != FhgfsOpsErr_SUCCESS)
{
retVal = rmRes;
break;
}
if (getSelfTerminateNotIdle())
{
retVal = FhgfsOpsErr_INTERRUPTED;
break;
}
} while (entriesFetched == CHECK_AT_ONCE);
return retVal;
}
FhgfsOpsErr BuddyResyncerDirSyncSlave::getBuddyDirContents(Node& node, const std::string& dirPath,
uint16_t targetID, int64_t offset, StringList& outNames, IntList& outEntryTypes,
int64_t& outNewOffset)
{
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
unsigned msgRetryIntervalMS = 5000;
// get a part of the dir contents from the buddy target
ListChunkDirIncrementalMsg listMsg(targetID, true, dirPath, offset, CHECK_AT_ONCE, false, true);
listMsg.setMsgHeaderTargetID(targetID);
CombinedTargetState state;
bool getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ( (!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE) )
{
respMsg = MessagingTk::requestResponse(node, listMsg, NETMSGTYPE_ListChunkDirIncrementalResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; sleeping "
+ StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
+ StringTk::uintToStr(targetID));
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
}
}
if (!respMsg)
{ // communication error
LogContext(__func__).logErr(
"Communication with storage node failed: " + node.getTypedNodeID());
retVal = FhgfsOpsErr_COMMUNICATION;
}
else
if(!getStateRes)
{
LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );
retVal = FhgfsOpsErr_INTERNAL;
}
else
{
// correct response type received
ListChunkDirIncrementalRespMsg* respMsgCast = (ListChunkDirIncrementalRespMsg*) respMsg.get();
FhgfsOpsErr listRes = respMsgCast->getResult();
if (listRes == FhgfsOpsErr_SUCCESS)
{
outNewOffset = respMsgCast->getNewOffset();
respMsgCast->getNames().swap(outNames);
respMsgCast->getEntryTypes().swap(outEntryTypes);
}
else
if (listRes != FhgfsOpsErr_PATHNOTEXISTS)
{ // not exists is ok, because path might have been deleted
LogContext(__func__).log(Log_WARNING, "Error listing chunks dir; "
"dirPath: " + dirPath + "; "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"node: " + node.getTypedNodeID() + "; "
"Error: " + boost::lexical_cast<std::string>(listRes));
retVal = listRes;
}
}
return retVal;
}
FhgfsOpsErr BuddyResyncerDirSyncSlave::findChunks(uint16_t targetID, const std::string& dirPath,
StringList& inOutNames, IntList& inOutEntryTypes)
{
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
const auto& target = app->getStorageTargets()->getTargets().at(targetID);
const int targetFD = *target->getMirrorFD();
StringListIter namesIter = inOutNames.begin();
IntListIter typesIter = inOutEntryTypes.begin();
while (namesIter != inOutNames.end())
{
std::string entryID = *namesIter;
DirEntryType entryType = (DirEntryType)*typesIter;
std::string entryPath;
if (likely(!dirPath.empty()))
entryPath = dirPath + "/" + entryID;
else
entryPath = entryID;
if (DirEntryType_ISDIR(entryType))
{
bool entryExists = StorageTk::pathExists(targetFD, entryPath);
if (!entryExists)
{
// dir not found, so we didn't know about it yet => add it to sync candidate store, so
// that it gets checked and we get a list of its contents;
ChunkSyncCandidateDir syncCandidate(entryPath, targetID);
syncCandidates->add(syncCandidate, this);
numAdditionalDirsMatched.increase();
}
// no matter if found or not: remove it from the list, because we do not explicitly
// delete directories on the buddy
namesIter = inOutNames.erase(namesIter);
typesIter = inOutEntryTypes.erase(typesIter);
}
else
{
// need to lock the chunk to check it
chunkLockStore->lockChunk(targetID, entryID);
bool entryExists = StorageTk::pathExists(targetFD, entryPath);
if (entryExists)
{
// chunk found => delete it from the list and unlock it
namesIter = inOutNames.erase(namesIter);
typesIter = inOutEntryTypes.erase(typesIter);
chunkLockStore->unlockChunk(targetID, entryID);
}
else
{
// chunk not found => keep lock; will be unlocked after removal
namesIter++;
typesIter++;
}
}
}
return FhgfsOpsErr_SUCCESS;
}
FhgfsOpsErr BuddyResyncerDirSyncSlave::removeBuddyChunkPaths(Node& node, uint16_t localTargetID,
uint16_t buddyTargetID, StringList& paths)
{
unsigned msgRetryIntervalMS = 5000;
ChunkLockStore* chunkLockStore = Program::getApp()->getChunkLockStore();
RmChunkPathsMsg rmMsg(buddyTargetID, &paths);
rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
rmMsg.setMsgHeaderTargetID(buddyTargetID);
CombinedTargetState state;
bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ((!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE))
{
respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; sleeping "
+ StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
+ StringTk::uintToStr(targetID));
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
}
}
// no matter whether that succeeded or not, we unlock all chunks here first
for (StringListIter iter = paths.begin(); iter != paths.end(); iter++)
{
std::string entryID = StorageTk::getPathBasename(*iter);
chunkLockStore->unlockChunk(localTargetID, entryID);
}
if (!respMsg)
{ // communication error
LogContext(__func__).logErr(
"Communication with storage node failed: " + node.getTypedNodeID());
return FhgfsOpsErr_COMMUNICATION;
}
else
if(!getStateRes)
{
LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );
return FhgfsOpsErr_INTERNAL;
}
else
{
// correct response type received
RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
StringList& failedPaths = respMsgCast->getFailedPaths();
for(StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
{
LogContext(__func__).logErr("Chunk path could not be deleted; "
"path: " + *iter + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"node: " + node.getTypedNodeID());
}
}
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,106 @@
#pragma once
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <components/buddyresyncer/SyncCandidate.h>
class BuddyResyncerDirSyncSlave : public PThread
{
friend class BuddyResyncer; // (to grant access to internal mutex)
friend class BuddyResyncJob; // (to grant access to internal mutex)
public:
BuddyResyncerDirSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
uint8_t slaveID);
virtual ~BuddyResyncerDirSyncSlave();
private:
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
AtomicSizeT onlyTerminateIfIdle;
AtomicUInt64 numDirsSynced;
AtomicUInt64 numAdditionalDirsMatched;
AtomicUInt64 errorCount;
bool isRunning; // true if an instance of this component is currently running
uint16_t targetID;
ChunkSyncCandidateStore* syncCandidates;
virtual void run();
void syncLoop();
FhgfsOpsErr doSync(const std::string& dirPath, uint16_t localTargetID,
uint16_t buddyTargetID);
FhgfsOpsErr getBuddyDirContents(Node& node, const std::string& dirPath, uint16_t targetID,
int64_t offset, StringList& outNames, IntList& outEntryTypes, int64_t& outNewOffset);
FhgfsOpsErr findChunks(uint16_t targetID, const std::string& dirPath, StringList& inOutNames,
IntList& inOutEntryTypes);
FhgfsOpsErr removeBuddyChunkPaths(Node& node, uint16_t localTargetID, uint16_t buddyTargetID,
StringList& paths);
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
void setOnlyTerminateIfIdle(bool value)
{
if (value)
onlyTerminateIfIdle.set(1);
else
onlyTerminateIfIdle.setZero();
}
bool getOnlyTerminateIfIdle()
{
if (onlyTerminateIfIdle.read() == 0)
return false;
else
return true;
}
uint64_t getNumDirsSynced()
{
return numDirsSynced.read();
}
uint64_t getNumAdditionalDirsMatched()
{
return numAdditionalDirsMatched.read();
}
uint64_t getErrorCount()
{
return errorCount.read();
}
private:
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
bool getSelfTerminateNotIdle()
{
return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
}
};
typedef std::list<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveList;
typedef BuddyResyncerDirSyncSlaveList::iterator BuddyResyncerDirSyncSlaveListIter;
typedef std::vector<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveVec;
typedef BuddyResyncerDirSyncSlaveVec::iterator BuddyResyncerDirSyncSlaveVecIter;
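/* Shutdown-protocol sketch (illustration only, not part of the original sources): once all sync
 * candidates have been queued, the owning BuddyResyncJob drains a slave like this:
 *
 *    slave->setOnlyTerminateIfIdle(true); // let it finish the remaining candidates first
 *    slave->selfTerminate();              // then request termination
 *    slave->join();                       // and wait for the thread to exit
 */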

View File

@@ -0,0 +1,471 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "BuddyResyncerFileSyncSlave.h"
#include <boost/lexical_cast.hpp>
#define PROCESS_AT_ONCE 1
#define SYNC_BLOCK_SIZE (1024*1024) // 1M
BuddyResyncerFileSyncSlave::BuddyResyncerFileSyncSlave(uint16_t targetID,
ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
PThread("BuddyResyncerFileSyncSlave_" + StringTk::uintToStr(targetID) + "-"
+ StringTk::uintToStr(slaveID))
{
this->isRunning = false;
this->syncCandidates = syncCandidates;
this->targetID = targetID;
}
BuddyResyncerFileSyncSlave::~BuddyResyncerFileSyncSlave()
{
}
/**
* This is a component, which is started through its control frontend on-demand at
* runtime and terminates when it's done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerFileSyncSlave::run()
{
setIsRunning(true);
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
numChunksSynced.setZero();
errorCount.setZero();
syncLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerFileSyncSlave::syncLoop()
{
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
while (! getSelfTerminateNotIdle())
{
if((syncCandidates->isFilesEmpty()) && (getSelfTerminate()))
break;
ChunkSyncCandidateFile candidate;
syncCandidates->fetch(candidate, this);
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
continue;
std::string relativePath = candidate.getRelativePath();
uint16_t localTargetID = candidate.getTargetID();
// get buddy targetID
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
// perform sync
FhgfsOpsErr resyncRes = doResync(relativePath, localTargetID, buddyTargetID);
if (resyncRes == FhgfsOpsErr_SUCCESS)
numChunksSynced.increase();
else
if (resyncRes != FhgfsOpsErr_INTERRUPTED)
errorCount.increase();
}
}
FhgfsOpsErr BuddyResyncerFileSyncSlave::doResync(std::string& chunkPathStr, uint16_t localTargetID,
uint16_t buddyTargetID)
{
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
unsigned msgRetryIntervalMS = 5000;
App* app = Program::getApp();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
std::string entryID = StorageTk::getPathBasename(chunkPathStr);
// try to find the node with the buddyTargetID
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto node = storageNodes->referenceNode(buddyNodeID);
if(!node)
{
LogContext(__func__).log(Log_WARNING,
"Storage node does not exist; nodeID " + buddyNodeID.str());
return FhgfsOpsErr_UNKNOWNNODE;
}
int64_t offset = 0;
ssize_t readRes = 0;
unsigned resyncMsgFlags = 0;
resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR;
LogContext(__func__).log(Log_DEBUG,
"File sync started. chunkPath: " + chunkPathStr + "; localTargetID: "
+ StringTk::uintToStr(localTargetID) + "; buddyTargetID"
+ StringTk::uintToStr(buddyTargetID));
do
{
boost::scoped_array<char> data(new char[SYNC_BLOCK_SIZE]);
const auto& target = app->getStorageTargets()->getTargets().at(localTargetID);
// lock the chunk
chunkLockStore->lockChunk(localTargetID, entryID);
const int fd = openat(*target->getMirrorFD(), chunkPathStr.c_str(), O_RDONLY | O_NOATIME);
if (fd == -1)
{
int errCode = errno;
if(errCode == ENOENT)
{ // chunk was deleted => no error
// delete the mirror chunk and return
bool rmRes = removeBuddyChunkUnlocked(*node, buddyTargetID, chunkPathStr);
if (!rmRes) // rm failed; stop resync
{
LogContext(__func__).log(Log_WARNING,
"File sync not started. chunkPath: " + chunkPathStr + "; localTargetID: "
+ StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
+ StringTk::uintToStr(buddyTargetID));
retVal = FhgfsOpsErr_INTERNAL;
}
}
else // error => log and return
{
LogContext(__func__).logErr(
"Open of chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
+ StringTk::uintToStr(localTargetID) + "; Error: "
+ System::getErrString(errCode));
retVal = FhgfsOpsErr_INTERNAL;
}
chunkLockStore->unlockChunk(localTargetID, entryID);
goto cleanup;
}
off_t seekRes = lseek(fd, offset, SEEK_SET);
if (seekRes == -1)
{
LogContext(__func__).logErr(
"Seeking in chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
+ StringTk::uintToStr(localTargetID) + "; offset: " + StringTk::int64ToStr(offset));
retVal = FhgfsOpsErr_INTERNAL;
close(fd);
chunkLockStore->unlockChunk(localTargetID, entryID);
goto cleanup;
}
readRes = read(fd, data.get(), SYNC_BLOCK_SIZE);
if( readRes == -1)
{
LogContext(__func__).logErr("Error during read; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + System::getErrString(errno));
retVal = FhgfsOpsErr_INTERNAL;
goto end_of_loop;
}
if(readRes > 0)
{
const char zeroBuf[RESYNCER_SPARSE_BLOCK_SIZE] = { 0 };
// check if sparse blocks are in the buffer
ssize_t bufPos = 0;
bool dataFound = false;
while (bufPos < readRes)
{
size_t cmpLen = BEEGFS_MIN(readRes-bufPos, RESYNCER_SPARSE_BLOCK_SIZE);
int cmpRes = memcmp(data.get() + bufPos, zeroBuf, cmpLen);
if(cmpRes != 0)
dataFound = true;
else // sparse area detected
{
if(dataFound) // had data before
{
resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE; // let the receiver do a check
break; // and stop checking here
}
}
bufPos += cmpLen;
}
// at this point the inner loop is done; dataFound tells us whether anything but sparse areas was seen
/* make sure we always send a msg at offset==0 to truncate the file and allow concurrent
writers in a big initial sparse area */
if(offset && (readRes > 0) && (readRes == SYNC_BLOCK_SIZE) && !dataFound)
{
goto end_of_loop;
// => no transfer needed
}
/* let the receiver do a check, because we might be sending a sparse block at the beginning or
end of the file */
if(!dataFound)
resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE;
}
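// worked example (illustration only): a block that starts with data and then contains only zeroes
// sets RESYNCLOCALFILEMSG_CHECK_SPARSE as soon as the first all-zero compare block follows data
// and stops scanning; a block that is zeroes from the very start is skipped entirely if it is a
// full SYNC_BLOCK_SIZE read at a non-zero offset, otherwise it is sent with the flag set.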
{
ResyncLocalFileMsg resyncMsg(data.get(), chunkPathStr, buddyTargetID, offset, readRes);
if (!readRes || (readRes < SYNC_BLOCK_SIZE) ) // last iteration, set attribs and trunc buddy chunk
{
struct stat statBuf;
int statRes = fstat(fd, &statBuf);
if (statRes == 0)
{
if(statBuf.st_size < offset)
{ // in case someone truncated the file while we're reading at a high offset
offset = statBuf.st_size;
resyncMsg.setOffset(offset);
}
else
if(offset && !readRes)
resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_TRUNC;
int mode = statBuf.st_mode;
unsigned userID = statBuf.st_uid;
unsigned groupID = statBuf.st_gid;
int64_t mtimeSecs = statBuf.st_mtim.tv_sec;
int64_t atimeSecs = statBuf.st_atim.tv_sec;
SettableFileAttribs chunkAttribs = {mode, userID, groupID, mtimeSecs, atimeSecs};
resyncMsg.setChunkAttribs(chunkAttribs);
resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_SETATTRIBS;
}
else
{
LogContext(__func__).logErr("Error getting chunk attributes; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + System::getErrString(errno));
}
}
resyncMsg.setMsgHeaderFeatureFlags(resyncMsgFlags);
resyncMsg.setMsgHeaderTargetID(buddyTargetID);
CombinedTargetState state;
bool getStateRes =
Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ( (!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE) )
{
respMsg = MessagingTk::requestResponse(*node, resyncMsg,
NETMSGTYPE_ResyncLocalFileResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; sleeping "
+ StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
+ StringTk::uintToStr(targetID));
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes =
Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
}
}
if (!respMsg)
{ // communication error
LogContext(__func__).log(Log_WARNING,
"Communication with storage node failed: " + node->getTypedNodeID());
retVal = FhgfsOpsErr_COMMUNICATION;
// set readRes to a value != SYNC_BLOCK_SIZE to force exiting the loop
readRes = -2;
}
else
if(!getStateRes)
{
LogContext(__func__).log(Log_WARNING,
"No valid state for node ID: " + node->getTypedNodeID());
retVal = FhgfsOpsErr_INTERNAL;
// set readRes to a value != SYNC_BLOCK_SIZE to force exiting the loop
readRes = -2;
}
else
{
// correct response type received
ResyncLocalFileRespMsg* respMsgCast = (ResyncLocalFileRespMsg*) respMsg.get();
FhgfsOpsErr syncRes = respMsgCast->getResult();
if(syncRes != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).log(Log_WARNING, "Error during resync; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + boost::lexical_cast<std::string>(syncRes));
retVal = syncRes;
// set readRes to a value != SYNC_BLOCK_SIZE to force exiting the loop
readRes = -2;
}
}
}
end_of_loop:
int closeRes = close(fd);
if (closeRes == -1)
{
LogContext(__func__).log(Log_WARNING, "Error closing file descriptor; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + System::getErrString(errno));
}
// unlock the chunk
chunkLockStore->unlockChunk(localTargetID, entryID);
// increment offset for next iteration
offset += readRes;
if ( getSelfTerminateNotIdle() )
{
retVal = FhgfsOpsErr_INTERRUPTED;
break;
}
} while (readRes == SYNC_BLOCK_SIZE);
cleanup:
LogContext(__func__).log(Log_DEBUG, "File sync finished. chunkPath: " + chunkPathStr);
return retVal;
}
/**
* Note: Chunk has to be locked by caller.
*/
bool BuddyResyncerFileSyncSlave::removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID,
std::string& pathStr)
{
bool retVal = true;
unsigned msgRetryIntervalMS = 5000;
std::string entryID = StorageTk::getPathBasename(pathStr);
StringList rmPaths;
rmPaths.push_back(pathStr);
RmChunkPathsMsg rmMsg(buddyTargetID, &rmPaths);
rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
rmMsg.setMsgHeaderTargetID(buddyTargetID);
CombinedTargetState state;
bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ( (!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE) )
{
respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; "
"sleeping " + StringTk::uintToStr(msgRetryIntervalMS) + " ms before retry. "
"targetID: " + StringTk::uintToStr(targetID) );
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
}
}
if (!respMsg)
{ // communication error
LogContext(__func__).logErr(
"Communication with storage node failed: " + node.getTypedNodeID() );
return false;
}
else
if(!getStateRes)
{
LogContext(__func__).log(Log_WARNING,
"No valid state for node ID: " + node.getTypedNodeID() );
return false;
}
else
{
// correct response type received
RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
StringList& failedPaths = respMsgCast->getFailedPaths();
for (StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
{
LogContext(__func__).logErr("Chunk path could not be deleted; "
"path: " + *iter + "; "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"node: " + node.getTypedNodeID());
retVal = false;
}
}
return retVal;
}

View File

@@ -0,0 +1,98 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <mutex>
class BuddyResyncerFileSyncSlave : public PThread
{
friend class BuddyResyncer; // (to grant access to internal mutex)
friend class BuddyResyncJob; // (to grant access to internal mutex)
public:
BuddyResyncerFileSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
uint8_t slaveID);
virtual ~BuddyResyncerFileSyncSlave();
private:
AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
AtomicUInt64 numChunksSynced;
AtomicUInt64 errorCount;
bool isRunning; // true if an instance of this component is currently running
uint16_t targetID;
ChunkSyncCandidateStore* syncCandidates;
virtual void run();
void syncLoop();
FhgfsOpsErr doResync(std::string& chunkPathStr, uint16_t localTargetID,
uint16_t buddyTargetID);
bool removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID, std::string& pathStr);
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
void setOnlyTerminateIfIdle(bool value)
{
if (value)
onlyTerminateIfIdle.set(1);
else
onlyTerminateIfIdle.setZero();
}
bool getOnlyTerminateIfIdle()
{
if (onlyTerminateIfIdle.read() == 0)
return false;
else
return true;
}
uint64_t getNumChunksSynced()
{
return numChunksSynced.read();
}
uint64_t getErrorCount()
{
return errorCount.read();
}
private:
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
bool getSelfTerminateNotIdle()
{
return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
}
};
typedef std::list<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveList;
typedef BuddyResyncerFileSyncSlaveList::iterator BuddyResyncerFileSyncSlaveListIter;
typedef std::vector<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveVec;
typedef BuddyResyncerFileSyncSlaveVec::iterator BuddyResyncerFileSyncSlaveVecIter;

View File

@@ -0,0 +1,162 @@
#include <app/App.h>
#include <toolkit/StorageTkEx.h>
#include <storage/StorageTargets.h>
#include <program/Program.h>
#include <mutex>
#include "BuddyResyncerGatherSlave.h"
Mutex BuddyResyncerGatherSlave::staticGatherSlavesMutex;
std::map<std::string, BuddyResyncerGatherSlave*> BuddyResyncerGatherSlave::staticGatherSlaves;
BuddyResyncerGatherSlave::BuddyResyncerGatherSlave(const StorageTarget& target,
ChunkSyncCandidateStore* syncCandidates, BuddyResyncerGatherSlaveWorkQueue* workQueue,
uint8_t slaveID) :
PThread("BuddyResyncerGatherSlave_" + StringTk::uintToStr(target.getID()) + "-" +
StringTk::uintToStr(slaveID)),
target(target)
{
this->isRunning = false;
this->syncCandidates = syncCandidates;
this->workQueue = workQueue;
const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);
staticGatherSlaves[this->getName()] = this;
}
BuddyResyncerGatherSlave::~BuddyResyncerGatherSlave()
{
}
/**
* This is a component, which is started through its control frontend on-demand at
* runtime and terminates when it's done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerGatherSlave::run()
{
setIsRunning(true);
numChunksDiscovered.setZero();
numChunksMatched.setZero();
numDirsDiscovered.setZero();
numDirsMatched.setZero();
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
workLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch(std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerGatherSlave::workLoop()
{
const unsigned maxOpenFDsNum = 20; // max open FDs => max path sub-depth for efficient traversal
while (!getSelfTerminateNotIdle())
{
if ((workQueue->queueEmpty()) && (getSelfTerminate()))
break;
// get a directory to scan
std::string pathStr = workQueue->fetch(this);
if(unlikely(pathStr.empty()))
continue;
int nftwRes = nftw(pathStr.c_str(), handleDiscoveredEntry, maxOpenFDsNum, FTW_ACTIONRETVAL);
if(nftwRes == -1)
{ // error occurred
LogContext(__func__).logErr("Error during chunks walk. SysErr: " + System::getErrString());
}
}
}
int BuddyResyncerGatherSlave::handleDiscoveredEntry(const char* path,
const struct stat* statBuf, int ftwEntryType, struct FTW* ftwBuf)
{
std::string chunksPath;
BuddyResyncerGatherSlave* thisStatic = nullptr;
{
const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);
thisStatic = staticGatherSlaves[PThread::getCurrentThreadName()];
}
App* app = Program::getApp();
Config* cfg = app->getConfig();
const auto& targetPath = thisStatic->target.getPath().str();
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
if (strlen(path) <= chunksPath.length())
return FTW_CONTINUE;
std::string relPathStr = path + chunksPath.size() + 1;
if ( relPathStr.empty() )
return FTW_CONTINUE;
const auto lastBuddyComm = thisStatic->target.getLastBuddyComm();
const bool buddyCommIsOverride = lastBuddyComm.first;
int64_t lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
int64_t lastBuddyCommSafetyThresholdSecs = cfg->getSysResyncSafetyThresholdMins()*60;
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
lastBuddyCommTimeSecs = 0;
else
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
if(ftwEntryType == FTW_D) // directory
{
thisStatic->numDirsDiscovered.increase();
int64_t dirModificationTime = (int64_t)statBuf->st_mtim.tv_sec;
if(dirModificationTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate(relPathStr, thisStatic->target.getID());
thisStatic->syncCandidates->add(candidate, thisStatic);
thisStatic->numDirsMatched.increase();
}
}
else
if(ftwEntryType == FTW_F) // file
{
// we found a chunk
thisStatic->numChunksDiscovered.increase();
// we need to use ctime here, because mtime can be set manually (even to the future)
time_t chunkChangeTime = statBuf->st_ctim.tv_sec;
if(chunkChangeTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateFile candidate(relPathStr, thisStatic->target.getID());
thisStatic->syncCandidates->add(candidate, thisStatic);
thisStatic->numChunksMatched.increase();
}
}
return FTW_CONTINUE;
}

View File

@@ -0,0 +1,182 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/components/ComponentInitException.h>
#include <common/threading/PThread.h>
#include <ftw.h>
class StorageTarget;
#define GATHERSLAVEQUEUE_MAXSIZE 5000
class BuddyResyncerGatherSlaveWorkQueue
{
/*
* This is a small convenience class that is tightly coupled to BuddyResyncerGatherSlave and
* BuddyResyncJob.
*/
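/*
 * Usage sketch (illustrative only, not a verbatim call site): the resync job acts as the
 * producer, the gather slaves as consumers. The variable names below are hypothetical.
 *
 *    // producer (e.g. BuddyResyncJob): blocks while more than GATHERSLAVEQUEUE_MAXSIZE paths
 *    // are queued, unless the caller is asked to self-terminate
 *    std::string dirPath = "512/4096"; // hypothetical relative chunk dir
 *    gatherSlavesWorkQueue.add(dirPath, this);
 *
 *    // consumer (BuddyResyncerGatherSlave::workLoop): returns an empty string on self-terminate
 *    std::string next = workQueue->fetch(this);
 */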
public:
BuddyResyncerGatherSlaveWorkQueue(): gatherSlavesWorkQueueLen(0) { }
private:
StringList paths;
size_t gatherSlavesWorkQueueLen; // used to avoid constant calling of size() method of list
Mutex mutex;
Condition pathAddedCond;
Condition pathFetchedCond;
public:
void add(std::string& path, PThread* caller)
{
unsigned waitTimeoutMS = 3000;
const std::lock_guard<Mutex> lock(mutex);
while (gatherSlavesWorkQueueLen > GATHERSLAVEQUEUE_MAXSIZE)
{
if((caller) && (unlikely(caller->getSelfTerminate())))
break;
pathFetchedCond.timedwait(&mutex, waitTimeoutMS);
}
paths.push_back(path);
gatherSlavesWorkQueueLen++;
pathAddedCond.signal();
}
std::string fetch(PThread* caller)
{
unsigned waitTimeoutMS = 3000;
const std::lock_guard<Mutex> lock(mutex);
while (paths.empty())
{
if((caller) && (unlikely(caller->getSelfTerminate())))
{
return "";
}
pathAddedCond.timedwait(&mutex, waitTimeoutMS);
}
std::string retVal = paths.front();
paths.pop_front();
gatherSlavesWorkQueueLen--;
pathFetchedCond.signal();
return retVal;
}
bool queueEmpty()
{
const std::lock_guard<Mutex> lock(mutex);
return gatherSlavesWorkQueueLen == 0;
}
void clear()
{
const std::lock_guard<Mutex> lock(mutex);
paths.clear();
gatherSlavesWorkQueueLen = 0;
}
};
class BuddyResyncerGatherSlave : public PThread
{
friend class BuddyResyncer; // (to grant access to internal mutex)
friend class BuddyResyncJob; // (to grant access to internal mutex)
public:
BuddyResyncerGatherSlave(const StorageTarget& target, ChunkSyncCandidateStore* syncCandidates,
BuddyResyncerGatherSlaveWorkQueue* workQueue, uint8_t slaveID);
virtual ~BuddyResyncerGatherSlave();
void workLoop();
private:
AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
const StorageTarget& target;
AtomicUInt64 numChunksDiscovered;
AtomicUInt64 numChunksMatched;
AtomicUInt64 numDirsDiscovered;
AtomicUInt64 numDirsMatched;
bool isRunning; // true if an instance of this component is currently running
ChunkSyncCandidateStore* syncCandidates;
BuddyResyncerGatherSlaveWorkQueue* workQueue;
// the nftw() callback needs access to the slave threads
static Mutex staticGatherSlavesMutex;
static std::map<std::string, BuddyResyncerGatherSlave*> staticGatherSlaves;
virtual void run();
static int handleDiscoveredEntry(const char* path, const struct stat* statBuf,
int ftwEntryType, struct FTW* ftwBuf);
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
void getCounters(uint64_t& outNumChunksDiscovered, uint64_t& outNumChunksMatched,
uint64_t& outNumDirsDiscovered, uint64_t& outNumDirsMatched)
{
outNumChunksDiscovered = numChunksDiscovered.read();
outNumChunksMatched = numChunksMatched.read();
outNumDirsDiscovered = numDirsDiscovered.read();
outNumDirsMatched = numDirsMatched.read();
}
void setOnlyTerminateIfIdle(bool value)
{
if (value)
onlyTerminateIfIdle.set(1);
else
onlyTerminateIfIdle.setZero();
}
bool getOnlyTerminateIfIdle()
{
if (onlyTerminateIfIdle.read() == 0)
return false;
else
return true;
}
private:
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
bool getSelfTerminateNotIdle()
{
return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
}
};
typedef std::vector<BuddyResyncerGatherSlave*> BuddyResyncerGatherSlaveVec;
typedef BuddyResyncerGatherSlaveVec::iterator BuddyResyncerGatherSlaveVecIter;

View File

@@ -0,0 +1,44 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <string>
/**
* A storage sync candidate. Has a target ID and a path.
*/
class ChunkSyncCandidateDir
{
public:
ChunkSyncCandidateDir(const std::string& relativePath, const uint16_t targetID)
: relativePath(relativePath), targetID(targetID)
{ }
ChunkSyncCandidateDir()
: targetID(0)
{ }
private:
std::string relativePath;
uint16_t targetID;
public:
const std::string& getRelativePath() const { return relativePath; }
uint16_t getTargetID() const { return targetID; }
};
/**
* A chunk file sync candidate. Currently identical to ChunkSyncCandidateDir, but kept as a
* separate type so that file and directory candidates can be distinguished by the candidate store.
*/
class ChunkSyncCandidateFile : public ChunkSyncCandidateDir
{
public:
ChunkSyncCandidateFile(const std::string& relativePath, uint16_t targetID)
: ChunkSyncCandidateDir(relativePath, targetID)
{ }
ChunkSyncCandidateFile() = default;
};
typedef SyncCandidateStore<ChunkSyncCandidateDir, ChunkSyncCandidateFile> ChunkSyncCandidateStore;

View File

@@ -0,0 +1,88 @@
#include "ChunkFetcher.h"
#include <program/Program.h>
#include <common/Common.h>
ChunkFetcher::ChunkFetcher()
: log("ChunkFetcher")
{
// for each targetID, put one fetcher thread into list
for (const auto& mapping : Program::getApp()->getStorageTargets()->getTargets())
this->slaves.emplace_back(mapping.first);
}
ChunkFetcher::~ChunkFetcher()
{
}
/**
* Start fetcher slaves if they are not running already.
*
* @return true if successfully started or already running, false if startup problem occurred.
*/
bool ChunkFetcher::startFetching()
{
const char* logContext = "ChunkFetcher (start)";
bool retVal = true; // false if error occurred
{
const std::lock_guard<Mutex> lock(chunksListMutex);
isBad = false;
}
for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
if(!iter->isRunning)
{
// slave thread not running yet => start it
iter->resetSelfTerminate();
try
{
iter->start();
iter->isRunning = true;
}
catch (PThreadCreateException& e)
{
LogContext(logContext).logErr(std::string("Unable to start thread: ") + e.what());
retVal = false;
}
}
}
return retVal;
}
void ChunkFetcher::stopFetching()
{
for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
if(iter->isRunning)
{
iter->selfTerminate();
}
}
}
void ChunkFetcher::waitForStopFetching()
{
for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
chunksListFetchedCondition.broadcast();
while (iter->isRunning)
{
iter->isRunningChangeCond.wait(&(iter->statusMutex));
}
chunksList.clear();
}
}

View File

@@ -0,0 +1,101 @@
#pragma once
#include <components/chunkfetcher/ChunkFetcherSlave.h>
#include <common/toolkit/ListTk.h>
#include <mutex>
#define MAX_CHUNKLIST_SIZE 5000
// forward declaration
class ChunkFetcher;
typedef std::list<ChunkFetcherSlave> ChunkFetcherSlaveList;
typedef ChunkFetcherSlaveList::iterator ChunkFetcherSlaveListIter;
/**
* This is not a component that represents a separate thread. Instead, it contains and controls
* slave threads, which are started and stopped on request (i.e. they are not automatically started
* when the app is started).
* The slave threads walk over all chunks on all targets and gather per-chunk information in a
* format suitable for fsck.
*/
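/*
 * Usage sketch (roughly how FetchFsckChunkListMsgEx drives this class; the local variable names
 * are illustrative only):
 *
 *    ChunkFetcher* fetcher = Program::getApp()->getChunkFetcher();
 *    fetcher->startFetching();                 // spawn one slave thread per storage target
 *
 *    FsckChunkList batch;
 *    fetcher->getAndDeleteChunks(batch, 1000); // drain up to 1000 gathered chunks
 *
 *    fetcher->stopFetching();                  // ask the slaves to terminate ...
 *    fetcher->waitForStopFetching();           // ... and wait until they are gone
 */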
class ChunkFetcher
{
public:
ChunkFetcher();
virtual ~ChunkFetcher();
bool startFetching();
void stopFetching();
void waitForStopFetching();
private:
LogContext log;
ChunkFetcherSlaveList slaves;
FsckChunkList chunksList;
Mutex chunksListMutex;
Condition chunksListFetchedCondition;
bool isBad;
public:
bool getIsBad()
{
const std::lock_guard<Mutex> lock(chunksListMutex);
return isBad;
}
void setBad()
{
const std::lock_guard<Mutex> lock(chunksListMutex);
isBad = true;
}
void addChunk(FsckChunk& chunk)
{
const std::lock_guard<Mutex> lock(chunksListMutex);
if (chunksList.size() > MAX_CHUNKLIST_SIZE)
chunksListFetchedCondition.wait(&chunksListMutex);
chunksList.push_back(chunk);
}
bool isQueueEmpty()
{
std::lock_guard<Mutex> lock(chunksListMutex);
return chunksList.empty();
}
void getAndDeleteChunks(FsckChunkList& outList, unsigned numChunks)
{
const std::lock_guard<Mutex> lock(chunksListMutex);
FsckChunkListIter iterEnd = this->chunksList.begin();
ListTk::advance(this->chunksList, iterEnd, numChunks);
outList.splice(outList.end(), this->chunksList, this->chunksList.begin(), iterEnd);
chunksListFetchedCondition.signal();
}
unsigned getNumRunning()
{
unsigned retVal = 0;
for (ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
if ( iter->isRunning )
retVal++;
}
return retVal;
}
};

View File

@@ -0,0 +1,165 @@
#include "ChunkFetcherSlave.h"
#include <program/Program.h>
#include <boost/static_assert.hpp>
#include <libgen.h>
ChunkFetcherSlave::ChunkFetcherSlave(uint16_t targetID):
PThread("ChunkFetcherSlave-" + StringTk::uintToStr(targetID) ),
log("ChunkFetcherSlave-" + StringTk::uintToStr(targetID) ),
isRunning(false),
targetID(targetID)
{
}
ChunkFetcherSlave::~ChunkFetcherSlave()
{
}
void ChunkFetcherSlave::run()
{
setIsRunning(true);
try
{
registerSignalHandler();
walkAllChunks();
log.log(Log_DEBUG, "Component stopped.");
}
catch(std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
/*
* walk over all chunks of this slave's target
*/
void ChunkFetcherSlave::walkAllChunks()
{
App* app = Program::getApp();
log.log(Log_DEBUG, "Starting chunks walk...");
const auto& target = *app->getStorageTargets()->getTargets().at(targetID);
const auto& targetPath = target.getPath().str();
// walk over "normal" chunks (i.e. no mirrors)
std::string walkPath = targetPath + "/" + CONFIG_CHUNK_SUBDIR_NAME;
if(!walkChunkPath(walkPath, 0, walkPath.size() ) )
return;
// let's find out if this target is part of a buddy mirror group and if it is the primary
// target; if it is, walk over buddy mirror directory
bool isPrimaryTarget;
uint16_t buddyGroupID = app->getMirrorBuddyGroupMapper()->getBuddyGroupID(this->targetID,
&isPrimaryTarget);
if (isPrimaryTarget)
{
walkPath = targetPath + "/" CONFIG_BUDDYMIRROR_SUBDIR_NAME;
if(!walkChunkPath(walkPath, buddyGroupID, walkPath.size() ) )
return;
}
log.log(Log_DEBUG, "End of chunks walk.");
}
bool ChunkFetcherSlave::walkChunkPath(const std::string& path, uint16_t buddyGroupID,
unsigned basePathLen)
{
DIR* dir = ::opendir(path.c_str() );
if(!dir)
{
LOG(GENERAL, WARNING, "Could not open directory.", path, targetID, sysErr);
Program::getApp()->getChunkFetcher()->setBad();
return false;
}
int readRes;
bool result = true;
std::string pathBuf = path;
pathBuf.push_back('/');
while(!getSelfTerminate())
{
::dirent* item;
// we really want struct dirent to contain a reasonably sized array for the filename
BOOST_STATIC_ASSERT(sizeof(item->d_name) >= NAME_MAX + 1);
#if USE_READDIR_R
::dirent entry;
readRes = ::readdir_r(dir, &entry, &item);
#else
errno = 0;
item = readdir(dir);
readRes = item ? 0 : errno;
#endif
if(readRes != 0)
{
LOG(GENERAL, WARNING, "readdir failed.", path, targetID, sysErr(readRes));
result = false;
break;
}
if(!item)
break;
if(::strcmp(item->d_name, ".") == 0 || ::strcmp(item->d_name, "..") == 0)
continue;
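// reuse pathBuf across iterations: truncate it back to "<path>/" and append the current entry name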
pathBuf.resize(path.size() + 1);
pathBuf += item->d_name;
struct stat statBuf;
int statRes = ::stat(pathBuf.c_str(), &statBuf);
if(statRes)
{
LOG(GENERAL, WARNING, "Could not stat directory.", ("path", pathBuf), targetID, sysErr);
result = false;
break;
}
if(S_ISDIR(statBuf.st_mode) )
{
result = walkChunkPath(pathBuf, buddyGroupID, basePathLen);
if(!result)
break;
}
else
{
const char* relativeChunkPath = pathBuf.c_str() + basePathLen + 1;
// get only the dirname part of the path
char* tmpPathCopy = strdup(relativeChunkPath);
Path savedPath(dirname(tmpPathCopy) );
free(tmpPathCopy);
FsckChunk fsckChunk(item->d_name, targetID, savedPath, statBuf.st_size, statBuf.st_blocks,
statBuf.st_ctime, statBuf.st_mtime, statBuf.st_atime, statBuf.st_uid, statBuf.st_gid,
buddyGroupID);
Program::getApp()->getChunkFetcher()->addChunk(fsckChunk);
}
}
::closedir(dir);
if (getSelfTerminate())
result = false;
if(!result)
Program::getApp()->getChunkFetcher()->setBad();
return result;
}

View File

@@ -0,0 +1,62 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/components/ComponentInitException.h>
#include <common/fsck/FsckChunk.h>
#include <common/threading/PThread.h>
#include <mutex>
class ChunkFetcher; //forward decl.
/**
* This component runs over all chunks of one target and gathers information suitable for fsck
*
* This component is not auto-started when the app starts. It is started and stopped by the
* ChunkFetcher.
*/
class ChunkFetcherSlave : public PThread
{
friend class ChunkFetcher; // (to grant access to internal mutex)
public:
ChunkFetcherSlave(uint16_t targetID);
virtual ~ChunkFetcherSlave();
private:
LogContext log;
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
bool isRunning; // true if an instance of this component is currently running
uint16_t targetID;
virtual void run();
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
private:
void walkAllChunks();
bool walkChunkPath(const std::string& path, uint16_t buddyGroupID, unsigned basePathLen);
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
};

View File

@@ -0,0 +1,32 @@
#pragma once
#include <app/App.h>
#include <common/components/streamlistenerv2/StreamListenerV2.h>
#include <program/Program.h>
/**
* Unlike the common StreamListenerV2, this class can handle multiple work queues through an
* overridden getWorkQueue() method.
*/
class StorageStreamListenerV2 : public StreamListenerV2
{
public:
StorageStreamListenerV2(std::string listenerID, AbstractApp* app):
StreamListenerV2(listenerID, app, NULL)
{
// nothing to be done here
}
virtual ~StorageStreamListenerV2() {}
protected:
// getters & setters
virtual MultiWorkQueue* getWorkQueue(uint16_t targetID) const
{
return Program::getApp()->getWorkQueue(targetID);
}
};

View File

@@ -0,0 +1,84 @@
#include <common/app/log/LogContext.h>
#include <common/benchmark/StorageBench.h>
#include <common/toolkit/StringTk.h>
#include <program/Program.h>
#include "StorageBenchWork.h"
void StorageBenchWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
const char* logContext = "Storage Benchmark (run)";
App* app = Program::getApp();
Config* cfg = app->getConfig();
int workRes = 0; // return value for benchmark operator
ssize_t ioRes = 0; // read/write result
if (this->type == StorageBenchType_READ)
{
size_t readSize = cfg->getTuneFileReadSize();
size_t toBeRead = this->bufLen;
size_t bufOffset = 0;
while(toBeRead)
{
size_t currentReadSize = BEEGFS_MIN(readSize, toBeRead);
ioRes = read(this->fileDescriptor, &this->buf[bufOffset], currentReadSize);
if (ioRes <= 0)
break;
toBeRead -= currentReadSize;
bufOffset += currentReadSize;
}
app->getNodeOpStats()->updateNodeOp(0, StorageOpCounter_READOPS,
this->bufLen, NETMSG_DEFAULT_USERID);
}
else
if (this->type == StorageBenchType_WRITE)
{
size_t writeSize = cfg->getTuneFileWriteSize();
size_t toBeWritten = this->bufLen;
size_t bufOffset = 0;
while(toBeWritten)
{
size_t currentWriteSize = BEEGFS_MIN(writeSize, toBeWritten);
ioRes = write(this->fileDescriptor, &this->buf[bufOffset], currentWriteSize);
if (ioRes <= 0)
break;
toBeWritten -= currentWriteSize;
bufOffset += currentWriteSize;
}
app->getNodeOpStats()->updateNodeOp(0, StorageOpCounter_WRITEOPS,
this->bufLen, NETMSG_DEFAULT_USERID);
}
else
{ // unknown benchmark type
workRes = STORAGEBENCH_ERROR_WORKER_ERROR;
LogContext(logContext).logErr("Error: unknown benchmark type");
}
if(unlikely(workRes < 0) || unlikely(ioRes == -1) )
{ // error occurred
if (ioRes == -1)
{ // read or write operation failed
LogContext(logContext).logErr(std::string("Error: I/O failure. SysErr: ") +
System::getErrString() );
}
workRes = STORAGEBENCH_ERROR_WORKER_ERROR;
this->operatorCommunication->getWriteFD()->write(&workRes, sizeof(int) );
}
else
{ // success
this->operatorCommunication->getWriteFD()->write(&this->threadID, sizeof(int) );
}
}

View File

@@ -0,0 +1,43 @@
#pragma once
#include <common/benchmark/StorageBench.h>
#include <common/components/worker/Work.h>
#include <common/toolkit/Pipe.h>
#include <common/Common.h>
class StorageBenchWork: public Work
{
public:
StorageBenchWork(uint16_t targetID, int threadID, int fileDescriptor,
StorageBenchType type, int64_t bufLen, Pipe* operatorCommunication, char* buf)
{
this->targetID = targetID;
this->threadID = threadID;
this->fileDescriptor = fileDescriptor;
this->type = type;
this->bufLen = bufLen;
this->operatorCommunication = operatorCommunication;
this->buf = buf;
}
virtual ~StorageBenchWork()
{
}
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
protected:
private:
uint16_t targetID;
int threadID; // virtual threadID
int fileDescriptor;
StorageBenchType type;
int64_t bufLen;
char* buf;
Pipe* operatorCommunication;
};

View File

@@ -0,0 +1,203 @@
// control messages
#include <common/net/message/control/AuthenticateChannelMsgEx.h>
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/control/PeerInfoMsgEx.h>
#include "control/AckMsgEx.h"
#include "control/SetChannelDirectMsgEx.h"
// nodes messages
#include <common/net/message/nodes/ChangeTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/GetNodesRespMsg.h>
#include <common/net/message/nodes/GetStatesAndBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/storagepools/GetStoragePoolsRespMsg.h>
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
#include <common/net/message/nodes/GetTargetStatesRespMsg.h>
#include <common/net/message/nodes/MapTargetsRespMsg.h>
#include <common/net/message/nodes/RegisterNodeRespMsg.h>
#include <common/net/message/nodes/RegisterTargetRespMsg.h>
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
#include <net/message/nodes/GenericDebugMsgEx.h>
#include <net/message/nodes/GetClientStatsMsgEx.h>
#include <net/message/nodes/HeartbeatMsgEx.h>
#include <net/message/nodes/HeartbeatRequestMsgEx.h>
#include <net/message/nodes/MapTargetsMsgEx.h>
#include <net/message/nodes/PublishCapacitiesMsgEx.h>
#include <net/message/nodes/RefreshTargetStatesMsgEx.h>
#include <net/message/nodes/RemoveBuddyGroupMsgEx.h>
#include <net/message/nodes/RemoveNodeMsgEx.h>
#include <net/message/nodes/SetMirrorBuddyGroupMsgEx.h>
#include <net/message/nodes/SetTargetConsistencyStatesMsgEx.h>
#include <net/message/nodes/GetTargetConsistencyStatesMsgEx.h>
// storage messages
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/net/message/storage/quota/GetQuotaInfoMsg.h>
#include <common/net/message/storage/quota/RequestExceededQuotaRespMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <common/net/message/storage/SetStorageTargetInfoRespMsg.h>
#include <net/message/storage/attribs/GetChunkFileAttribsMsgEx.h>
#include <net/message/storage/attribs/SetLocalAttrMsgEx.h>
#include <net/message/storage/creating/RmChunkPathsMsgEx.h>
#include <net/message/storage/creating/UnlinkLocalFileMsgEx.h>
#include <net/message/storage/listing/ListChunkDirIncrementalMsgEx.h>
#include <net/message/storage/mirroring/GetStorageResyncStatsMsgEx.h>
#include <net/message/storage/mirroring/ResyncLocalFileMsgEx.h>
#include <net/message/storage/mirroring/SetLastBuddyCommOverrideMsgEx.h>
#include <net/message/storage/mirroring/StorageResyncStartedMsgEx.h>
#include <net/message/storage/quota/GetQuotaInfoMsgEx.h>
#include <net/message/storage/quota/SetExceededQuotaMsgEx.h>
#include <net/message/storage/GetHighResStatsMsgEx.h>
#include <net/message/storage/StatStoragePathMsgEx.h>
#include <net/message/storage/TruncLocalFileMsgEx.h>
// session messages
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
#include <net/message/session/opening/CloseChunkFileMsgEx.h>
#include <net/message/session/rw/ReadLocalFileV2MsgEx.h>
#include <net/message/session/rw/WriteLocalFileMsgEx.h>
#include <net/message/session/FSyncLocalFileMsgEx.h>
#ifdef BEEGFS_NVFS
#include <net/message/session/rw/ReadLocalFileRDMAMsgEx.h>
#include <net/message/session/rw/WriteLocalFileRDMAMsgEx.h>
#endif /* BEEGFS_NVFS */
// mon messages
#include <net/message/mon/RequestStorageDataMsgEx.h>
// fsck
#include <net/message/fsck/DeleteChunksMsgEx.h>
#include <net/message/fsck/FetchFsckChunkListMsgEx.h>
#include <net/message/fsck/MoveChunkFileMsgEx.h>
// storage benchmark
#include <common/net/message/nodes/StorageBenchControlMsg.h>
#include <net/message/nodes/StorageBenchControlMsgEx.h>
// chunk balancing
#include <common/net/message/storage/chunkbalancing/StripePatternUpdateRespMsg.h>
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
#include <net/message/storage/chunkbalancing/CpChunkPathsMsgEx.h>
#include <common/net/message/SimpleMsg.h>
#include <net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.h>
#include "NetMessageFactory.h"
/**
* @return the created NetMessage, owned by the returned unique_ptr
* (msg->msgType is NETMSGTYPE_Invalid on error)
*/
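/*
 * Illustrative use (sketch; the real call site is the common message receive path, which reads
 * the msgType from the serialized message header first):
 *
 *    auto msg = netMessageFactory->createFromMsgType(NETMSGTYPE_Heartbeat);
 *    // for unknown types, a SimpleMsg with msgType NETMSGTYPE_Invalid is returned instead
 */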
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
{
NetMessage* msg;
switch(msgType)
{
// The following lines are grouped by "type of the message" and ordered alphabetically inside
// the groups. There should always be one message per line to keep a clear layout (although
// this might lead to lines that are longer than usual)
// control messages
case NETMSGTYPE_Ack: { msg = new AckMsgEx(); } break;
case NETMSGTYPE_AuthenticateChannel: { msg = new AuthenticateChannelMsgEx(); } break;
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
case NETMSGTYPE_SetChannelDirect: { msg = new SetChannelDirectMsgEx(); } break;
case NETMSGTYPE_PeerInfo: { msg = new PeerInfoMsgEx(); } break;
// nodes messages
case NETMSGTYPE_ChangeTargetConsistencyStatesResp: { msg = new ChangeTargetConsistencyStatesRespMsg(); } break;
case NETMSGTYPE_GenericDebug: { msg = new GenericDebugMsgEx(); } break;
case NETMSGTYPE_GetClientStats: { msg = new GetClientStatsMsgEx(); } break;
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
case NETMSGTYPE_GetStatesAndBuddyGroupsResp: { msg = new GetStatesAndBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetStoragePoolsResp: { msg = new GetStoragePoolsRespMsg(); } break;
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
case NETMSGTYPE_GetTargetStatesResp: { msg = new GetTargetStatesRespMsg(); } break;
case NETMSGTYPE_HeartbeatRequest: { msg = new HeartbeatRequestMsgEx(); } break;
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
case NETMSGTYPE_MapTargets: { msg = new MapTargetsMsgEx(); } break;
case NETMSGTYPE_PublishCapacities: { msg = new PublishCapacitiesMsgEx(); } break;
case NETMSGTYPE_MapTargetsResp: { msg = new MapTargetsRespMsg(); } break;
case NETMSGTYPE_StorageBenchControlMsg: {msg = new StorageBenchControlMsgEx(); } break;
case NETMSGTYPE_RefreshStoragePools: { msg = new RefreshStoragePoolsMsgEx(); } break;
case NETMSGTYPE_RefreshTargetStates: { msg = new RefreshTargetStatesMsgEx(); } break;
case NETMSGTYPE_RegisterNodeResp: { msg = new RegisterNodeRespMsg(); } break;
case NETMSGTYPE_RegisterTargetResp: { msg = new RegisterTargetRespMsg(); } break;
case NETMSGTYPE_RemoveBuddyGroup: { msg = new RemoveBuddyGroupMsgEx(); } break;
case NETMSGTYPE_RemoveNode: { msg = new RemoveNodeMsgEx(); } break;
case NETMSGTYPE_RemoveNodeResp: { msg = new RemoveNodeRespMsg(); } break;
case NETMSGTYPE_SetMirrorBuddyGroup: { msg = new SetMirrorBuddyGroupMsgEx(); } break;
case NETMSGTYPE_SetTargetConsistencyStates: { msg = new SetTargetConsistencyStatesMsgEx(); } break;
case NETMSGTYPE_SetTargetConsistencyStatesResp: { msg = new SetTargetConsistencyStatesRespMsg(); } break;
case NETMSGTYPE_GetTargetConsistencyStates: { msg = new GetTargetConsistencyStatesMsgEx(); } break;
case NETMSGTYPE_GetTargetConsistencyStatesResp: { msg = new GetTargetConsistencyStatesRespMsg(); } break;
// storage messages
case NETMSGTYPE_CpChunkPaths: { msg = new CpChunkPathsMsgEx(); } break;
case NETMSGTYPE_CpChunkPathsResp: { msg = new CpChunkPathsRespMsg(); } break;
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
case NETMSGTYPE_GetChunkFileAttribs: { msg = new GetChunkFileAttribsMsgEx(); } break;
case NETMSGTYPE_GetHighResStats: { msg = new GetHighResStatsMsgEx(); } break;
case NETMSGTYPE_GetQuotaInfo: {msg = new GetQuotaInfoMsgEx(); } break;
case NETMSGTYPE_GetStorageResyncStats: { msg = new GetStorageResyncStatsMsgEx(); } break;
case NETMSGTYPE_ListChunkDirIncremental: { msg = new ListChunkDirIncrementalMsgEx(); } break;
case NETMSGTYPE_ListChunkDirIncrementalResp: { msg = new ListChunkDirIncrementalRespMsg(); } break;
case NETMSGTYPE_RequestExceededQuotaResp: {msg = new RequestExceededQuotaRespMsg(); } break;
case NETMSGTYPE_ResyncLocalFile: { msg = new ResyncLocalFileMsgEx(); } break;
case NETMSGTYPE_ResyncLocalFileResp: { msg = new ResyncLocalFileRespMsg(); } break;
case NETMSGTYPE_RmChunkPaths: { msg = new RmChunkPathsMsgEx(); } break;
case NETMSGTYPE_RmChunkPathsResp: { msg = new RmChunkPathsRespMsg(); } break;
case NETMSGTYPE_SetExceededQuota: {msg = new SetExceededQuotaMsgEx(); } break;
case NETMSGTYPE_SetLastBuddyCommOverride: { msg = new SetLastBuddyCommOverrideMsgEx(); } break;
case NETMSGTYPE_SetLocalAttr: { msg = new SetLocalAttrMsgEx(); } break;
case NETMSGTYPE_SetLocalAttrResp: { msg = new SetLocalAttrRespMsg(); } break;
case NETMSGTYPE_SetStorageTargetInfoResp: { msg = new SetStorageTargetInfoRespMsg(); } break;
case NETMSGTYPE_StatStoragePath: { msg = new StatStoragePathMsgEx(); } break;
case NETMSGTYPE_StorageResyncStarted: { msg = new StorageResyncStartedMsgEx(); } break;
case NETMSGTYPE_StorageResyncStartedResp: { msg = new StorageResyncStartedRespMsg(); } break;
case NETMSGTYPE_StripePatternUpdateResp: { msg = new StripePatternUpdateRespMsg(); } break;
case NETMSGTYPE_TruncLocalFile: { msg = new TruncLocalFileMsgEx(); } break;
case NETMSGTYPE_TruncLocalFileResp: { msg = new TruncLocalFileRespMsg(); } break;
case NETMSGTYPE_UnlinkLocalFile: { msg = new UnlinkLocalFileMsgEx(); } break;
case NETMSGTYPE_UnlinkLocalFileResp: { msg = new UnlinkLocalFileRespMsg(); } break;
// session messages
case NETMSGTYPE_CloseChunkFile: { msg = new CloseChunkFileMsgEx(); } break;
case NETMSGTYPE_CloseChunkFileResp: { msg = new CloseChunkFileRespMsg(); } break;
case NETMSGTYPE_FSyncLocalFile: { msg = new FSyncLocalFileMsgEx(); } break;
case NETMSGTYPE_ReadLocalFileV2: { msg = new ReadLocalFileV2MsgEx(); } break;
case NETMSGTYPE_WriteLocalFile: { msg = new WriteLocalFileMsgEx(); } break;
case NETMSGTYPE_WriteLocalFileResp: { msg = new WriteLocalFileRespMsg(); } break;
#ifdef BEEGFS_NVFS
case NETMSGTYPE_ReadLocalFileRDMA: { msg = new ReadLocalFileRDMAMsgEx(); } break;
case NETMSGTYPE_WriteLocalFileRDMA: { msg = new WriteLocalFileRDMAMsgEx(); } break;
#endif // BEEGFS_NVFS
// mon message
case NETMSGTYPE_RequestStorageData: { msg = new RequestStorageDataMsgEx(); } break;
// fsck
case NETMSGTYPE_DeleteChunks: { msg = new DeleteChunksMsgEx(); } break;
case NETMSGTYPE_FetchFsckChunkList: { msg = new FetchFsckChunkListMsgEx(); } break;
case NETMSGTYPE_MoveChunkFile: { msg = new MoveChunkFileMsgEx(); } break;
default:
{
msg = new SimpleMsg(NETMSGTYPE_Invalid);
} break;
}
return std::unique_ptr<NetMessage>(msg);
}

View File

@@ -0,0 +1,14 @@
#pragma once
#include <common/Common.h>
#include <common/net/message/AbstractNetMessageFactory.h>
class NetMessageFactory : public AbstractNetMessageFactory
{
public:
NetMessageFactory() {}
protected:
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
} ;

View File

@@ -0,0 +1,22 @@
#include <program/Program.h>
#include "AckMsgEx.h"
bool AckMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("Ack incoming");
LOG_DEBUG_CONTEXT(log, 5, std::string("Value: ") + getValue() );
AcknowledgmentStore* ackStore = Program::getApp()->getAckStore();
ackStore->receivedAck(getValue() );
// note: this message does not require a response
App* app = Program::getApp();
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_ACK,
getMsgHeaderUserID() );
return true;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/control/AckMsg.h>
// see class AcknowledgeableMsg (fhgfs_common) for a short description
class AckMsgEx : public AckMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,19 @@
#include <program/Program.h>
#include "SetChannelDirectMsgEx.h"
bool SetChannelDirectMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("SetChannelDirect incoming");
LOG_DEBUG_CONTEXT(log, 5, std::string("Value: ") + StringTk::intToStr(getValue() ) );
ctx.getSocket()->setIsDirect(getValue() );
App* app = Program::getApp();
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_SETCHANNELDIRECT, getMsgHeaderUserID() );
return true;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/control/SetChannelDirectMsg.h>
// direct means the message is definitely processed on this server and not forwarded to another
class SetChannelDirectMsgEx : public SetChannelDirectMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,60 @@
#include "DeleteChunksMsgEx.h"
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
bool DeleteChunksMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "DeleteChunksMsg incoming";
App* app = Program::getApp();
ChunkStore* chunkDirStore = app->getChunkDirStore();
FsckChunkList& chunks = getChunks();
FsckChunkList failedDeletes;
for ( FsckChunkListIter iter = chunks.begin(); iter != chunks.end(); iter++ )
{
std::string chunkDirRelative;
std::string delPathStrRelative;
bool isMirrorFD = (iter->getBuddyGroupID() != 0); // buddy-mirrored chunks are unlinked via the mirror FD
chunkDirRelative = iter->getSavedPath()->str();
delPathStrRelative = chunkDirRelative + "/" + iter->getID();
auto* const target = app->getStorageTargets()->getTarget(iter->getTargetID());
if (!target)
{ // unknown targetID
LogContext(logContext).logErr(std::string("Unknown targetID: ") +
StringTk::uintToStr(iter->getTargetID()));
failedDeletes.push_back(*iter);
}
else
{ // valid targetID
int targetFD = isMirrorFD ? *target->getMirrorFD() : *target->getChunkFD();
int unlinkRes = unlinkat(targetFD, delPathStrRelative.c_str(), 0);
if ( (unlinkRes == -1) && (errno != ENOENT) )
{ // error
LogContext(logContext).logErr(
"Unable to unlink file: " + delPathStrRelative + ". " + "SysErr: "
+ System::getErrString());
failedDeletes.push_back(*iter);
}
// Now try to rmdir chunkDirPath (checks if it is empty)
if (unlinkRes == 0)
{
Path chunkDirRelativeVec(chunkDirRelative);
chunkDirStore->rmdirChunkDirPath(targetFD, &chunkDirRelativeVec);
}
}
}
ctx.sendResponse(DeleteChunksRespMsg(&failedDeletes) );
return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/DeleteChunksMsg.h>
#include <common/net/message/fsck/DeleteChunksRespMsg.h>
class DeleteChunksMsgEx : public DeleteChunksMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,53 @@
#include "FetchFsckChunkListMsgEx.h"
#include <program/Program.h>
bool FetchFsckChunkListMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
ChunkFetcher* chunkFetcher = app->getChunkFetcher();
FetchFsckChunkListStatus status;
FsckChunkList chunkList;
if (getLastStatus() == FetchFsckChunkListStatus_NOTSTARTED)
{
// This is the first message of a new Fsck run
if (chunkFetcher->getNumRunning() != 0 || !chunkFetcher->isQueueEmpty())
{
// another fsck is already in progress
if (!getForceRestart())
{
LOG(GENERAL, NOTICE, "Received request to start fsck although previous run is not finished. "
"Not starting.", ("From", ctx.peerName()));
ctx.sendResponse(FetchFsckChunkListRespMsg(&chunkList,
FetchFsckChunkListStatus_NOTSTARTED));
return true;
}
else
{
LOG(GENERAL, NOTICE, "Aborting previous fsck chunk fetcher run by user request.",
("From", ctx.peerName()));
chunkFetcher->stopFetching();
chunkFetcher->waitForStopFetching();
}
}
chunkFetcher->startFetching();
}
if(chunkFetcher->getIsBad())
status = FetchFsckChunkListStatus_READERROR;
else if (chunkFetcher->getNumRunning() == 0)
status = FetchFsckChunkListStatus_FINISHED;
else
status = FetchFsckChunkListStatus_RUNNING;
chunkFetcher->getAndDeleteChunks(chunkList, getMaxNumChunks());
ctx.sendResponse(FetchFsckChunkListRespMsg(&chunkList, status));
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/fsck/FetchFsckChunkListMsg.h>
#include <common/net/message/fsck/FetchFsckChunkListRespMsg.h>
class FetchFsckChunkListMsgEx : public FetchFsckChunkListMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,88 @@
#include "MoveChunkFileMsgEx.h"
#include <program/Program.h>
bool MoveChunkFileMsgEx::processIncoming(ResponseContext& ctx)
{
ctx.sendResponse(MoveChunkFileRespMsg(moveChunk()));
return true;
}
unsigned MoveChunkFileMsgEx::moveChunk()
{
const char* logContext = "MoveChunkFileMsg incoming";
App* app = Program::getApp();
std::string chunkName = this->getChunkName();
std::string oldPath = this->getOldPath(); // relative path to chunks dir
std::string newPath = this->getNewPath(); // relative path to chunks dir
uint16_t targetID = this->getTargetID();
bool overwriteExisting = this->getOverwriteExisting();
int renameRes;
std::string moveFrom = oldPath + "/" + chunkName;
std::string moveTo = newPath + "/" + chunkName;
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
LogContext(logContext).log(Log_CRITICAL, "Could not open path for target ID; targetID: "
+ StringTk::uintToStr(targetID));
return 1;
}
const auto targetPath = getIsMirrored()
? target->getPath() / CONFIG_BUDDYMIRROR_SUBDIR_NAME
: target->getPath() / CONFIG_CHUNK_SUBDIR_NAME;
const int targetFD = getIsMirrored() ? *target->getMirrorFD() : *target->getChunkFD();
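// targetFD is a file descriptor of the chunks or buddymirror directory of this target;
// moveFrom/moveTo below are resolved relative to it (pathExists() and renameat())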
// if overwriteExisting is set to false, make sure that the destination file does not exist yet
if (!overwriteExisting)
{
bool pathExists = StorageTk::pathExists(targetFD, moveTo);
if (pathExists)
{
LogContext(logContext).log(Log_CRITICAL,
"Could not move chunk file. Destination file does already exist; chunkID: " + chunkName
+ "; targetID: " + StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath
+ "; newChunkPath: " + newPath);
return 1;
}
}
{
// create the parent directory (perhaps it didn't exist)
// note: this could be more efficient with a createPathOnDisk() variant that uses mkdirat()
const Path moveToPath = targetPath / moveTo;
mode_t dirMode = S_IRWXU | S_IRWXG | S_IRWXO;
bool mkdirRes = StorageTk::createPathOnDisk(moveToPath, true, &dirMode);
if(!mkdirRes)
{
LogContext(logContext).log(Log_CRITICAL,
"Could not create parent directory for chunk; chunkID: " + chunkName + "; targetID: "
+ StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath + "; newChunkPath: "
+ newPath);
return 1;
}
}
// perform the actual move
renameRes = renameat(targetFD, moveFrom.c_str(), targetFD, moveTo.c_str() );
if ( renameRes != 0 )
{
LogContext(logContext).log(Log_CRITICAL,
"Could not perform move; chunkID: " + chunkName + "; targetID: "
+ StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath + "; newChunkPath: "
+ newPath + "; SysErr: " + System::getErrString());
return 1;
}
else if (getIsMirrored())
target->setBuddyNeedsResync(true);
return 0;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/MoveChunkFileMsg.h>
#include <common/net/message/fsck/MoveChunkFileRespMsg.h>
class MoveChunkFileMsgEx : public MoveChunkFileMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
unsigned moveChunk();
};

View File

@@ -0,0 +1,68 @@
#include "RequestStorageDataMsgEx.h"
bool RequestStorageDataMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
Node& node = app->getLocalNode();
MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();
StorageTargets* storageTargets = app->getStorageTargets();
// get disk space of each target
StorageTargetInfoList storageTargetInfoList;
storageTargets->generateTargetInfoList(storageTargetInfoList);
// compute total disk space and total free space
int64_t diskSpaceTotal = 0; // sum of all targets
int64_t diskSpaceFree = 0; // sum of all targets
for(StorageTargetInfoListIter iter = storageTargetInfoList.begin();
iter != storageTargetInfoList.end();
iter++)
{
if(iter->getDiskSpaceTotal() == -1)
continue; // statfs() failed on this target
diskSpaceTotal += iter->getDiskSpaceTotal();
diskSpaceFree += iter->getDiskSpaceFree();
}
unsigned sessionCount = app->getSessions()->getSize();
NicAddressList nicList(node.getNicList());
std::string hostnameid = System::getHostname();
// highresStats
HighResStatsList statsHistory;
uint64_t lastStatsMS = getValue();
// get stats history
StatsCollector* statsCollector = app->getStatsCollector();
statsCollector->getStatsSince(lastStatsMS, statsHistory);
// get work queue stats
unsigned indirectWorkListSize = 0;
unsigned directWorkListSize = 0;
for(MultiWorkQueueMapCIter iter = workQueueMap->begin(); iter != workQueueMap->end(); iter++)
{
indirectWorkListSize += iter->second->getIndirectWorkListSize();
directWorkListSize += iter->second->getDirectWorkListSize();
}
RequestStorageDataRespMsg requestStorageDataRespMsg(node.getAlias(), hostnameid, node.getNumID(),
&nicList, indirectWorkListSize, directWorkListSize, diskSpaceTotal, diskSpaceFree,
sessionCount, &statsHistory, &storageTargetInfoList);
ctx.sendResponse(requestStorageDataRespMsg);
LOG_DEBUG(__func__, Log_SPAM, std::string("Sent a message with type: " ) +
StringTk::uintToStr(requestStorageDataRespMsg.getMsgType() ) + std::string(" to mon") );
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_REQUESTSTORAGEDATA, getMsgHeaderUserID() );
return true;
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include <app/App.h>
#include <common/app/log/LogContext.h>
#include <common/components/worker/queue/MultiWorkQueue.h>
#include <common/net/message/mon/RequestStorageDataMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageTargetInfo.h>
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
#include <program/Program.h>
class RequestStorageDataMsgEx : public RequestStorageDataMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,428 @@
#include <common/net/message/nodes/GenericDebugRespMsg.h>
#include <common/net/msghelpers/MsgHelperGenericDebug.h>
#include <common/storage/quota/Quota.h>
#include <common/storage/StoragePoolId.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include <session/ZfsSession.h>
#include <toolkit/QuotaTk.h>
#include "GenericDebugMsgEx.h"
#define GENDBGMSG_OP_LISTOPENFILES "listopenfiles"
#define GENDBGMSG_OP_VERSION "version"
#define GENDBGMSG_OP_MSGQUEUESTATS "msgqueuestats"
#define GENDBGMSG_OP_RESYNCQUEUELEN "resyncqueuelen"
#define GENDBGMSG_OP_CHUNKLOCKSTORESIZE "chunklockstoresize"
#define GENDBGMSG_OP_CHUNKLOCKSTORECONTENTS "chunklockstore"
#define GENDBGMSG_OP_SETREJECTIONRATE "setrejectionrate"
bool GenericDebugMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("GenericDebugMsg incoming");
LOG_DEBUG_CONTEXT(log, 5, std::string("Command string: ") + getCommandStr() );
std::string cmdRespStr = processCommand();
ctx.sendResponse(GenericDebugRespMsg(cmdRespStr.c_str() ) );
App* app = Program::getApp();
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_GENERICDEBUG,
getMsgHeaderUserID() );
return true;
}
/**
* @return command response string
*/
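/*
 * Example command strings handled below (taken from the per-operation protocol comments; the
 * numeric values are illustrative):
 *    "version"
 *    "resyncqueuelen 1234 files"
 *    "chunklockstoresize 1234"
 *    "chunklockstore 1234 50"
 *    "setrejectionrate 10"
 */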
std::string GenericDebugMsgEx::processCommand()
{
App* app = Program::getApp();
Config* cfg = app->getConfig();
std::string responseStr;
std::string operation;
// load command string into a stream to allow us to use getline
std::istringstream commandStream(getCommandStr() );
// get operation type from command string
std::getline(commandStream, operation, ' ');
if(operation == GENDBGMSG_OP_LISTOPENFILES)
responseStr = processOpListOpenFiles(commandStream);
else
if(operation == GENDBGMSG_OP_VERSION)
responseStr = processOpVersion(commandStream);
else
if(operation == GENDBGMSG_OP_MSGQUEUESTATS)
responseStr = processOpMsgQueueStats(commandStream);
else
if(operation == GENDBGMSG_OP_VARLOGMESSAGES)
responseStr = MsgHelperGenericDebug::processOpVarLogMessages(commandStream);
else
if(operation == GENDBGMSG_OP_VARLOGKERNLOG)
responseStr = MsgHelperGenericDebug::processOpVarLogKernLog(commandStream);
else
if(operation == GENDBGMSG_OP_FHGFSLOG)
responseStr = MsgHelperGenericDebug::processOpFhgfsLog(commandStream);
else
if(operation == GENDBGMSG_OP_LOADAVG)
responseStr = MsgHelperGenericDebug::processOpLoadAvg(commandStream);
else
if(operation == GENDBGMSG_OP_DROPCACHES)
responseStr = MsgHelperGenericDebug::processOpDropCaches(commandStream);
else
if(operation == GENDBGMSG_OP_GETCFG)
responseStr = MsgHelperGenericDebug::processOpCfgFile(commandStream, cfg->getCfgFile() );
else
if(operation == GENDBGMSG_OP_GETLOGLEVEL)
responseStr = MsgHelperGenericDebug::processOpGetLogLevel(commandStream);
else
if(operation == GENDBGMSG_OP_SETLOGLEVEL)
responseStr = MsgHelperGenericDebug::processOpSetLogLevel(commandStream);
else
if(operation == GENDBGMSG_OP_NETOUT)
responseStr = MsgHelperGenericDebug::processOpNetOut(commandStream,
app->getMgmtNodes(), app->getMetaNodes(), app->getStorageNodes() );
else
if(operation == GENDBGMSG_OP_QUOTAEXCEEDED)
responseStr = processOpQuotaExceeded(commandStream);
else
if(operation == GENDBGMSG_OP_USEDQUOTA)
responseStr = processOpUsedQuota(commandStream);
else
if(operation == GENDBGMSG_OP_RESYNCQUEUELEN)
responseStr = processOpResyncQueueLen(commandStream);
else
if(operation == GENDBGMSG_OP_CHUNKLOCKSTORESIZE)
responseStr = processOpChunkLockStoreSize(commandStream);
else
if(operation == GENDBGMSG_OP_CHUNKLOCKSTORECONTENTS)
responseStr = processOpChunkLockStoreContents(commandStream);
else
if(operation == GENDBGMSG_OP_LISTSTORAGESTATES)
responseStr = MsgHelperGenericDebug::processOpListTargetStates(commandStream,
app->getTargetStateStore() );
else
if(operation == GENDBGMSG_OP_SETREJECTIONRATE)
responseStr = processOpSetRejectionRate(commandStream);
else
responseStr = "Unknown/invalid operation";
return responseStr;
}
std::string GenericDebugMsgEx::processOpListOpenFiles(std::istringstream& commandStream)
{
// protocol: no arguments
App* app = Program::getApp();
SessionStore* sessions = app->getSessions();
std::ostringstream responseStream;
NumNodeIDList sessionIDs;
size_t numFilesTotal = 0;
size_t numCheckedSessions = 0; // may differ from the number of initially queried sessions
size_t numSessions = sessions->getAllSessionIDs(&sessionIDs);
responseStream << "Found " << numSessions << " sessions." << std::endl;
responseStream << std::endl;
// walk over all sessions
for(NumNodeIDListCIter iter = sessionIDs.begin(); iter != sessionIDs.end(); iter++)
{
// note: sessionID might have become removed since we queried it, e.g. because client is gone
auto session = sessions->referenceSession(*iter);
if(!session)
continue;
numCheckedSessions++;
SessionLocalFileStore* sessionFiles = session->getLocalFiles();
size_t numFiles = sessionFiles->getSize();
if(!numFiles)
continue; // only print sessions with open files
numFilesTotal += numFiles;
responseStream << *iter << ": " << numFiles << std::endl;
}
responseStream << std::endl;
responseStream << "Final results: " << numFilesTotal << " open files in " <<
numCheckedSessions << " checked sessions";
return responseStream.str();
}
std::string GenericDebugMsgEx::processOpVersion(std::istringstream& commandStream)
{
return BEEGFS_VERSION;
}
std::string GenericDebugMsgEx::processOpMsgQueueStats(std::istringstream& commandStream)
{
// protocol: no arguments
App* app = Program::getApp();
MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();
std::ostringstream responseStream;
std::string indirectQueueStats;
std::string directQueueStats;
std::string busyStats;
for(MultiWorkQueueMapCIter iter = workQueueMap->begin(); iter != workQueueMap->end(); iter++)
{
MultiWorkQueue* workQ = iter->second;
workQ->getStatsAsStr(indirectQueueStats, directQueueStats, busyStats);
responseStream << "* [queue id " << iter->first << "] "
"general queue stats: " << std::endl <<
indirectQueueStats << std::endl;
responseStream << "* [queue id " << iter->first << "] "
"direct queue stats: " << std::endl <<
directQueueStats << std::endl;
responseStream << "* [queue id " << iter->first << "] "
"busy worker stats: " << std::endl <<
busyStats << std::endl;
}
return responseStream.str();
}
std::string GenericDebugMsgEx::processOpQuotaExceeded(std::istringstream& commandStream)
{
App* app = Program::getApp();
std::string targetIdStr;
std::getline(commandStream, targetIdStr, ' ');
uint16_t targetId = StringTk::strToUInt(targetIdStr);
if(!app->getConfig()->getQuotaEnableEnforcement() )
return "No quota exceeded IDs on this storage daemon because quota enforcement is"
"disabled.";
ExceededQuotaStorePtr exQuotaStore = app->getExceededQuotaStores()->get(targetId);
// exQuotaStore may be null; this is checked in MsgHelperGenericDebug::processOpQuotaExceeded
return MsgHelperGenericDebug::processOpQuotaExceeded(commandStream, exQuotaStore.get());
}
std::string GenericDebugMsgEx::processOpUsedQuota(std::istringstream& commandStream)
{
App *app = Program::getApp();
std::ostringstream responseStream;
ZfsSession session;
QuotaDataType quotaDataType = QuotaDataType_NONE;
std::string quotaDataTypeStr;
bool forEachTarget = false;
unsigned rangeStart = 0;
unsigned rangeEnd = 0;
// get parameter from command string
std::string inputString;
while(!commandStream.eof() )
{
std::getline(commandStream, inputString, ' ');
if(inputString == "uid")
{
quotaDataType = QuotaDataType_USER;
quotaDataTypeStr = "user";
}
else
if(inputString == "gid")
{
quotaDataType = QuotaDataType_GROUP;
quotaDataTypeStr = "group";
}
else
if(inputString == "forEachTarget")
forEachTarget = true;
else
if(inputString == "range")
{
std::string rangeValue;
std::getline(commandStream, rangeValue, ' ');
rangeStart = StringTk::strToUInt(rangeValue);
std::getline(commandStream, rangeValue, ' ');
rangeEnd = StringTk::strToUInt(rangeValue);
}
}
// verify given parameters
if(quotaDataType == QuotaDataType_NONE)
return "Invalid or missing quota data type argument.";
if(rangeStart == 0 && rangeEnd == 0)
return "Invalid or missing range argument.";
if(forEachTarget)
{
const auto& targets = app->getStorageTargets()->getTargets();
responseStream << "Quota data of " << targets.size() << " targets." << std::endl;
for (const auto& mapping : targets)
{
const auto& target = *mapping.second;
QuotaDataList outQuotaDataList;
QuotaBlockDeviceMap quotaBlockDevices = {
{mapping.first, target.getQuotaBlockDevice()}
};
QuotaTk::requestQuotaForRange(&quotaBlockDevices, rangeStart, rangeEnd, quotaDataType,
&outQuotaDataList, &session);
responseStream << outQuotaDataList.size() << " used quota for " << quotaDataTypeStr
<< " IDs on target: " << mapping.first << std::endl;
QuotaData::quotaDataListToString(outQuotaDataList, &responseStream);
}
}
else
{
auto& targets = app->getStorageTargets()->getTargets();
QuotaBlockDeviceMap quotaBlockDevices;
std::transform(
targets.begin(), targets.end(),
std::inserter(quotaBlockDevices, quotaBlockDevices.end()),
[] (const auto& target) {
return std::make_pair(target.first, target.second->getQuotaBlockDevice());
});
QuotaDataList outQuotaDataList;
QuotaTk::requestQuotaForRange(&quotaBlockDevices, rangeStart, rangeEnd, quotaDataType,
&outQuotaDataList, &session);
QuotaData::quotaDataListToString(outQuotaDataList, &responseStream);
}
return responseStream.str();
}
std::string GenericDebugMsgEx::processOpResyncQueueLen(std::istringstream& commandStream)
{
// protocol: targetID files/dirs as argument (e.g. "resyncqueuelen 1234 files")
// get parameter from command string
std::string targetIDStr;
uint16_t targetID;
std::string typeStr;
std::getline(commandStream, targetIDStr, ' ');
std::getline(commandStream, typeStr, ' ');
targetID = StringTk::strToUInt(targetIDStr);
if (targetID == 0)
return "Invalid or missing targetID";
BuddyResyncJob* resyncJob = Program::getApp()->getBuddyResyncer()->getResyncJob(targetID);
if (!resyncJob)
return "0";
if (typeStr == "files")
{
size_t count = resyncJob->syncCandidates.getNumFiles();
return StringTk::uintToStr(count);
}
else
if (typeStr == "dirs")
{
size_t count = resyncJob->syncCandidates.getNumDirs();
return StringTk::uintToStr(count);
}
else
return "Invalid or missing queue type";
}
std::string GenericDebugMsgEx::processOpChunkLockStoreSize(std::istringstream& commandStream)
{
// protocol: targetID as argument (e.g. "chunklockstoresize 1234")
// get parameter from command string
std::string targetIDStr;
uint16_t targetID;
std::getline(commandStream, targetIDStr, ' ');
targetID = StringTk::strToUInt(targetIDStr);
if (targetID == 0)
return "Invalid or missing targetID";
size_t lockStoreSize = Program::getApp()->getChunkLockStore()->getSize(targetID);
return StringTk::uintToStr(lockStoreSize);
}
std::string GenericDebugMsgEx::processOpChunkLockStoreContents(std::istringstream& commandStream)
{
// protocol: targetID and size limit (optional) as arguments (e.g. "chunklockstore 1234 50")
std::stringstream outStream;
// get parameter from command string
std::string targetIDStr;
uint16_t targetID;
std::string maxEntriesStr;
unsigned maxEntries;
std::getline(commandStream, targetIDStr, ' ');
targetID = StringTk::strToUInt(targetIDStr);
std::getline(commandStream, maxEntriesStr, ' ');
maxEntries = StringTk::strToUInt(maxEntriesStr);
if (targetID == 0)
return "Invalid or missing targetID";
StringSet lockStoreContents = Program::getApp()->getChunkLockStore()->getLockStoreCopy(targetID);
unsigned lockStoreSize = lockStoreContents.size();
StringSetIter lockStoreIter = lockStoreContents.begin();
if ( (maxEntries == 0) || (maxEntries > lockStoreSize) )
maxEntries = lockStoreSize;
for (unsigned i = 0; i < maxEntries; i++)
{
outStream << *lockStoreIter << std::endl;
lockStoreIter++;
}
return outStream.str();
}
std::string GenericDebugMsgEx::processOpSetRejectionRate(std::istringstream& commandStream)
{
App* app = Program::getApp();
Config* cfg = app->getConfig();
std::string rejectionRateStr;
std::ostringstream responseStream;
std::getline(commandStream, rejectionRateStr, ' ');
unsigned rejectionRate = StringTk::strToUInt(rejectionRateStr);
cfg->setConnectionRejectionRate(rejectionRate);
responseStream << "Setting connection reject rate to " << rejectionRate << std::endl;
return responseStream.str();
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <common/net/message/nodes/GenericDebugMsg.h>
class GenericDebugMsgEx : public GenericDebugMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
std::string processCommand();
std::string processOpListOpenFiles(std::istringstream& commandStream);
std::string processOpVersion(std::istringstream& commandStream);
std::string processOpMsgQueueStats(std::istringstream& commandStream);
std::string processOpQuotaExceeded(std::istringstream& commandStream);
std::string processOpUsedQuota(std::istringstream& commandStream);
std::string processOpResyncQueueLen(std::istringstream& commandStream);
std::string processOpChunkLockStoreSize(std::istringstream& commandStream);
std::string processOpChunkLockStoreContents(std::istringstream& commandStream);
std::string processOpSetRejectionRate(std::istringstream& commandStream);
};

View File

@@ -0,0 +1,30 @@
#include <program/Program.h>
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/nodes/OpCounter.h>
#include "GetClientStatsMsgEx.h"
#include <nodes/StorageNodeOpStats.h>
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
/**
* Server side, called when the server gets a GetClientStatsMsgEx request
*/
bool GetClientStatsMsgEx::processIncoming(ResponseContext& ctx)
{
uint64_t cookieIP = getCookieIP(); // requested is cookie+1
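// cookie-based paging: the client passes the last IP it received as the cookie, the server
// continues with the following IP and fills at most GETCLIENTSTATSRESP_MAX_PAYLOAD_LEN per response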
// get stats
StorageNodeOpStats* clientOpStats = Program::getApp()->getNodeOpStats();
bool wantPerUserStats = isMsgHeaderFeatureFlagSet(GETCLIENTSTATSMSG_FLAG_PERUSERSTATS);
UInt64Vector opStatsVec;
clientOpStats->mapToUInt64Vec(
cookieIP, GETCLIENTSTATSRESP_MAX_PAYLOAD_LEN, wantPerUserStats, &opStatsVec);
ctx.sendResponse(GetClientStatsRespMsg(&opStatsVec) );
return true;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/nodes/GetClientStatsMsg.h>
// NOTE: The message factory requires this object to provide 'deserialize' and 'processIncoming'
// methods; 'deserialize' is inherited from the base message class.
class GetClientStatsMsgEx : public GetClientStatsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,24 @@
#include <program/Program.h>
#include <storage/StorageTargets.h>
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
#include "GetTargetConsistencyStatesMsgEx.h"
bool GetTargetConsistencyStatesMsgEx::processIncoming(ResponseContext& ctx)
{
StorageTargets* storageTargets = Program::getApp()->getStorageTargets();
TargetConsistencyStateVec states;
std::transform(
targetIDs.begin(), targetIDs.end(),
std::back_inserter(states),
[storageTargets] (uint16_t targetID) {
auto* const target = storageTargets->getTarget(targetID);
return target ? target->getConsistencyState() : TargetConsistencyState_BAD;
});
ctx.sendResponse(GetTargetConsistencyStatesRespMsg(states));
return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/GetTargetConsistencyStatesMsg.h>
class GetTargetConsistencyStatesMsgEx : public GetTargetConsistencyStatesMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,76 @@
#include <common/net/sock/NetworkInterfaceCard.h>
#include <program/Program.h>
#include "HeartbeatMsgEx.h"
#include <boost/lexical_cast.hpp>
bool HeartbeatMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("Heartbeat incoming");
App* app = Program::getApp();
bool isNodeNew;
// construct node
NicAddressList& nicList = getNicList();
auto node = std::make_shared<Node>(getNodeType(), getNodeID(), getNodeNumID(), getPortUDP(),
getPortTCP(), nicList);
// set local nic capabilities
NicAddressList localNicList(app->getLocalNicList() );
NicListCapabilities localNicCaps;
NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);
node->getConnPool()->setLocalNicList(localNicList, localNicCaps);
std::string nodeIDWithTypeStr = node->getNodeIDWithTypeStr();
log.log(Log_DEBUG, std::string("Heartbeat node: ") + nodeIDWithTypeStr);
// add/update node in store
AbstractNodeStore* nodes;
switch(getNodeType() )
{
case NODETYPE_Meta:
nodes = app->getMetaNodes(); break;
case NODETYPE_Mgmt:
nodes = app->getMgmtNodes(); break;
case NODETYPE_Storage:
nodes = app->getStorageNodes(); break;
default:
{
log.logErr("Invalid/unexpected node type: "
+ boost::lexical_cast<std::string>(getNodeType()));
goto ack_resp;
} break;
}
isNodeNew = (nodes->addOrUpdateNode(std::move(node)) == NodeStoreResult::Added);
if(isNodeNew)
{ // log info about new server
bool supportsRDMA = NetworkInterfaceCard::supportsRDMA(&nicList);
log.log(Log_WARNING, std::string("New node: ") +
nodeIDWithTypeStr + "; " +
std::string(supportsRDMA ? "RDMA; " : "") );
}
ack_resp:
acknowledge(ctx);
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_HEARTBEAT,
getMsgHeaderUserID() );
return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/HeartbeatMsg.h>
class HeartbeatMsgEx : public HeartbeatMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,28 @@
#include <common/net/message/nodes/HeartbeatMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "HeartbeatRequestMsgEx.h"
bool HeartbeatRequestMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("Heartbeat request incoming");
App* app = Program::getApp();
Config* cfg = app->getConfig();
Node& localNode = app->getLocalNode();
NumNodeID localNodeNumID = localNode.getNumID();
NicAddressList nicList(localNode.getNicList() );
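// note: the reply is a full HeartbeatMsg describing this storage node (alias, numeric ID,
// NIC list); both port fields are set to the configured storage port (see setPorts() below)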
HeartbeatMsg hbMsg(localNode.getAlias(), localNodeNumID, NODETYPE_Storage, &nicList);
hbMsg.setPorts(cfg->getConnStoragePort(), cfg->getConnStoragePort() );
ctx.sendResponse(hbMsg);
log.log(Log_DEBUG, std::string("Heartbeat req ip:") + StringTk::uintToHexStr(ctx.getSocket()->getPeerIP()));
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_HEARTBEAT,
getMsgHeaderUserID() );
return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
class HeartbeatRequestMsgEx : public HeartbeatRequestMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,44 @@
#include <common/net/message/nodes/MapTargetsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/ZipIterator.h>
#include <program/Program.h>
#include "MapTargetsMsgEx.h"
bool MapTargetsMsgEx::processIncoming(ResponseContext& ctx)
{
LogContext log("MapTargetsMsg incoming");
const App* app = Program::getApp();
const NodeStoreServers* storageNodes = app->getStorageNodes();
TargetMapper* targetMapper = app->getTargetMapper();
const NumNodeID nodeID = getNodeID();
std::map<uint16_t, FhgfsOpsErr> results;
for (const auto mapping : getTargets())
{
const auto targetId = mapping.first;
const auto poolId = mapping.second;
const auto mapRes = targetMapper->mapTarget(targetId, nodeID, poolId);
results[targetId] = mapRes.first;
if ( (mapRes.first != FhgfsOpsErr_SUCCESS) && (mapRes.second) )
{ // target could be mapped and is new
LOG_DEBUG_CONTEXT(log, Log_WARNING, "Mapping "
"target " + StringTk::uintToStr(targetId) +
" => " +
storageNodes->getNodeIDWithTypeStr(nodeID) );
IGNORE_UNUSED_VARIABLE(storageNodes);
}
}
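// if the requestor asked for an ack, acknowledge() sends it; otherwise reply explicitly with
// the per-target mapping results (same pattern as in the other node message handlers)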
if(!acknowledge(ctx) )
ctx.sendResponse(MapTargetsRespMsg(results));
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/MapTargetsMsg.h>
class MapTargetsMsgEx : public MapTargetsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,21 @@
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "PublishCapacitiesMsgEx.h"
bool PublishCapacitiesMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
InternodeSyncer* syncer = app->getInternodeSyncer();
// force upload of capacity information
syncer->setForcePublishCapacities();
// send response
acknowledge(ctx);
return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/nodes/PublishCapacitiesMsg.h>
class PublishCapacitiesMsgEx : public PublishCapacitiesMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,21 @@
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "RefreshTargetStatesMsgEx.h"
bool RefreshTargetStatesMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
InternodeSyncer* syncer = app->getInternodeSyncer();
// force update of capacity pools
syncer->setForceTargetStatesUpdate();
// send response
acknowledge(ctx);
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/RefreshTargetStatesMsg.h>
class RefreshTargetStatesMsgEx : public RefreshTargetStatesMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,124 @@
#include "RemoveBuddyGroupMsgEx.h"
#include <common/net/message/nodes/RemoveBuddyGroupRespMsg.h>
#include <net/message/storage/listing/ListChunkDirIncrementalMsgEx.h>
#include <program/Program.h>
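/**
 * Recursively checks whether a chunk directory tree is removable, i.e. whether it contains only
 * (possibly nested) subdirectories and no regular files.
 *
 * Note: takes ownership of dirFD (closed via the DIR* obtained from fdopendir()).
 *
 * @return FhgfsOpsErr_SUCCESS if only subdirectories were found, FhgfsOpsErr_NOTEMPTY if a
 *    non-directory entry exists, FhgfsOpsErr_INTERNAL on readdir/stat/open errors.
 */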
static FhgfsOpsErr checkChunkDirRemovable(const int dirFD)
{
DIR* dir = fdopendir(dirFD);
std::unique_ptr<DIR, StorageTk::CloseDirDeleter> _dir(dir);
while (true)
{
struct dirent* result;
#if USE_READDIR_R
struct dirent buffer;
if (readdir_r(dir, &buffer, &result) != 0)
break;
#else
errno = 0;
result = readdir(dir);
if (!result && errno)
break;
#endif
if (!result)
return FhgfsOpsErr_SUCCESS;
if (strcmp(result->d_name, ".") == 0 || strcmp(result->d_name, "..") == 0)
continue;
struct stat statData;
const int statRes = ::fstatat(dirfd(dir), result->d_name, &statData, AT_SYMLINK_NOFOLLOW);
if (statRes != 0)
{
LOG(MIRRORING, ERR, "Could not stat something in chunk directory.");
return FhgfsOpsErr_INTERNAL;
}
if (!S_ISDIR(statData.st_mode))
return FhgfsOpsErr_NOTEMPTY;
const int subdir = ::openat(dirfd(dir), result->d_name, O_RDONLY);
if (subdir < 0)
{
LOG(MIRRORING, ERR, "Could not open directory in chunk path.");
return FhgfsOpsErr_INTERNAL;
}
const FhgfsOpsErr checkRes = checkChunkDirRemovable(subdir);
if (checkRes != FhgfsOpsErr_SUCCESS)
return checkRes;
}
return FhgfsOpsErr_INTERNAL;
}
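/**
 * Unmaps a storage buddy group from this node: resolves which target of the group is local,
 * verifies that the corresponding mirror chunk directory contains no chunk files (unless force
 * is set), and then - unless checkOnly is set - removes the group from the local mapper.
 */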
bool RemoveBuddyGroupMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
if (type != NODETYPE_Storage)
{
ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
return true;
}
uint16_t targetID = app->getMirrorBuddyGroupMapper()->getPrimaryTargetID(groupID);
if (app->getTargetMapper()->getNodeID(targetID) != app->getLocalNode().getNumID())
targetID = app->getMirrorBuddyGroupMapper()->getSecondaryTargetID(groupID);
if (app->getTargetMapper()->getNodeID(targetID) != app->getLocalNode().getNumID())
{
LOG(MIRRORING, ERR, "Group is not mapped on this target.", groupID);
ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
return true;
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
LOG(MIRRORING, ERR, "Could not open directory file descriptor.", groupID);
ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
return true;
}
const int dirFD = openat(*target->getMirrorFD(), ".", O_RDONLY);
if (dirFD < 0)
{
LOG(MIRRORING, ERR, "Could not open directory file descriptor.", groupID);
ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
return true;
}
const FhgfsOpsErr checkRes = checkChunkDirRemovable(dirFD);
const bool forceAndNotEmpty = checkRes == FhgfsOpsErr_NOTEMPTY && force;
if (checkRes == FhgfsOpsErr_SUCCESS || forceAndNotEmpty)
{
if (!checkOnly)
{
auto* const bgm = Program::getApp()->getMirrorBuddyGroupMapper();
const NumNodeID localID = Program::getApp()->getLocalNode().getNumID();
if (!bgm->unmapMirrorBuddyGroup(groupID, localID))
{
ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
return true;
}
}
ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_SUCCESS));
return true;
}
else
{
ctx.sendResponse(RemoveBuddyGroupRespMsg(checkRes));
return true;
}
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/RemoveBuddyGroupMsg.h>
class RemoveBuddyGroupMsgEx : public RemoveBuddyGroupMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,37 @@
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "RemoveNodeMsgEx.h"
bool RemoveNodeMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
LOG_DBG(GENERAL, SPAM, "Removing node.", getNodeNumID());
if (getNodeType() == NODETYPE_Storage)
{
NodeStoreServers* nodes = app->getStorageNodes();
auto node = nodes->referenceNode(getNodeNumID());
bool delRes = nodes->deleteNode(getNodeNumID());
// log
if (delRes)
{
LOG(GENERAL, WARNING, "Node removed.", ("node", node->getNodeIDWithTypeStr()));
LOG(GENERAL, WARNING, "Number of nodes in the system:",
("meta", app->getMetaNodes()->getSize()),
("storage", app->getStorageNodes()->getSize()));
}
}
if (!acknowledge(ctx))
ctx.sendResponse(RemoveNodeRespMsg(0));
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_REMOVENODE,
getMsgHeaderUserID() );
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/RemoveNodeMsg.h>
class RemoveNodeMsgEx : public RemoveNodeMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,37 @@
#include <common/net/message/nodes/SetMirrorBuddyGroupRespMsg.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "SetMirrorBuddyGroupMsgEx.h"
bool SetMirrorBuddyGroupMsgEx::processIncoming(ResponseContext& ctx)
{
uint16_t buddyGroupID = this->getBuddyGroupID();
if (getNodeType() != NODETYPE_Storage)
{
// The storage server has no mapper for meta buddy groups - nothing to do, just acknowledge
if (!acknowledge(ctx))
ctx.sendResponse(SetMirrorBuddyGroupRespMsg(FhgfsOpsErr_SUCCESS, buddyGroupID));
return true;
}
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
uint16_t primaryTargetID = this->getPrimaryTargetID();
uint16_t secondaryTargetID = this->getSecondaryTargetID();
bool allowUpdate = this->getAllowUpdate();
uint16_t newBuddyGroupID = 0;
FhgfsOpsErr mapResult = buddyGroupMapper->mapMirrorBuddyGroup(buddyGroupID, primaryTargetID,
secondaryTargetID, app->getLocalNode().getNumID(), allowUpdate, &newBuddyGroupID);
if(!acknowledge(ctx) )
ctx.sendResponse(SetMirrorBuddyGroupRespMsg(mapResult, newBuddyGroupID) );
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/SetMirrorBuddyGroupMsg.h>
class SetMirrorBuddyGroupMsgEx : public SetMirrorBuddyGroupMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,40 @@
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/nodes/TargetStateStore.h>
#include <common/toolkit/ZipIterator.h>
#include <program/Program.h>
#include "SetTargetConsistencyStatesMsgEx.h"
bool SetTargetConsistencyStatesMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
StorageTargets* storageTargets = app->getStorageTargets();
FhgfsOpsErr result = FhgfsOpsErr_SUCCESS;
if (getTargetIDs().size() != getStates().size())
{
LogContext(__func__).logErr("Different list size of targetIDs and states");
result = FhgfsOpsErr_INTERNAL;
goto send_response;
}
for (ZipIterRange<UInt16List, UInt8List> idStateIter(getTargetIDs(), getStates());
!idStateIter.empty(); ++idStateIter)
{
auto* const target = storageTargets->getTarget(*idStateIter()->first);
if (!target)
{
LogContext(__func__).logErr("Unknown targetID: " +
StringTk::uintToStr(*(idStateIter()->first) ) );
result = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
target->setState(TargetConsistencyState(*idStateIter()->second));
}
send_response:
ctx.sendResponse(SetTargetConsistencyStatesRespMsg(result) );
return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
class SetTargetConsistencyStatesMsgEx : public SetTargetConsistencyStatesMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,66 @@
#include <app/App.h>
#include <common/net/message/nodes/StorageBenchControlMsgResp.h>
#include <components/benchmarker/StorageBenchOperator.h>
#include <program/Program.h>
#include "StorageBenchControlMsgEx.h"
bool StorageBenchControlMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "StorageBenchControlMsg incoming";
StorageBenchResultsMap results;
int cmdErrorCode = STORAGEBENCH_ERROR_NO_ERROR;
App* app = Program::getApp();
StorageBenchOperator* storageBench = app->getStorageBenchOperator();
switch(getAction())
{
case StorageBenchAction_START:
{
cmdErrorCode = storageBench->initAndStartStorageBench(&getTargetIDs(), getBlocksize(),
getSize(), getThreads(), getODirect(), getType() );
} break;
case StorageBenchAction_STOP:
{
cmdErrorCode = storageBench->stopBenchmark();
} break;
case StorageBenchAction_STATUS:
{
storageBench->getStatusWithResults(&getTargetIDs(), &results);
cmdErrorCode = STORAGEBENCH_ERROR_NO_ERROR;
} break;
case StorageBenchAction_CLEANUP:
{
cmdErrorCode = storageBench->cleanup(&getTargetIDs());
} break;
default:
{
LogContext(logContext).logErr("unknown action!");
} break;
}
int errorCode;
// check whether the last command from the fhgfs_cmd was successful;
// if not, send the command's error code back to the fhgfs_cmd;
// if it was successful, send the error code of the last (or currently running) benchmark
if (cmdErrorCode != STORAGEBENCH_ERROR_NO_ERROR)
{
errorCode = cmdErrorCode;
}
else
{
errorCode = storageBench->getLastRunErrorCode();
}
ctx.sendResponse(
StorageBenchControlMsgResp(storageBench->getStatus(), getAction(),
storageBench->getType(), errorCode, results) );
return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/StorageBenchControlMsg.h>
#include <common/Common.h>
class StorageBenchControlMsgEx: public StorageBenchControlMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,14 @@
#include "RefreshStoragePoolsMsgEx.h"
#include <program/Program.h>
bool RefreshStoragePoolsMsgEx::processIncoming(ResponseContext& ctx)
{
Program::getApp()->getInternodeSyncer()->setForceStoragePoolsUpdate();
// can only come as an AcknowledgableMsg from mgmtd
acknowledge(ctx);
return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/storagepools/RefreshStoragePoolsMsg.h>
class RefreshStoragePoolsMsgEx : public RefreshStoragePoolsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,94 @@
#include <program/Program.h>
#include <common/net/message/session/FSyncLocalFileRespMsg.h>
#include <common/storage/StorageErrors.h>
#include <net/msghelpers/MsgHelperIO.h>
#include "FSyncLocalFileMsgEx.h"
bool FSyncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
ctx.sendResponse(FSyncLocalFileRespMsg(fsync()));
return true;
}
FhgfsOpsErr FSyncLocalFileMsgEx::fsync()
{
const char* logContext = "FSyncLocalFileMsg incoming";
FhgfsOpsErr clientRes = FhgfsOpsErr_SUCCESS;
bool isMirrorSession = isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_BUDDYMIRROR);
// do session check only when it is not a mirror session
bool useSessionCheck = isMirrorSession ? false :
isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_SESSION_CHECK);
App* app = Program::getApp();
SessionStore* sessions = app->getSessions();
auto session = sessions->referenceOrAddSession(getSessionID());
SessionLocalFileStore* sessionLocalFiles = session->getLocalFiles();
// select the right targetID
uint16_t targetID = getTargetID();
if(isMirrorSession)
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log message here, error handling will happen below through invalid targetFD
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto sessionLocalFile =
sessionLocalFiles->referenceSession(getFileHandleID(), targetID, isMirrorSession);
if(sessionLocalFile)
{ // sessionLocalFile exists => check if open and perform fsync
if (!isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_NO_SYNC) )
{
auto& fd = sessionLocalFile->getFD();
if (fd.valid())
{ // file open => sync
int fsyncRes = MsgHelperIO::fsync(*fd);
if(fsyncRes)
{
LogContext log(logContext);
log.log(Log_WARNING, std::string("fsync of chunk file failed. ") +
std::string("SessionID: ") + getSessionID().str() +
std::string(". SysErr: ") + System::getErrString() );
clientRes = FhgfsOpsErr_INTERNAL;
}
}
}
if(useSessionCheck && sessionLocalFile->isServerCrashed() )
{ // server crashed during the write, maybe lost some data; send error to client
LogContext log(logContext);
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.) "
"The session is marked as dirty.");
clientRes = FhgfsOpsErr_STORAGE_SRV_CRASHED;
}
}
else
if (useSessionCheck)
{ // the server crashed during a write or before the close was successful
LogContext log(logContext);
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
"No session for file available. "
"FileHandleID: " + std::string(getFileHandleID()) );
clientRes = FhgfsOpsErr_STORAGE_SRV_CRASHED;
}
return clientRes;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/session/FSyncLocalFileMsg.h>
class FSyncLocalFileMsgEx : public FSyncLocalFileMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
FhgfsOpsErr fsync();
};

View File

@@ -0,0 +1,252 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/toolkit/SessionTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "CloseChunkFileMsgEx.h"
#include <boost/lexical_cast.hpp>
bool CloseChunkFileMsgEx::processIncoming(ResponseContext& ctx)
{
App* app = Program::getApp();
FhgfsOpsErr closeMsgRes;
DynamicAttribs dynAttribs;
std::tie(closeMsgRes, dynAttribs) = close(ctx);
// if closeMsgRes == FhgfsOpsErr_COMMUNICATION, a GenericResponseMsg has been sent already
if (closeMsgRes != FhgfsOpsErr_COMMUNICATION)
ctx.sendResponse(
CloseChunkFileRespMsg(closeMsgRes, dynAttribs.filesize, dynAttribs.allocedBlocks,
dynAttribs.modificationTimeSecs, dynAttribs.lastAccessTimeSecs,
dynAttribs.storageVersion) );
// update op counters
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_CLOSELOCAL,
getMsgHeaderUserID() );
return true;
}
std::pair<FhgfsOpsErr, CloseChunkFileMsgEx::DynamicAttribs> CloseChunkFileMsgEx::close(
ResponseContext& ctx)
{
const char* logContext = "CloseChunkFileMsg incoming";
App* app = Program::getApp();
Config* config = app->getConfig();
SessionStore* sessions = app->getSessions();
uint16_t targetID;
FhgfsOpsErr closeMsgRes = FhgfsOpsErr_SUCCESS; // the result that will be sent to requestor
DynamicAttribs dynAttribs = {0, 0, 0, 0, 0};
std::string fileHandleID(getFileHandleID() );
bool isMirrorSession = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR);
SessionLocalFileStore* sessionLocalFiles;
// select the right targetID
targetID = getTargetID();
if(isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR) )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
if(unlikely(!targetID) )
{ // unknown target
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
return {FhgfsOpsErr_UNKNOWNTARGET, {}};
}
}
// forward to secondary (if appropriate)
closeMsgRes = forwardToSecondary(ctx);
if (unlikely(closeMsgRes != FhgfsOpsErr_SUCCESS))
return {closeMsgRes, dynAttribs};
auto session = sessions->referenceOrAddSession(getSessionID());
sessionLocalFiles = session->getLocalFiles();
auto fsState = sessionLocalFiles->removeSession(fileHandleID, targetID, isMirrorSession);
// get current dynamic file attribs
if (fsState)
{ // file no longer in use => refresh filesize and close file fd
auto& fd = fsState->getFD();
/* get dynamic attribs, here before closing the file.
* Note: Depending on the underlying file system the returned st_blocks might be too large
* (pre-allocated blocks, which are only released on close() ). Advantage here is
* that we already have the file descriptor. */
if( (config->getTuneEarlyStat() ) &&
(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS) ) )
getDynamicAttribsByFD(*fd, fileHandleID, targetID, dynAttribs);
// close fd
if (!fsState->close())
closeMsgRes = FhgfsOpsErr_INTERNAL;
// get the attributes only here (after the close), so that xfs releases its pre-allocated blocks first
if( (!config->getTuneEarlyStat() ) &&
(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS) ) )
getDynamicAttribsByPath(fileHandleID, targetID, dynAttribs);
}
else
if(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS) )
{ // file still in use by other threads => get dynamic attribs by path
bool getRes = getDynamicAttribsByPath(fileHandleID, targetID, dynAttribs);
if (getRes)
{
// LogContext(logContext).log(Log_DEBUG, "Chunk file virtually closed. "
// "HandleID: " + fileHandleID);
}
}
// note: "file not exists" is not an error. we just have nothing to do in that case.
return {closeMsgRes, dynAttribs};
}
/**
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
*
 * @return _COMMUNICATION if forwarding to the buddy failed and the buddy is not marked offline
 * (in that case a GenericResponseMsg has already been sent to the requestor).
* @throw SocketException if sending of GenericResponseMsg fails.
*/
FhgfsOpsErr CloseChunkFileMsgEx::forwardToSecondary(ResponseContext& ctx)
{
const char* logContext = "CloseChunkFileMsg incoming (forward to secondary)";
App* app = Program::getApp();
if(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR) ||
isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
return FhgfsOpsErr_SUCCESS; // nothing to do
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_CloseChunkFileResp);
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
// remove the flag that we just added for secondary
unsetMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);
if(unlikely(
(commRes == FhgfsOpsErr_COMMUNICATION) &&
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );;
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
}
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"error: " + boost::lexical_cast<std::string>(commRes));
std::string genericRespStr = "Communication with secondary failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
return FhgfsOpsErr_COMMUNICATION;
}
CloseChunkFileRespMsg* respMsg = (CloseChunkFileRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr secondaryRes = respMsg->getResult();
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
boost::lexical_cast<std::string>(secondaryRes) + "; "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
return secondaryRes;
}
return FhgfsOpsErr_SUCCESS;
}
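/**
 * Fetches the dynamic chunk file attribs (size, allocated blocks, timestamps) via an already
 * open file descriptor.
 *
 * The chunk path is locked so that the stat data and the returned storageVersion refer to the
 * same point in time.
 */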
bool CloseChunkFileMsgEx::getDynamicAttribsByFD(const int fd, std::string fileHandleID,
uint16_t targetID, DynamicAttribs& outDynAttribs)
{
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
std::string fileID(SessionTk::fileIDFromHandleID(fileHandleID) );
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // LOCK
// note: this is locked because we need to get the filesize together with the storageVersion
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(fd, &outDynAttribs.filesize,
&outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
&outDynAttribs.lastAccessTimeSecs);
if(getDynAttribsRes)
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // UNLOCK
return getDynAttribsRes;
}
bool CloseChunkFileMsgEx::getDynamicAttribsByPath(std::string fileHandleID, uint16_t targetID,
DynamicAttribs& outDynAttribs)
{
const char* logContext = "CloseChunkFileMsg (attribs by path)";
App* app = Program::getApp();
SyncedStoragePaths* syncedPaths = app->getSyncedStoragePaths();
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{ // unknown targetID
LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(targetID) );
return false;
}
const int targetFD = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR)
? *target->getMirrorFD()
: *target->getChunkFD();
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
std::string pathStr = StorageTk::getFileChunkPath(getPathInfo(), fileID);
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
// note: this is locked because we need to get the filesize together with the storageVersion
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(targetFD, pathStr.c_str(),
&outDynAttribs.filesize, &outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
&outDynAttribs.lastAccessTimeSecs);
if(getDynAttribsRes)
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
return getDynAttribsRes;
}

View File

@@ -0,0 +1,29 @@
#pragma once
#include <common/net/message/session/opening/CloseChunkFileMsg.h>
class CloseChunkFileMsgEx : public CloseChunkFileMsg
{
private:
struct DynamicAttribs
{
int64_t filesize;
int64_t allocedBlocks; // allocated 512byte blocks (relevant for sparse files)
int64_t modificationTimeSecs;
int64_t lastAccessTimeSecs;
uint64_t storageVersion;
};
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
FhgfsOpsErr forwardToSecondary(ResponseContext& ctx);
bool getDynamicAttribsByFD(int fd, std::string fileHandleID, uint16_t targetID,
DynamicAttribs& outDynAttribs);
bool getDynamicAttribsByPath(std::string fileHandleID, uint16_t targetID,
DynamicAttribs& outDynAttribs);
std::pair<FhgfsOpsErr, DynamicAttribs> close(ResponseContext& ctx);
};

View File

@@ -0,0 +1,114 @@
#pragma once
#ifdef BEEGFS_NVFS
#include <string>
#include <typeinfo>
#include <common/net/message/session/rw/ReadLocalFileRDMAMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/components/worker/Worker.h>
#include <session/SessionLocalFileStore.h>
#include "ReadLocalFileV2MsgEx.h"
/**
* Implements RDMA write protocol.
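 *
 * Note: instead of streaming the payload over the socket, each chunk read from disk is written
 * directly into the client's registered RDMA buffers (readStateSendData() passes the remote
 * buffer address and key to Socket::write()); only the final length info travels over the
 * regular channel via sendLengthInfo().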
*/
class ReadLocalFileRDMAMsgSender : public ReadLocalFileRDMAMsg
{
public:
struct ReadState : public ReadStateBase
{
RdmaInfo* rdma;
uint64_t rBuf;
size_t rLen;
uint64_t rOff;
ReadState(const char* logContext, uint64_t toBeRead,
SessionLocalFile* sessionLocalFile) :
ReadStateBase(logContext, toBeRead, sessionLocalFile) {}
};
private:
friend class ReadLocalFileMsgExBase<ReadLocalFileRDMAMsgSender, ReadState>;
static std::string logContextPref;
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
{
lengthInfo = HOST_TO_LE_64(lengthInfo);
sock->send(&lengthInfo, sizeof(int64_t), 0);
}
/**
* RDMA write data to the remote buffer.
*/
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
{
ssize_t writeRes = sock->write(buf, rs.readRes, 0, rs.rBuf + rs.rOff, rs.rdma->key);
LOG_DEBUG(rs.logContext, Log_DEBUG,
"buf: " + StringTk::uint64ToHexStr((uint64_t)buf) + "; "
"bufLen: " + StringTk::int64ToStr(rs.readRes) + "; "
"rbuf: " + StringTk::uint64ToHexStr(rs.rBuf) + "; "
"rkey: " + StringTk::uintToHexStr(rs.rdma->key) + "; "
"writeRes: " + StringTk::int64ToStr(writeRes));
if (unlikely(writeRes != rs.readRes))
{
LogContext(rs.logContext).logErr("Unable to write file data to client. "
"FileID: " + rs.sessionLocalFile->getFileID() + "; "
"SysErr: " + System::getErrString());
writeRes = -1;
}
if (isFinal && likely(writeRes >= 0))
sendLengthInfo(sock, getCount() - rs.toBeRead);
return writeRes;
}
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
{
// Cannot RDMA anything larger than WORKER_BUFOUT_SIZE in a single operation
// because that is the size of the buffer passed in by the Worker.
// TODO: pass around a Buffer with a length instead of unqualified char*.
return BEEGFS_MIN(BEEGFS_MIN(len, ssize_t(rs.rLen - rs.rOff)), WORKER_BUFOUT_SIZE);
}
inline bool readStateInit(ReadState& rs)
{
rs.rdma = getRdmaInfo();
if (unlikely(!rs.rdma->next(rs.rBuf, rs.rLen, rs.rOff)))
{
LogContext(rs.logContext).logErr("No entities in RDMA buffers.");
return false;
}
return true;
}
inline bool readStateNext(ReadState& rs)
{
rs.rOff += rs.readRes;
if (rs.rOff == rs.rLen)
{
if (unlikely(!rs.rdma->next(rs.rBuf, rs.rLen, rs.rOff)))
{
LogContext(rs.logContext).logErr("RDMA buffers exhausted");
return false;
}
}
return true;
}
inline size_t getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf)
{
*dataBuf = ctx.getBuffer();
*sendBuf = *dataBuf;
return ctx.getBufferLength();
}
};
typedef ReadLocalFileMsgExBase<ReadLocalFileRDMAMsgSender,
ReadLocalFileRDMAMsgSender::ReadState> ReadLocalFileRDMAMsgEx;
#endif /* BEEGFS_NVFS */

View File

@@ -0,0 +1,466 @@
#include <program/Program.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SessionTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <toolkit/StorageTkEx.h>
#include "ReadLocalFileV2MsgEx.h"
#ifdef BEEGFS_NVFS
#include "ReadLocalFileRDMAMsgEx.h"
#endif
#include <sys/sendfile.h>
#include <sys/mman.h>
#define READ_USE_TUNEFILEREAD_TRIGGER (4*1024*1024) /* seq IO trigger for tuneFileReadSize */
#define READ_BUF_OFFSET_PROTO_MIN (sizeof(int64_t) ) /* for prepended length info */
#define READ_BUF_END_PROTO_MIN (sizeof(int64_t) ) /* for appended length info */
/* reserve more than necessary at buf start to achieve page cache alignment */
const size_t READ_BUF_OFFSET =
BEEGFS_MAX( (long)READ_BUF_OFFSET_PROTO_MIN, sysconf(_SC_PAGESIZE) );
/* reserve more than necessary at buf end to achieve page cache alignment */
const size_t READ_BUF_END_RESERVE =
BEEGFS_MAX( (long)READ_BUF_END_PROTO_MIN, sysconf(_SC_PAGESIZE) );
/* read buffer size cutoff for protocol data */
const size_t READ_BUF_LEN_PROTOCOL_CUTOFF =
READ_BUF_OFFSET + READ_BUF_END_RESERVE;
// Force template instantiation: without these dummy objects, processIncoming() would not be
// emitted and linking would fail.
static ReadLocalFileV2MsgEx forcedLinkageV2;
#ifdef BEEGFS_NVFS
static ReadLocalFileRDMAMsgEx forcedLinkageRDMA;
#endif
std::string ReadLocalFileV2MsgSender::logContextPref = "ReadChunkFileV2Msg";
#ifdef BEEGFS_NVFS
std::string ReadLocalFileRDMAMsgSender::logContextPref = "ReadChunkFileRDMAMsg";
#endif
template <class Msg, typename ReadState>
bool ReadLocalFileMsgExBase<Msg, ReadState>::processIncoming(NetMessage::ResponseContext& ctx)
{
std::string logContext = Msg::logContextPref + " incoming";
bool retVal = true; // return value
int64_t readRes = 0;
std::string fileHandleID(getFileHandleID() );
bool isMirrorSession = isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_BUDDYMIRROR);
// do session check only when it is not a mirror session
bool useSessionCheck = isMirrorSession ? false :
isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_SESSION_CHECK);
App* app = Program::getApp();
SessionStore* sessions = app->getSessions();
auto session = sessions->referenceOrAddSession(getClientNumID());
this->sessionLocalFiles = session->getLocalFiles();
// select the right targetID
uint16_t targetID = getTargetID();
if(isMirrorSession )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log message here, error handling will happen below through invalid targetFD
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMirrorSession)
{ /* buddy mirrored file => fail with Err_COMMUNICATION to make the requestor retry.
mgmt will mark this target as (p)offline in a few moments. */
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return true;
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_UNKNOWNTARGET);
return true;
}
// check if we already have a session for this file...
auto sessionLocalFile = sessionLocalFiles->referenceSession(
fileHandleID, targetID, isMirrorSession);
if(!sessionLocalFile)
{ // sessionLocalFile doesn't exist yet => create, insert, re-get it
if(useSessionCheck)
{ // server crashed during the write, maybe lost some data; send error to client
LogContext log(logContext);
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
"No session for file available. "
"FileHandleID: " + fileHandleID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_STORAGE_SRV_CRASHED);
goto release_session;
}
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
int openFlags = SessionTk::sysOpenFlagsFromFhgfsAccessFlags(getAccessFlags() );
auto newFile = boost::make_unique<SessionLocalFile>(fileHandleID, targetID, fileID, openFlags,
false);
if(isMirrorSession)
newFile->setIsMirrorSession(true);
sessionLocalFile = sessionLocalFiles->addAndReferenceSession(std::move(newFile));
}
else
{ // session file exists
if(useSessionCheck && sessionLocalFile->isServerCrashed() )
{ // server crashed during the write, maybe lost some data; send error to client
LogContext log(logContext);
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.) "
"The session is marked as dirty. "
"FileHandleID: " + fileHandleID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_STORAGE_SRV_CRASHED);
goto release_session;
}
}
/* Note: the session file must be unlocked/released before we send the finalizing info,
because otherwise we have a race when the client assumes the read is complete and tries
to close the file (while the handle is actually still referenced on the server). */
/* Note: we also must be careful to update the current offset before sending the final length
info because otherwise the session file might have been released already and we have no
longer access to the offset. */
readRes = -1;
try
{
// prepare file descriptor (if file not open yet then open it if it exists already)
FhgfsOpsErr openRes = openFile(*target, sessionLocalFile.get());
if(openRes != FhgfsOpsErr_SUCCESS)
{
sendLengthInfo(ctx.getSocket(), -openRes);
goto release_session;
}
// check if file exists
if(!sessionLocalFile->getFD().valid())
{ // file didn't exist (not an error) => send EOF
sendLengthInfo(ctx.getSocket(), 0);
goto release_session;
}
// the actual read workhorse...
readRes = incrementalReadStatefulAndSendV2(ctx, sessionLocalFile.get());
LOG_DEBUG(logContext, Log_SPAM, "sending completed. "
"readRes: " + StringTk::int64ToStr(readRes) );
IGNORE_UNUSED_VARIABLE(readRes);
}
catch(SocketException& e)
{
LogContext(logContext).logErr(std::string("SocketException occurred: ") + e.what() );
LogContext(logContext).log(Log_WARNING, "Details: "
"sessionID: " + getClientNumID().str() + "; "
"fileHandle: " + fileHandleID + "; "
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
"count: " + StringTk::int64ToStr(getCount() ) );
sessionLocalFile->setOffset(-1); /* invalidate offset (we can only do this if still locked,
but that's not a prob if we update offset correctly before send - see notes above) */
retVal = false;
goto release_session;
}
release_session:
// update operation counters
if(likely(readRes > 0) )
app->getNodeOpStats()->updateNodeOp(
ctx.getSocket()->getPeerIP(), StorageOpCounter_READOPS, readRes, getMsgHeaderUserID() );
return retVal;
}
inline size_t ReadLocalFileV2MsgSender::getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf)
{
*dataBuf = ctx.getBuffer() + READ_BUF_OFFSET; // offset for prepended data length info
*sendBuf = *dataBuf - READ_BUF_OFFSET_PROTO_MIN;
return ctx.getBufferLength() - READ_BUF_LEN_PROTOCOL_CUTOFF; /* cutoff for
prepended and finalizing length info */
}
/**
* Note: This is similar to incrementalReadAndSend, but uses the offset from sessionLocalFile
* to avoid calling seek every time.
*
* Warning: Do not use the returned value to set the new offset, as there might be other threads
* that also did something with the file (i.e. the io-lock is released somewhere within this
* method).
*
* @return number of bytes read or some arbitrary negative value otherwise
*/
template <class Msg, typename ReadState>
int64_t ReadLocalFileMsgExBase<Msg, ReadState>::incrementalReadStatefulAndSendV2(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile)
{
/* note on session offset: the session offset must always be set before sending the data to the
client (otherwise the client could send the next request before we updated the offset, which
would lead to a race condition) */
std::string logContext = Msg::logContextPref + " (read incremental)";
Config* cfg = Program::getApp()->getConfig();
char* dataBuf;
char* sendBuf;
if (READ_BUF_LEN_PROTOCOL_CUTOFF >= ctx.getBufferLength())
{ // buffer too small. That shouldn't happen and is an error
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INTERNAL);
return -1;
}
const ssize_t dataBufLen = getBuffers(ctx, &dataBuf, &sendBuf);
auto& fd = sessionLocalFile->getFD();
int64_t oldOffset = sessionLocalFile->getOffset();
int64_t newOffset = getOffset();
bool skipReadAhead =
unlikely(isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_DISABLE_IO) ||
sessionLocalFile->getIsDirectIO());
ssize_t readAheadSize = skipReadAhead ? 0 : cfg->getTuneFileReadAheadSize();
ssize_t readAheadTriggerSize = cfg->getTuneFileReadAheadTriggerSize();
if( (oldOffset < 0) || (oldOffset != newOffset) )
{
sessionLocalFile->resetReadCounter(); // reset sequential read counter
sessionLocalFile->resetLastReadAheadTrigger();
}
else
{ // read continues at previous offset
LOG_DEBUG(logContext, Log_SPAM,
"fileID: " + sessionLocalFile->getFileID() + "; "
"offset: " + StringTk::int64ToStr(getOffset() ) );
}
size_t maxReadAtOnceLen = dataBufLen;
// reduce maxReadAtOnceLen to achieve better read/send async overlap
/* (note: reducing makes only sense if we can rely on the kernel to do some read-ahead, so don't
reduce for direct IO and for random IO) */
if( (sessionLocalFile->getReadCounter() >= READ_USE_TUNEFILEREAD_TRIGGER) &&
!sessionLocalFile->getIsDirectIO() )
maxReadAtOnceLen = BEEGFS_MIN(dataBufLen, cfg->getTuneFileReadSize() );
off_t readOffset = getOffset();
ReadState readState(logContext.c_str(), getCount(), sessionLocalFile);
if (!isMsgValid() || !readStateInit(readState))
{
LogContext(logContext).logErr("Invalid read message.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INVAL);
return -1;
}
for( ; ; )
{
ssize_t readLength = getReadLength(readState, BEEGFS_MIN(maxReadAtOnceLen, readState.toBeRead));
readState.readRes = unlikely(isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_DISABLE_IO) ) ?
readLength : MsgHelperIO::pread(*fd, dataBuf, readLength, readOffset);
LOG_DEBUG(logContext, Log_SPAM,
"toBeRead: " + StringTk::int64ToStr(readState.toBeRead) + "; "
"readLength: " + StringTk::int64ToStr(readLength) + "; "
"readRes: " + StringTk::int64ToStr(readState.readRes) );
if(readState.readRes == readLength)
{ // simple success case
readState.toBeRead -= readState.readRes;
readOffset += readState.readRes;
int64_t newOffset = getOffset() + getCount() - readState.toBeRead;
sessionLocalFile->setOffset(newOffset); // update offset
sessionLocalFile->incReadCounter(readState.readRes); // update sequential read length
ctx.getStats()->incVals.diskReadBytes += readState.readRes; // update stats
bool isFinal = !readState.toBeRead;
if (readStateSendData(ctx.getSocket(), readState, sendBuf, isFinal) < 0)
{
LogContext(logContext).logErr("readStateSendData failed.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return -1;
}
checkAndStartReadAhead(sessionLocalFile, readAheadTriggerSize, newOffset, readAheadSize);
if(isFinal)
{ // we reached the end of the requested data
return getCount();
}
if (!readStateNext(readState))
{
LogContext(logContext).logErr("readStateNext failed.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return -1;
}
}
else
{ // readRes not as it should be => might be an error or just an end-of-file
if(readState.readRes == -1)
{ // read error occurred
LogContext(logContext).log(Log_WARNING, "Unable to read file data. "
"FileID: " + sessionLocalFile->getFileID() + "; "
"SysErr: " + System::getErrString() );
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INTERNAL);
return -1;
}
else
{ // just an end of file
LOG_DEBUG(logContext, Log_DEBUG,
"Unable to read all of the requested data (=> end of file)");
LOG_DEBUG(logContext, Log_DEBUG,
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
"count: " + StringTk::int64ToStr(getCount() ) + "; "
"readLength: " + StringTk::int64ToStr(readLength) + "; " +
"readRes: " + StringTk::int64ToStr(readState.readRes) + "; " +
"toBeRead: " + StringTk::int64ToStr(readState.toBeRead) );
readOffset += readState.readRes;
readState.toBeRead -= readState.readRes;
sessionLocalFile->setOffset(getOffset() + getCount() - readState.toBeRead); // update offset
sessionLocalFile->incReadCounter(readState.readRes); // update sequential read length
ctx.getStats()->incVals.diskReadBytes += readState.readRes; // update stats
if(readState.readRes > 0)
{
if (readStateSendData(ctx.getSocket(), readState, sendBuf, true) < 0)
{
LogContext(logContext).logErr("readStateSendData failed.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return -1;
}
}
else
sendLengthInfo(ctx.getSocket(), 0);
return(getCount() - readState.toBeRead);
}
}
} // end of for-loop
}
/**
* Starts read-ahead if enough sequential data has been read.
*
* Note: if getDisableIO() is true, we assume the caller sets readAheadSize==0, so getDisableIO()
* is not checked explicitly within this function.
*
 * @param sessionLocalFile lastReadAheadTrigger will be updated if read-ahead was triggered
* @param readAheadTriggerSize the length of sequential IO that triggers read-ahead
* @param currentOffset current file offset (where read-ahead would start)
*/
template <class Msg, typename ReadState>
void ReadLocalFileMsgExBase<Msg, ReadState>::checkAndStartReadAhead(SessionLocalFile* sessionLocalFile,
ssize_t readAheadTriggerSize, off_t currentOffset, off_t readAheadSize)
{
std::string logContext = Msg::logContextPref + " (read-ahead)";
if(!readAheadSize)
return;
int64_t readCounter = sessionLocalFile->getReadCounter();
int64_t nextReadAheadTrigger = sessionLocalFile->getLastReadAheadTrigger() ?
sessionLocalFile->getLastReadAheadTrigger() + readAheadSize : readAheadTriggerSize;
if(readCounter < nextReadAheadTrigger)
return; // we're not at the trigger point yet
/* start read-ahead...
(read-ahead is supposed to be non-blocking if there are free slots in the device IO queue) */
LOG_DEBUG(logContext, Log_SPAM,
std::string("Starting read-ahead... ") +
"offset: " + StringTk::int64ToStr(currentOffset) + "; "
"size: " + StringTk::int64ToStr(readAheadSize) );
MsgHelperIO::readAhead(*sessionLocalFile->getFD(), currentOffset, readAheadSize);
// update trigger
sessionLocalFile->setLastReadAheadTrigger(readCounter);
}
/**
* Open the file if a filedescriptor is not already set in sessionLocalFile.
* If the file needs to be opened, this method will check the target consistency state before
* opening.
*
* @return we return the special value FhgfsOpsErr_COMMUNICATION here in some cases to indirectly
* ask the client for a retry (e.g. if target consistency is not good for buddymirrored chunks).
*/
template <class Msg, typename ReadState>
FhgfsOpsErr ReadLocalFileMsgExBase<Msg, ReadState>::openFile(const StorageTarget& target,
SessionLocalFile* sessionLocalFile)
{
std::string logContext = Msg::logContextPref + " (open)";
bool isBuddyMirrorChunk = sessionLocalFile->getIsMirrorSession();
if (sessionLocalFile->getFD().valid())
return FhgfsOpsErr_SUCCESS; // file already open => nothing to be done here
// file not open yet => get targetFD and check consistency state
const auto consistencyState = target.getConsistencyState();
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
if(unlikely(consistencyState != TargetConsistencyState_GOOD) && isBuddyMirrorChunk)
{ // this is a request for a buddymirrored chunk on a non-good target
LogContext(logContext).log(Log_NOTICE, "Refusing request. Target consistency is not good. "
"targetID: " + StringTk::uintToStr(target.getID()));
return FhgfsOpsErr_COMMUNICATION;
}
FhgfsOpsErr openChunkRes = sessionLocalFile->openFile(targetFD, getPathInfo(), false, NULL);
return openChunkRes;
}

View File

@@ -0,0 +1,216 @@
#pragma once
#include <common/net/message/session/rw/ReadLocalFileV2Msg.h>
#include <common/storage/StorageErrors.h>
#include <session/SessionLocalFileStore.h>
class StorageTarget;
/**
* Contains common data needed by implementations of the network protocol
* that send data to the client.
*/
struct ReadStateBase
{
const char* logContext;
uint64_t toBeRead;
SessionLocalFile* sessionLocalFile;
ssize_t readRes;
ReadStateBase(const char* logContext, uint64_t toBeRead,
SessionLocalFile* sessionLocalFile)
{
this->logContext = logContext;
this->toBeRead = toBeRead;
this->sessionLocalFile = sessionLocalFile;
}
};
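/**
 * Common read/send loop shared by the different wire protocols (plain V2 and RDMA).
 *
 * Uses the CRTP: this base implements processIncoming() and the incremental read loop and
 * forwards the protocol-specific steps (readStateInit(), readStateSendData(), readStateNext(),
 * getReadLength(), getBuffers(), sendLengthInfo()) to the concrete Msg class via static_cast,
 * so no virtual dispatch is needed on the data path.
 */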
template <class Msg, typename ReadState>
class ReadLocalFileMsgExBase : public Msg
{
public:
bool processIncoming(NetMessage::ResponseContext& ctx);
private:
SessionLocalFileStore* sessionLocalFiles;
FhgfsOpsErr openFile(const StorageTarget& target, SessionLocalFile* sessionLocalFile);
void checkAndStartReadAhead(SessionLocalFile* sessionLocalFile, ssize_t readAheadTriggerSize,
off_t currentOffset, off_t readAheadSize);
int64_t incrementalReadStatefulAndSendV2(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile);
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
{
static_cast<Msg&>(*this).sendLengthInfo(sock, lengthInfo);
}
inline bool readStateInit(ReadState& rs)
{
return static_cast<Msg&>(*this).readStateInit(rs);
}
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
{
return static_cast<Msg&>(*this).readStateSendData(sock, rs, buf, isFinal);
}
inline bool readStateNext(ReadState& rs)
{
return static_cast<Msg&>(*this).readStateNext(rs);
}
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
{
return static_cast<Msg&>(*this).getReadLength(rs, len);
}
inline size_t getBuffers(NetMessage::ResponseContext& ctx, char** dataBuf, char** sendBuf)
{
return static_cast<Msg&>(*this).getBuffers(ctx, dataBuf, sendBuf);
}
public:
inline unsigned getMsgHeaderUserID() const
{
return static_cast<const Msg&>(*this).getMsgHeaderUserID();
}
inline bool isMsgHeaderFeatureFlagSet(unsigned flag) const
{
return static_cast<const Msg&>(*this).isMsgHeaderFeatureFlagSet(flag);
}
inline uint16_t getTargetID() const
{
return static_cast<const Msg&>(*this).getTargetID();
}
inline int64_t getOffset() const
{
return static_cast<const Msg&>(*this).getOffset();
}
inline int64_t getCount() const
{
return static_cast<const Msg&>(*this).getCount();
}
inline const char* getFileHandleID()
{
return static_cast<Msg&>(*this).getFileHandleID();
}
inline NumNodeID getClientNumID() const
{
return static_cast<const Msg&>(*this).getClientNumID();
}
inline unsigned getAccessFlags() const
{
return static_cast<const Msg&>(*this).getAccessFlags();
}
inline PathInfo* getPathInfo ()
{
return static_cast<Msg&>(*this).getPathInfo();
}
inline bool isMsgValid() const
{
return static_cast<const Msg&>(*this).isMsgValid();
}
};
/**
* Implements the Version 2 send protocol. It uses a preceding length info for each chunk.
*/
class ReadLocalFileV2MsgSender : public ReadLocalFileV2Msg
{
/* note on protocol: this works by sending an int64 before each data chunk, which contains the
length of the next data chunk; or a zero if no more data can be read; or a negative fhgfs
error code in case of an error */
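   /* example framing for a request that is served by two reads of 32768 bytes each:
         [int64: 32768][32768 data bytes][int64: 32768][32768 data bytes][int64: 0]
      and for a request that fails immediately:
         [int64: -FhgfsOpsErr_INTERNAL]
      (length fields are little-endian, see sendLengthInfo() / readStateSendData() below).

      Illustrative client-side receive loop (a sketch, not part of this file; recvExact() is a
      hypothetical helper that reads exactly n bytes from the socket, and LE_TO_HOST_64() is
      assumed to be the counterpart of HOST_TO_LE_64() used below):

         for( ; ; )
         {
            int64_t lenLE;
            recvExact(sock, &lenLE, sizeof(lenLE) );
            const int64_t len = LE_TO_HOST_64(lenLE);

            if(len == 0)
               break; // all requested data received (or end of file)
            if(len < 0)
               return (FhgfsOpsErr)-len; // negative length is a fhgfs error code

            recvExact(sock, dataBuf, len); // next data chunk of 'len' bytes
            dataBuf += len;
         }
   */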
public:
struct ReadState : public ReadStateBase
{
ReadState(const char* logContext, uint64_t toBeRead,
SessionLocalFile* sessionLocalFile) :
ReadStateBase(logContext, toBeRead, sessionLocalFile) {}
};
private:
friend class ReadLocalFileMsgExBase<ReadLocalFileV2MsgSender, ReadState>;
static std::string logContextPref;
/**
* Send only length information without a data packet. Typically used for the final length
* info at the end of the requested data.
*/
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
{
lengthInfo = HOST_TO_LE_64(lengthInfo);
sock->send(&lengthInfo, sizeof(int64_t), 0);
}
/**
* No-op for this implementation.
*/
inline bool readStateInit(ReadState& rs)
{
return true;
}
/**
* Send length information and the corresponding data packet buffer.
*
* Note: rs.readRes is used to compute buf length for send()
*
* @param rs.readRes must not be negative
* @param buf the buffer with a preceding gap for the length info
* @param isFinal true if this is the last send, i.e. we have read all data
*/
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
{
ssize_t sendRes;
{
Serializer ser(buf, sizeof(int64_t));
ser % rs.readRes;
}
if (isFinal)
{
Serializer ser(buf + sizeof(int64_t) + rs.readRes, sizeof(int64_t));
ser % int64_t(0);
sendRes = sock->send(buf, (2*sizeof(int64_t) ) + rs.readRes, 0);
}
else
{
sendRes = sock->send(buf, sizeof(int64_t) + rs.readRes, 0);
}
return sendRes;
}
/**
* No-op for this implementation.
*/
inline bool readStateNext(ReadState& rs)
{
return true;
}
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
{
return len;
}
size_t getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf);
};
typedef ReadLocalFileMsgExBase<ReadLocalFileV2MsgSender,
ReadLocalFileV2MsgSender::ReadState> ReadLocalFileV2MsgEx;

View File

@@ -0,0 +1,926 @@
#include <program/Program.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/SessionTk.h>
#include <common/toolkit/StorageTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <storage/StorageTargets.h>
#include <toolkit/StorageTkEx.h>
#include "WriteLocalFileMsgEx.h"
#ifdef BEEGFS_NVFS
#include "WriteLocalFileRDMAMsgEx.h"
#endif
#include <boost/lexical_cast.hpp>
static WriteLocalFileMsgEx forcedLinkage;
#ifdef BEEGFS_NVFS
static WriteLocalFileRDMAMsgEx forcedLinkageRDMA;
#endif
const std::string WriteLocalFileMsgSender::logContextPref = "WriteChunkFileMsg";
#ifdef BEEGFS_NVFS
const std::string WriteLocalFileRDMAMsgSender::logContextPref = "WriteChunkFileRDMAMsg";
#endif
template <class Msg, typename WriteState>
bool WriteLocalFileMsgExBase<Msg, WriteState>::processIncoming(NetMessage::ResponseContext& ctx)
{
App* app = Program::getApp();
bool success;
int64_t writeClientRes;
if (!isMsgValid())
{
sendResponse(ctx, FhgfsOpsErr_INVAL);
return false;
}
std::tie(success, writeClientRes) = write(ctx);
if (success)
{
sendResponse(ctx, writeClientRes);
// update operation counters
if (likely(writeClientRes > 0))
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_WRITEOPS, writeClientRes, getMsgHeaderUserID());
}
return success;
}
template <class Msg, typename WriteState>
std::pair<bool, int64_t> WriteLocalFileMsgExBase<Msg, WriteState>::write(NetMessage::ResponseContext& ctx)
{
std::string logContext = Msg::logContextPref + " incoming";
App* app = Program::getApp();
int64_t writeClientRes = -(int64_t)FhgfsOpsErr_INTERNAL; // bytes written or negative fhgfs err
FhgfsOpsErr finishMirroringRes = FhgfsOpsErr_INTERNAL;
std::string fileHandleID(getFileHandleID() );
bool isMirrorSession = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
bool serverCrashed = false;
QuotaExceededErrorType quotaExceeded = QuotaExceededErrorType_NOT_EXCEEDED;
SessionStore* sessions = Program::getApp()->getSessions();
auto session = sessions->referenceOrAddSession(getClientNumID());
SessionLocalFileStore* sessionLocalFiles = session->getLocalFiles();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
bool chunkLocked = false;
// select the right targetID
uint16_t targetID = getTargetID();
if(isMirrorSession)
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log message here, error handling will happen below through invalid targetFD
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMirrorSession)
{ /* buddy mirrored file => fail with Err_COMMUNICATION to make the requestor retry.
mgmt will mark this target as (p)offline in a few moments. */
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
return {false, FhgfsOpsErr_COMMUNICATION};
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
return {false, FhgfsOpsErr_UNKNOWNTARGET};
}
// check if we already have session for this file...
auto sessionLocalFile = sessionLocalFiles->referenceSession(
fileHandleID, targetID, isMirrorSession);
if(!sessionLocalFile)
{ // sessionLocalFile doesn't exist yet => create, insert, re-get it
if(doSessionCheck() )
{ // server crashed during the write, maybe lost some data; send error to client
LogContext log(logContext);
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
"No session for file available. "
"FileHandleID: " + fileHandleID);
serverCrashed = true;
}
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
int openFlags = SessionTk::sysOpenFlagsFromFhgfsAccessFlags(getAccessFlags() );
auto newFile = boost::make_unique<SessionLocalFile>(fileHandleID, targetID, fileID, openFlags,
serverCrashed);
if(isMirrorSession)
newFile->setIsMirrorSession(true);
sessionLocalFile = sessionLocalFiles->addAndReferenceSession(std::move(newFile));
}
else
{ // session file exists
if(doSessionCheck() && sessionLocalFile->isServerCrashed() )
{ // server crashed during the write, maybe lost some data; send error to client
LogContext log(logContext);
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.)"
"The session is marked as dirty. "
"FileHandleID: " + fileHandleID);
serverCrashed = true;
}
}
// check if the size quota is exceeded for the user or group
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) &&
app->getConfig()->getQuotaEnableEnforcement() )
{
quotaExceeded = app->getExceededQuotaStores()->get(targetID)->isQuotaExceeded(getUserID(),
getGroupID(), QuotaLimitType_SIZE);
if(quotaExceeded != QuotaExceededErrorType_NOT_EXCEEDED)
{
LogContext(logContext).log(Log_NOTICE,
QuotaData::QuotaExceededErrorTypeToString(quotaExceeded) + " "
"UID: " + StringTk::uintToStr(this->getUserID()) + "; "
"GID: " + StringTk::uintToStr(this->getGroupID() ) );
// receive the message content before return with error
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
writeClientRes = -(int64_t) FhgfsOpsErr_DQUOT;
goto cleanup;
}
}
try
{
if(isMirrorSession && target->getBuddyResyncInProgress())
{
// a mirrored chunk is about to be modified while a buddy resync is in progress => lock the chunk
std::string chunkID = sessionLocalFile->getFileID();
chunkLockStore->lockChunk(targetID, chunkID);
chunkLocked = true;
}
// prepare file descriptor (if file not open yet then create/open it)
FhgfsOpsErr openRes = openFile(*target, sessionLocalFile.get());
if(unlikely(openRes != FhgfsOpsErr_SUCCESS) )
{
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
writeClientRes = -(int64_t)openRes;
goto cleanup;
}
// store mirror node reference in session and init mirrorToSock member
FhgfsOpsErr prepMirrorRes = prepareMirroring(ctx.getBuffer(), ctx.getBufferLength(),
sessionLocalFile.get(), *target);
if(unlikely(prepMirrorRes != FhgfsOpsErr_SUCCESS) )
{ // mirroring failed
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
writeClientRes = -(int64_t)prepMirrorRes;
goto cleanup;
}
// the actual write workhorse
int64_t writeLocalRes = incrementalRecvAndWriteStateful(ctx, sessionLocalFile.get());
// update client result, offset etc.
int64_t newOffset;
if(unlikely(writeLocalRes < 0) )
newOffset = -1; // writing failed
else
{ // writing succeeded
newOffset = getOffset() + writeLocalRes;
ctx.getStats()->incVals.diskWriteBytes += writeLocalRes; // update stats
}
sessionLocalFile->setOffset(newOffset);
writeClientRes = writeLocalRes;
}
catch(SocketException& e)
{
LogContext(logContext).logErr(std::string("SocketException occurred: ") + e.what() );
LogContext(logContext).log(Log_WARNING, std::string("Details: ") +
"sessionID: " + getClientNumID().str() + "; "
"fileHandle: " + std::string(sessionLocalFile->getFileHandleID() ) + "; "
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
"count: " + StringTk::int64ToStr(getCount() ) );
sessionLocalFile->setOffset(-1); // invalidate offset
finishMirroring(sessionLocalFile.get(), *target);
if (chunkLocked)
{
std::string chunkID = sessionLocalFile->getFileID();
chunkLockStore->unlockChunk(targetID, chunkID);
}
return {false, -1};
}
cleanup:
finishMirroringRes = finishMirroring(sessionLocalFile.get(), *target);
// check mirroring result (don't overwrite local error code, if any)
if(likely(writeClientRes > 0) )
{ // no local error => check mirroring result
if(unlikely(finishMirroringRes != FhgfsOpsErr_SUCCESS) )
writeClientRes = -finishMirroringRes; // mirroring failed => use err code as client result
}
if (chunkLocked)
{
std::string chunkID = sessionLocalFile->getFileID();
chunkLockStore->unlockChunk(targetID, chunkID);
}
if (serverCrashed)
writeClientRes = -(int64_t) FhgfsOpsErr_STORAGE_SRV_CRASHED;
return {true, writeClientRes};
}
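/* note (editor's summary of the write() path above): resolve the target (mapping a buddy mirror
 * group ID to its primary or secondary target), reference the per-client session file, reject
 * the request early if the size quota is exceeded, lock the chunk while a buddy resync is in
 * progress, open the chunk file, forward the write header to the mirror, receive and write the
 * data, then collect the mirror result, unlock the chunk and return the written byte count (or
 * a negative FhgfsOpsErr code) to be sent to the client. */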
ssize_t WriteLocalFileMsgSender::recvPadding(ResponseContext& ctx, int64_t toBeReceived)
{
Config* cfg = Program::getApp()->getConfig();
return ctx.getSocket()->recvT(ctx.getBuffer(),
BEEGFS_MIN(toBeReceived, ctx.getBufferLength()), 0, cfg->getConnMsgMediumTimeout());
}
#ifdef BEEGFS_NVFS
ssize_t WriteLocalFileRDMAMsgSender::recvPadding(ResponseContext& ctx, int64_t toBeReceived)
{
RdmaInfo* rdma = getRdmaInfo();
uint64_t rBuf;
size_t rLen;
uint64_t rOff;
if (!rdma->next(rBuf, rLen, rOff))
return -1;
ssize_t recvLength = BEEGFS_MIN(ctx.getBufferLength(), toBeReceived);
recvLength = BEEGFS_MIN(recvLength, (ssize_t)(rLen - rOff));
return ctx.getSocket()->read(ctx.getBuffer(), recvLength, 0, rBuf+rOff, rdma->key);
}
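/* note: in the RDMA variant above, "receiving" padding means issuing an RDMA read from the
 * client's registered buffer (rBuf+rOff with the provided key) into the local buffer; the data
 * is then discarded by the caller, just like in the stream-based recvPadding(). */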
#endif /* BEEGFS_NVFS */
/**
* Note: New offset is saved in the session by the caller afterwards (to make life easier).
* @return number of written bytes or negative fhgfs error code
*/
template <class Msg, typename WriteState>
int64_t WriteLocalFileMsgExBase<Msg, WriteState>::incrementalRecvAndWriteStateful(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile)
{
std::string logContext = Msg::logContextPref + " (write incremental)";
Config* cfg = Program::getApp()->getConfig();
// we can safely cast getTuneFileWriteSize to size_t below to make the comparison possible, as
// it can technically never be negative and will therefore always fit into size_t
const ssize_t exactStaticRecvSize = sessionLocalFile->getIsDirectIO()
? ctx.getBufferLength()
: BEEGFS_MIN(ctx.getBufferLength(), (size_t)cfg->getTuneFileWriteSize() );
auto& fd = sessionLocalFile->getFD();
int64_t oldOffset = sessionLocalFile->getOffset();
int64_t newOffset = getOffset();
bool useSyncRange = false; // true if sync_file_range should be called
if( (oldOffset < 0) || (oldOffset != newOffset) )
sessionLocalFile->resetWriteCounter(); // reset sequential write counter
else
{ // continue at previous offset => increase sequential write counter
LOG_DEBUG(logContext, Log_SPAM, "Offset: " + StringTk::int64ToStr(getOffset() ) );
sessionLocalFile->incWriteCounter(getCount() );
ssize_t syncSize = unlikely(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) ) ?
0 : cfg->getTuneFileWriteSyncSize();
if (syncSize && (sessionLocalFile->getWriteCounter() >= syncSize) )
useSyncRange = true;
}
// incrementally receive file contents...
WriteState writeState(logContext.c_str(), exactStaticRecvSize,
getCount(), getOffset(), sessionLocalFile);
if (!writeStateInit(writeState))
return -FhgfsOpsErr_COMMUNICATION;
do
{
// receive some bytes...
LOG_DEBUG(logContext, Log_SPAM,
"receiving... (remaining: " + StringTk::intToStr(writeState.toBeReceived) + ")");
ssize_t recvRes = writeStateRecvData(ctx, writeState);
if (recvRes < 0)
{
LogContext(logContext).log(Log_WARNING, "Socket data transfer error occurred. ");
return -FhgfsOpsErr_COMMUNICATION;
}
// forward to mirror...
FhgfsOpsErr mirrorRes = sendToMirror(ctx.getBuffer(), recvRes,
writeState.writeOffset, writeState.toBeReceived, sessionLocalFile);
if(unlikely(mirrorRes != FhgfsOpsErr_SUCCESS) )
{ // mirroring failed
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
return -FhgfsOpsErr_COMMUNICATION;
}
// write to underlying file system...
int errCode = 0;
ssize_t writeRes = unlikely(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
? recvRes
: doWrite(*fd, ctx.getBuffer(), recvRes, writeState.writeOffset, errCode);
writeState.toBeReceived -= recvRes;
// handle write errors...
if(unlikely(writeRes != recvRes) )
{ // didn't write all of the received data
if(writeRes == -1)
{ // write error occurred
LogContext(logContext).log(Log_WARNING, "Write error occurred. "
"FileHandleID: " + sessionLocalFile->getFileHandleID() + "."
"Target: " + StringTk::uintToStr(sessionLocalFile->getTargetID() ) + ". "
"File: " + sessionLocalFile->getFileID() + ". "
"SysErr: " + System::getErrString(errCode) );
LogContext(logContext).log(Log_NOTICE, std::string("Additional info: "
"FD: ") + StringTk::intToStr(*fd) + " " +
"OpenFlags: " + StringTk::intToStr(sessionLocalFile->getOpenFlags() ) + " " +
"received: " + StringTk::intToStr(recvRes) + ".");
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
return -FhgfsOpsErrTk::fromSysErr(errCode);
}
else
{ // wrote only a part of the data, not all of it
LogContext(logContext).log(Log_WARNING,
"Unable to write all of the received data. "
"target: " + StringTk::uintToStr(sessionLocalFile->getTargetID() ) + "; "
"file: " + sessionLocalFile->getFileID() + "; "
"sysErr: " + System::getErrString(errCode) );
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
// return bytes received so far minus num bytes that were not written with last write
return (getCount() - writeState.toBeReceived) - (recvRes - writeRes);
}
}
writeState.writeOffset += writeRes;
recvRes = writeStateNext(writeState, writeRes);
if (recvRes != 0)
return recvRes;
} while(writeState.toBeReceived);
LOG_DEBUG(logContext, Log_SPAM,
std::string("Received and wrote all the data") );
// commit to storage device queue...
if (useSyncRange)
{
// advise kernel to commit written data to storage device in max_sectors_kb chunks.
/* note: this is async if there are free slots in the request queue
/sys/block/<...>/nr_requests. (optimal_io_size is not honoured as of linux-3.4) */
off64_t syncSize = sessionLocalFile->getWriteCounter();
off64_t syncOffset = getOffset() + getCount() - syncSize;
MsgHelperIO::syncFileRange(*fd, syncOffset, syncSize);
sessionLocalFile->resetWriteCounter();
}
return getCount();
}
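/* note on the return value of incrementalRecvAndWriteStateful() above: on success the full
 * getCount() is returned; on a short write the result is the bytes received so far minus the
 * unwritten tail (e.g. 64k received in total, but the last pwrite() stored only 48k of a 64k
 * buffer => 64k - 16k = 48k is returned); communication and write errors are returned as
 * negative FhgfsOpsErr codes. */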
/**
* Write until everything has been written (handling short writes) or an error occurred.
*/
template <class Msg, typename WriteState>
ssize_t WriteLocalFileMsgExBase<Msg, WriteState>::doWrite(int fd, char* buf, size_t count, off_t offset, int& outErrno)
{
size_t sumWriteRes = 0;
do
{
ssize_t writeRes =
MsgHelperIO::pwrite(fd, buf + sumWriteRes, count - sumWriteRes, offset + sumWriteRes);
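// on failure: if nothing was written yet, store the -1 (the size_t/ssize_t round-trip on
// return preserves it); otherwise return the bytes written so far as a short write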
if (unlikely(writeRes == -1) )
{
sumWriteRes = (sumWriteRes > 0) ? sumWriteRes : writeRes;
outErrno = errno;
break;
}
sumWriteRes += writeRes;
} while (sumWriteRes != count);
return sumWriteRes;
}
/**
* Receive and discard data.
*/
template <class Msg, typename WriteState>
void WriteLocalFileMsgExBase<Msg, WriteState>::incrementalRecvPadding(NetMessage::ResponseContext& ctx,
int64_t padLen, SessionLocalFile* sessionLocalFile)
{
uint64_t toBeReceived = padLen;
while(toBeReceived)
{
ssize_t recvRes = recvPadding(ctx, toBeReceived);
if (recvRes == -1)
break;
// forward to mirror...
FhgfsOpsErr mirrorRes = sendToMirror(ctx.getBuffer(), recvRes,
getOffset() + padLen - toBeReceived, toBeReceived, sessionLocalFile);
if(unlikely(mirrorRes != FhgfsOpsErr_SUCCESS) )
{ // mirroring failed
/* ... but if we are in this method, then something went wrong anyway, so don't set
needs-resync here or report any error to the caller. */
}
toBeReceived -= recvRes;
}
}
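/* note on incrementalRecvPadding() above: the discarded padding is still forwarded to the
 * mirror so that the secondary consumes the same number of bytes from its write stream; mirror
 * errors are deliberately ignored here because this path is only reached after a local failure
 * anyway. */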
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::openFile(const StorageTarget& target,
SessionLocalFile* sessionLocalFile)
{
std::string logContext = Msg::logContextPref + " (write incremental)";
bool useQuota = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA);
bool enforceQuota = Program::getApp()->getConfig()->getQuotaEnableEnforcement();
bool isBuddyMirrorChunk = sessionLocalFile->getIsMirrorSession();
if (sessionLocalFile->getFD().valid())
return FhgfsOpsErr_SUCCESS; // file already open => nothing to be done here
// file not open yet => get targetFD and check consistency state
const auto consistencyState = target.getConsistencyState();
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
isBuddyMirrorChunk &&
!isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
{ // this is a request for a buddymirrored chunk on a non-good primary
LogContext(logContext).log(Log_NOTICE, "Refusing request. Target consistency is not good. "
"targetID: " + StringTk::uintToStr(target.getID()));
return FhgfsOpsErr_COMMUNICATION;
}
SessionQuotaInfo quotaInfo(useQuota, enforceQuota, getUserID(), getGroupID() );
FhgfsOpsErr openChunkRes = sessionLocalFile->openFile(targetFD, getPathInfo(), true, &quotaInfo);
return openChunkRes;
}
/**
* Prepares mirroring by storing mirrorNode reference in file session and setting the mirrorToSock
* member variable.
*
* Note: Mirror node reference needs to be released on file session close.
*
* @param buf used to send the initial write msg header to the mirror.
* @param bufLen length of buf.
* @return FhgfsOpsErr_COMMUNICATION if communication with the mirror failed.
*/
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::prepareMirroring(char* buf, size_t bufLen,
SessionLocalFile* sessionLocalFile, StorageTarget& target)
{
std::string logContext = Msg::logContextPref + " (prepare mirroring)";
// check if mirroring is enabled
if(!isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_FORWARD) )
return FhgfsOpsErr_SUCCESS;
App* app = Program::getApp();
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
TargetStateStore* targetStates = app->getTargetStateStore();
// check if secondary is offline or in unclear state
uint16_t secondaryTargetID = mirrorBuddies->getSecondaryTargetID(getTargetID() );
if(unlikely(!secondaryTargetID) )
{
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
return FhgfsOpsErr_UNKNOWNTARGET;
}
CombinedTargetState secondaryState;
bool getSecondaryStateRes = targetStates->getState(secondaryTargetID, secondaryState);
if(unlikely(!getSecondaryStateRes) )
{
LOG_DEBUG(logContext, Log_DEBUG,
"Refusing request. Secondary target has invalid state. "
"targetID: " + StringTk::uintToStr(secondaryTargetID) );
return FhgfsOpsErr_COMMUNICATION;
}
if( (secondaryState.reachabilityState != TargetReachabilityState_ONLINE) ||
(secondaryState.consistencyState != TargetConsistencyState_GOOD) )
{
if(secondaryState.reachabilityState == TargetReachabilityState_OFFLINE)
{ // buddy is offline => mark needed resync and continue with local operation
LOG_DEBUG(logContext, Log_DEBUG,
"Secondary is offline and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
if(secondaryState.consistencyState != TargetConsistencyState_NEEDS_RESYNC)
{ // unclear buddy state => client must try again
LOG_DEBUG(logContext, Log_DEBUG,
"Unclear secondary state, caller will have to try again later. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
return FhgfsOpsErr_COMMUNICATION;
}
}
// store mirror node reference in session...
NodeHandle mirrorToNode = sessionLocalFile->getMirrorNode();
if(!mirrorToNode)
{
NodeStoreServers* storageNodes = app->getStorageNodes();
TargetMapper* targetMapper = app->getTargetMapper();
FhgfsOpsErr referenceErr;
mirrorToNode = storageNodes->referenceNodeByTargetID(secondaryTargetID, targetMapper,
&referenceErr);
if(unlikely(referenceErr != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).logErr(
"Unable to forward to mirror target: " + StringTk::uintToStr(secondaryTargetID) + "; "
"Error: " + boost::lexical_cast<std::string>(referenceErr));
return referenceErr;
}
mirrorToNode = sessionLocalFile->setMirrorNodeExclusive(mirrorToNode);
}
// send initial write msg header to mirror (retry loop)...
for( ; ; )
{
try
{
// acquire connection to mirror node and send write msg...
mirrorToSock = mirrorToNode->getConnPool()->acquireStreamSocket();
WriteLocalFileMsg mirrorWriteMsg(getClientNumID(), getFileHandleID(), getTargetID(),
getPathInfo(), getAccessFlags(), getOffset(), getCount());
if(doSessionCheck() )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_DISABLE_IO);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) )
mirrorWriteMsg.setUserdataForQuota(getUserID(), getGroupID() );
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
unsigned msgLength = mirrorWriteMsg.serializeMessage(buf, bufLen).second;
mirrorToSock->send(buf, msgLength, 0);
return FhgfsOpsErr_SUCCESS;
}
catch(SocketConnectException& e)
{
LogContext(logContext).log(Log_CRITICAL, "Unable to connect to mirror node: " +
mirrorToNode->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
}
catch(SocketException& e)
{
LogContext(logContext).log(Log_CRITICAL, "Communication with mirror node failed: " +
mirrorToNode->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
if(mirrorToSock)
mirrorToNode->getConnPool()->invalidateStreamSocket(mirrorToSock);
mirrorToSock = NULL;
}
// error occurred if we got here
if(!mirrorRetriesLeft)
break;
mirrorRetriesLeft--;
// next round will be a retry
LogContext(logContext).log(Log_NOTICE, "Retrying mirror communication: " +
mirrorToNode->getNodeIDWithTypeStr() );
} // end of retry for-loop
// all retries exhausted if we got here
return FhgfsOpsErr_COMMUNICATION;
}
/**
* Send file contents to mirror.
*
* Note: Supports retries only at beginning of write msg.
*
* @param buf the buffer that should be sent to the mirror.
* @param offset the offset within the chunk file (only used if communication fails and we need to
* start over with a new WriteMsg to the mirror).
* @param toBeMirrored total remaining mirror data including given bufLen (only used for retries).
* @return FhgfsOpsErr_COMMUNICATION if mirroring fails.
*/
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::sendToMirror(const char* buf, size_t bufLen,
int64_t offset, int64_t toBeMirrored, SessionLocalFile* sessionLocalFile)
{
std::string logContext = Msg::logContextPref + " (send to mirror)";
// check if mirroring enabled
if(!mirrorToSock)
return FhgfsOpsErr_SUCCESS; // either no mirroring enabled or all retries exhausted
bool isRetryRound = false;
// send raw data (retry loop)...
// (note: if sending fails, retrying requires sending of a new WriteMsg)
for( ; ; )
{
try
{
if(unlikely(isRetryRound) )
{ // retry requires reconnect and resend of write msg with current offset
auto mirrorToNode = sessionLocalFile->getMirrorNode();
mirrorToSock = mirrorToNode->getConnPool()->acquireStreamSocket();
WriteLocalFileMsg mirrorWriteMsg(getClientNumID(), getFileHandleID(),
getTargetID(), getPathInfo(), getAccessFlags(), offset, toBeMirrored);
if(doSessionCheck() )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_DISABLE_IO);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) )
mirrorWriteMsg.setUserdataForQuota(getUserID(), getGroupID() );
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
const auto mirrorBuf = MessagingTk::createMsgVec(mirrorWriteMsg);
mirrorToSock->send(&mirrorBuf[0], mirrorBuf.size(), 0);
}
mirrorToSock->send(buf, bufLen, 0);
return FhgfsOpsErr_SUCCESS;
}
catch(SocketConnectException& e)
{
auto mirrorToNode = sessionLocalFile->getMirrorNode();
LogContext(logContext).log(Log_CRITICAL, "Unable to connect to mirror node: " +
mirrorToNode->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
}
catch(SocketException& e)
{
LogContext(logContext).log(Log_CRITICAL, "Communication with mirror node failed: " +
sessionLocalFile->getMirrorNode()->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
if(mirrorToSock)
sessionLocalFile->getMirrorNode()->getConnPool()->invalidateStreamSocket(mirrorToSock);
mirrorToSock = NULL;
}
// error occurred if we got here
if(!mirrorRetriesLeft)
break;
// only allow retries if we're still at the beginning of the write msg.
/* (this is because later we don't have all the client data available; and without the mirror
response we don't know for sure whether previously sent data was really written or not.) */
if(toBeMirrored != getCount() )
break;
mirrorRetriesLeft--;
// next round will be a retry
LogContext(logContext).log(Log_NOTICE, "Retrying mirror communication: " +
sessionLocalFile->getMirrorNode()->getNodeIDWithTypeStr() );
isRetryRound = true;
} // end of retry for-loop
// all retries exhausted if we got here
return FhgfsOpsErr_COMMUNICATION;
}
/**
* Receive response from mirror node, check result, clean up (release mirror sock).
*
* Note: Does not do retries on communication errors
*/
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::finishMirroring(SessionLocalFile* sessionLocalFile,
StorageTarget& target)
{
std::string logContext = Msg::logContextPref + " (finish mirroring)";
// check if mirroring enabled
if(!mirrorToSock)
return FhgfsOpsErr_SUCCESS; // mirroring disabled
App* app = Program::getApp();
auto mirrorToNode = sessionLocalFile->getMirrorNode();
WriteLocalFileRespMsg* writeRespMsg;
int64_t mirrorWriteRes;
// receive write msg response from mirror...
/* note: we don't have the file contents that were sent by the client anymore at this point, so
we cannot do retries here with a new WriteMsg. */
try
{
// receive write msg response...
auto resp = MessagingTk::recvMsgBuf(*mirrorToSock);
if (resp.empty())
{ // error
LogContext(logContext).log(Log_WARNING,
"Failed to receive response from mirror: " + mirrorToSock->getPeername() );
goto cleanup_commerr;
}
// got response => deserialize it...
auto respMsg = app->getNetMessageFactory()->createFromBuf(std::move(resp));
if(unlikely(respMsg->getMsgType() != NETMSGTYPE_WriteLocalFileResp) )
{ // response invalid (wrong msgType)
LogContext(logContext).logErr(
"Received invalid response type: " + StringTk::intToStr(respMsg->getMsgType() ) +"; "
"expected type: " + StringTk::intToStr(NETMSGTYPE_WriteLocalFileResp) + ". "
"Disconnecting: " + mirrorToSock->getPeername() );
goto cleanup_commerr;
}
// check mirror result and release mirror socket...
mirrorToNode->getConnPool()->releaseStreamSocket(mirrorToSock);
writeRespMsg = (WriteLocalFileRespMsg*)respMsg.get();
mirrorWriteRes = writeRespMsg->getValue();
if(likely(mirrorWriteRes == getCount() ) )
return FhgfsOpsErr_SUCCESS; // mirror successfully wrote all of the data
if(mirrorWriteRes >= 0)
{ // mirror only wrote a part of the data
LogContext(logContext).log(Log_WARNING,
"Mirror did not write all of the data (no space left); "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"fileHandle: " + sessionLocalFile->getFileHandleID() );
return FhgfsOpsErr_NOSPACE;
}
if(mirrorWriteRes == -FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage is restarted without a broken secondary target, so we
report success to a client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
if(mirrorWriteRes == -FhgfsOpsErr_STORAGE_SRV_CRASHED)
LogContext(logContext).log(Log_NOTICE, "Potential cache loss for open file handle. "
"(Mirror server crash detected.) "
"FileHandleID: " + sessionLocalFile->getFileHandleID() + "; "
"Mirror: " + mirrorToNode->getNodeIDWithTypeStr() );
// mirror encountered an error
return (FhgfsOpsErr)-mirrorWriteRes; // write response contains negative fhgfs error code
}
catch(SocketException& e)
{
LogContext(logContext).logErr(std::string("SocketException: ") + e.what() );
LogContext(logContext).log(Log_WARNING, "Additional info: "
"mirror node: " + mirrorToNode->getNodeIDWithTypeStr() + "; "
"fileHandle: " + sessionLocalFile->getFileHandleID() );
}
// cleanup after communication error...
cleanup_commerr:
mirrorToNode->getConnPool()->invalidateStreamSocket(mirrorToSock);
return FhgfsOpsErr_COMMUNICATION;
}
template <class Msg, typename WriteState>
bool WriteLocalFileMsgExBase<Msg, WriteState>::doSessionCheck()
{ // do session check only when it is not a mirror session
return isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR) ? false :
isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
}

View File

@@ -0,0 +1,213 @@
#pragma once
#include <common/net/message/session/rw/WriteLocalFileMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
#include <session/SessionLocalFile.h>
#include <common/storage/StorageErrors.h>
#define WRITEMSG_MIRROR_RETRIES_NUM 1
class StorageTarget;
/**
* Contains common data needed by implementations of the network protocol
* that receive data from the client.
*/
struct WriteStateBase
{
const char* logContext;
ssize_t exactStaticRecvSize;
ssize_t recvLength;
int64_t toBeReceived;
off_t writeOffset;
SessionLocalFile* sessionLocalFile;
WriteStateBase(const char* logContext, ssize_t exactStaticRecvSize,
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile)
{
this->logContext = logContext;
this->exactStaticRecvSize = exactStaticRecvSize;
this->toBeReceived = toBeReceived;
this->writeOffset = writeOffset;
this->sessionLocalFile = sessionLocalFile;
recvLength = BEEGFS_MIN(exactStaticRecvSize, toBeReceived);
}
};
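/* note on WriteStateBase above: during incrementalRecvAndWriteStateful(), toBeReceived counts
 * down towards zero, writeOffset advances by the number of bytes actually written, and
 * recvLength is recomputed by the protocol-specific writeStateRecvData() before every receive. */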
template <class Msg, typename WriteState>
class WriteLocalFileMsgExBase : public Msg
{
private:
Socket* mirrorToSock;
unsigned mirrorRetriesLeft;
public:
bool processIncoming(NetMessage::ResponseContext& ctx);
WriteLocalFileMsgExBase() : Msg()
{
mirrorToSock = NULL;
mirrorRetriesLeft = WRITEMSG_MIRROR_RETRIES_NUM;
}
private:
std::pair<bool, int64_t> write(NetMessage::ResponseContext& ctx);
ssize_t doWrite(int fd, char* buf, size_t count, off_t offset, int& outErrno);
FhgfsOpsErr openFile(const StorageTarget& target, SessionLocalFile* sessionLocalFile);
FhgfsOpsErr prepareMirroring(char* buf, size_t bufLen,
SessionLocalFile* sessionLocalFile, StorageTarget& target);
FhgfsOpsErr sendToMirror(const char* buf, size_t bufLen, int64_t offset, int64_t toBeMirrored,
SessionLocalFile* sessionLocalFile);
FhgfsOpsErr finishMirroring(SessionLocalFile* sessionLocalFile, StorageTarget& target);
bool doSessionCheck();
int64_t incrementalRecvAndWriteStateful(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile);
void incrementalRecvPadding(NetMessage::ResponseContext& ctx, int64_t padLen,
SessionLocalFile* sessionLocalFile);
inline ssize_t recvPadding(NetMessage::ResponseContext& ctx, int64_t toBeReceived)
{
return static_cast<Msg&>(*this).recvPadding(ctx, toBeReceived);
}
inline void sendResponse(NetMessage::ResponseContext& ctx, int err)
{
return static_cast<Msg&>(*this).sendResponse(ctx, err);
}
inline bool writeStateInit(WriteState& ws)
{
return static_cast<Msg&>(*this).writeStateInit(ws);
}
inline ssize_t writeStateRecvData(NetMessage::ResponseContext& ctx, WriteState& ws)
{
return static_cast<Msg&>(*this).writeStateRecvData(ctx, ws);
}
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
{
return static_cast<Msg&>(*this).writeStateNext(ws, writeRes);
}
public:
inline bool isMsgValid() const
{
return static_cast<const Msg&>(*this).isMsgValid();
}
inline bool isMsgHeaderFeatureFlagSet(unsigned flag) const
{
return static_cast<const Msg&>(*this).isMsgHeaderFeatureFlagSet(flag);
}
inline unsigned getMsgHeaderUserID() const
{
return static_cast<const Msg&>(*this).getMsgHeaderUserID();
}
inline uint16_t getTargetID() const
{
return static_cast<const Msg&>(*this).getTargetID();
}
inline int64_t getOffset() const
{
return static_cast<const Msg&>(*this).getOffset();
}
inline unsigned getUserID() const
{
return static_cast<const Msg&>(*this).getUserID();
}
inline unsigned getGroupID() const
{
return static_cast<const Msg&>(*this).getGroupID();
}
inline int64_t getCount() const
{
return static_cast<const Msg&>(*this).getCount();
}
inline const char* getFileHandleID()
{
return static_cast<Msg&>(*this).getFileHandleID();
}
inline NumNodeID getClientNumID() const
{
return static_cast<const Msg&>(*this).getClientNumID();
}
inline unsigned getAccessFlags() const
{
return static_cast<const Msg&>(*this).getAccessFlags();
}
inline PathInfo* getPathInfo ()
{
return static_cast<Msg&>(*this).getPathInfo();
}
};
/**
* Implements the recv protocol.
*/
class WriteLocalFileMsgSender : public WriteLocalFileMsg
{
public:
struct WriteState : public WriteStateBase
{
WriteState(const char* logContext, ssize_t exactStaticRecvSize,
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile) :
WriteStateBase(logContext, exactStaticRecvSize, toBeReceived, writeOffset,
sessionLocalFile) {}
};
private:
friend class WriteLocalFileMsgExBase<WriteLocalFileMsgSender, WriteState>;
static const std::string logContextPref;
ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
inline void sendResponse(ResponseContext& ctx, int err)
{
ctx.sendResponse(WriteLocalFileRespMsg(err));
}
inline bool writeStateInit(WriteState& ws)
{
return true;
}
inline ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws)
{
AbstractApp* app = PThread::getCurrentThreadApp();
int connMsgMediumTimeout = app->getCommonConfig()->getConnMsgMediumTimeout();
ws.recvLength = BEEGFS_MIN(ws.exactStaticRecvSize, ws.toBeReceived);
return ctx.getSocket()->recvExactT(ctx.getBuffer(), ws.recvLength, 0, connMsgMediumTimeout);
}
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
{
return 0;
}
};
typedef WriteLocalFileMsgExBase<WriteLocalFileMsgSender,
WriteLocalFileMsgSender::WriteState> WriteLocalFileMsgEx;
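/* Editor's note: WriteLocalFileMsgExBase uses static dispatch (CRTP), so adding another
 * transport only requires a sender class that provides the hooks called by the base class.
 * A minimal sketch, with purely hypothetical names that are not part of the codebase:
 *
 *    class MyTransportMsgSender : public WriteLocalFileMsg
 *    {
 *       public:
 *          struct WriteState : public WriteStateBase
 *          {
 *             using WriteStateBase::WriteStateBase; // no extra per-transport state in this sketch
 *          };
 *
 *       private:
 *          friend class WriteLocalFileMsgExBase<MyTransportMsgSender, WriteState>;
 *
 *          static const std::string logContextPref;
 *
 *          ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
 *          void sendResponse(ResponseContext& ctx, int err);
 *          bool writeStateInit(WriteState& ws);
 *          ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws);
 *          size_t writeStateNext(WriteState& ws, ssize_t writeRes);
 *    };
 *
 *    typedef WriteLocalFileMsgExBase<MyTransportMsgSender,
 *       MyTransportMsgSender::WriteState> MyTransportMsgEx;
 */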

View File

@@ -0,0 +1,94 @@
#pragma once
#ifdef BEEGFS_NVFS
#include <common/net/message/session/rw/WriteLocalFileRDMAMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRDMARespMsg.h>
#include <common/components/worker/Worker.h>
#include <session/SessionLocalFile.h>
#include <common/storage/StorageErrors.h>
#include "WriteLocalFileMsgEx.h"
/**
* Implements the RDMA read protocol.
*/
class WriteLocalFileRDMAMsgSender : public WriteLocalFileRDMAMsg
{
public:
struct WriteState : public WriteStateBase
{
RdmaInfo* rdma;
uint64_t rBuf;
size_t rLen;
uint64_t rOff;
int64_t recvSize;
WriteState(const char* logContext, ssize_t exactStaticRecvSize,
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile) :
WriteStateBase(logContext, exactStaticRecvSize, toBeReceived, writeOffset,
sessionLocalFile)
{
recvSize = toBeReceived;
}
};
private:
friend class WriteLocalFileMsgExBase<WriteLocalFileRDMAMsgSender, WriteState>;
static const std::string logContextPref;
ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
inline void sendResponse(ResponseContext& ctx, int err)
{
ctx.sendResponse(WriteLocalFileRDMARespMsg(err));
}
inline bool writeStateInit(WriteState& ws)
{
ws.rdma = getRdmaInfo();
if (unlikely(!ws.rdma->next(ws.rBuf, ws.rLen, ws.rOff)))
{
LogContext(ws.logContext).logErr("No entities in RDMA buffers.");
return false;
}
return true;
}
inline ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws)
{
// Cannot RDMA anything larger than WORKER_BUFIN_SIZE in a single operation
// because that is the size of the buffer passed in by the Worker.
// TODO: pass around a Buffer with a length instead of unqualified char*.
ws.recvLength = BEEGFS_MIN(
BEEGFS_MIN(
BEEGFS_MIN(ws.exactStaticRecvSize, ws.toBeReceived),
(ssize_t)(ws.rLen - ws.rOff)),
WORKER_BUFIN_SIZE);
return ctx.getSocket()->read(ctx.getBuffer(), ws.recvLength, 0, ws.rBuf + ws.rOff, ws.rdma->key);
}
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
{
ws.rOff += writeRes;
if (ws.toBeReceived > 0 && ws.rOff == ws.rLen)
{
if (unlikely(!ws.rdma->next(ws.rBuf, ws.rLen, ws.rOff)))
{
LogContext(ws.logContext).logErr("RDMA buffers expended but not all data received. toBeReceived=" +
StringTk::uint64ToStr(ws.toBeReceived) + "; "
"target: " + StringTk::uintToStr(ws.sessionLocalFile->getTargetID() ) + "; "
"file: " + ws.sessionLocalFile->getFileID() + "; ");
return ws.recvSize - ws.toBeReceived;
}
}
return 0;
}
};
typedef WriteLocalFileMsgExBase<WriteLocalFileRDMAMsgSender,
WriteLocalFileRDMAMsgSender::WriteState> WriteLocalFileRDMAMsgEx;
#endif /* BEEGFS_NVFS */

View File

@@ -0,0 +1,20 @@
#include <program/Program.h>
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include "GetHighResStatsMsgEx.h"
bool GetHighResStatsMsgEx::processIncoming(ResponseContext& ctx)
{
HighResStatsList statsHistory;
uint64_t lastStatsMS = getValue();
// get stats history
StatsCollector* statsCollector = Program::getApp()->getStatsCollector();
statsCollector->getStatsSince(lastStatsMS, statsHistory);
ctx.sendResponse(GetHighResStatsRespMsg(&statsHistory) );
return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/GetHighResStatsMsg.h>
class GetHighResStatsMsgEx : public GetHighResStatsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,56 @@
#include <program/Program.h>
#include <common/net/message/storage/StatStoragePathRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include "StatStoragePathMsgEx.h"
bool StatStoragePathMsgEx::processIncoming(ResponseContext& ctx)
{
int64_t sizeTotal = 0;
int64_t sizeFree = 0;
int64_t inodesTotal = 0;
int64_t inodesFree = 0;
FhgfsOpsErr statRes = statStoragePath(&sizeTotal, &sizeFree, &inodesTotal, &inodesFree);
ctx.sendResponse(StatStoragePathRespMsg(statRes, sizeTotal, sizeFree, inodesTotal, inodesFree) );
App* app = Program::getApp();
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_STATSTORAGEPATH, getMsgHeaderUserID() );
return true;
}
FhgfsOpsErr StatStoragePathMsgEx::statStoragePath(int64_t* outSizeTotal, int64_t* outSizeFree,
int64_t* outInodesTotal, int64_t* outInodesFree)
{
const char* logContext = "StatStoragePathMsg (stat path)";
App* app = Program::getApp();
auto* const target = app->getStorageTargets()->getTarget(getTargetID());
if (!target)
{
LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(getTargetID() ) );
return FhgfsOpsErr_UNKNOWNTARGET;
}
const auto& targetPath = target->getPath().str();
bool statSuccess = StorageTk::statStoragePath(targetPath, outSizeTotal, outSizeFree,
outInodesTotal, outInodesFree);
if(unlikely(!statSuccess) )
{ // error
LogContext(logContext).logErr("Unable to statfs() storage path: " + targetPath +
" (SysErr: " + System::getErrString() );
return FhgfsOpsErr_INTERNAL;
}
// read and use value from manual free space override file (if it exists)
StorageTk::statStoragePathOverride(targetPath, outSizeFree, outInodesFree);
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,18 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/StatStoragePathMsg.h>
// stat of the path to the storage directory, result is similar to statfs
class StatStoragePathMsgEx : public StatStoragePathMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
FhgfsOpsErr statStoragePath(int64_t* outSizeTotal, int64_t* outSizeFree,
int64_t* outInodesTotal, int64_t* outInodesFree);
};

View File

@@ -0,0 +1,432 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "TruncLocalFileMsgEx.h"
#include <boost/lexical_cast.hpp>
#define TRUNCLOCALFILE_CHUNKOPENLAGS (O_CREAT|O_WRONLY|O_LARGEFILE)
bool TruncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "TruncChunkFileMsg incoming";
App* app = Program::getApp();
uint16_t targetID;
int targetFD;
bool chunkLocked = false;
FhgfsOpsErr clientErrRes;
DynamicAttribs dynAttribs; // inits storageVersion to 0 (=> initially invalid)
StorageTarget* target;
// select the right targetID
targetID = getTargetID();
if(isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR) )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
if(unlikely(!targetID) )
{ // unknown group ID
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{ // unknown targetID
if (isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR))
{ /* buddy mirrored file => fail with GenericResp to make the caller retry.
mgmt will mark this target as (p)offline in a few moments. */
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
return true;
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
{ // get targetFD and check consistency state
bool skipResponse = false;
targetFD = getTargetFD(*target, ctx, &skipResponse);
if(unlikely(targetFD == -1) )
{ // failed => consistency state not good
if(skipResponse)
goto skip_response; // GenericResponseMsg sent
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
// forward to secondary (if appropriate)
clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
{
if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
goto skip_response; // GenericResponseMsg sent
goto send_response;
}
{ // valid targetID
std::string entryID(getEntryID() );
// generate path to chunk file...
Path chunkDirPath;
std::string chunkFilePathStr;
const PathInfo *pathInfo = getPathInfo();
bool hasOrigFeature = pathInfo->hasOrigFeature();
StorageTk::getChunkDirChunkFilePath(pathInfo, entryID, hasOrigFeature, chunkDirPath,
chunkFilePathStr);
// truncate file...
clientErrRes = truncFile(targetID, targetFD, &chunkDirPath, chunkFilePathStr, entryID,
hasOrigFeature);
/* clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFilesize() is a special case: we need to fake
* the attributes to inform the metadata server about the new file size with storageVersion != 0 */
if(clientErrRes == FhgfsOpsErr_SUCCESS ||
(clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFilesize() ) )
{ // truncation successful
LOG_DEBUG(logContext, Log_DEBUG, "File truncated: " + chunkFilePathStr);
// get updated dynamic attribs...
if(!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_NODYNAMICATTRIBS) )
{
if (clientErrRes == FhgfsOpsErr_SUCCESS)
getDynamicAttribsByPath(targetFD, chunkFilePathStr.c_str(), targetID, entryID,
dynAttribs);
else
{ // clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFilesize()
getFakeDynAttribs(targetID, entryID, dynAttribs);
}
}
// change to SUCCESS if it was FhgfsOpsErr_PATHNOTEXISTS
clientErrRes = FhgfsOpsErr_SUCCESS;
}
}
send_response:
if(chunkLocked) // unlock chunk
app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );
// send response...
ctx.sendResponse(
TruncLocalFileRespMsg(clientErrRes, dynAttribs.filesize, dynAttribs.allocedBlocks,
dynAttribs.modificationTimeSecs, dynAttribs.lastAccessTimeSecs,
dynAttribs.storageVersion) );
skip_response:
// update operation counters
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_TRUNCLOCALFILE, getMsgHeaderUserID() );
return true;
}
/**
* @param outResponseSent true if a response was sent from within this method; can only be true if
* -1 is returned.
* @return -1 if consistency state was not good (in which case a special response is sent within
* this method), otherwise the file descriptor to chunks dir (or mirror dir).
*/
int TruncLocalFileMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
bool* outResponseSent)
{
bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR);
*outResponseSent = false;
// get targetFD and check consistency state
const auto consistencyState = target.getConsistencyState();
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
isBuddyMirrorChunk &&
!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
{ // this is a msg to a non-good primary
std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
"targetID: " + StringTk::uintToStr(target.getID());
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));
*outResponseSent = true;
return -1;
}
return targetFD;
}
FhgfsOpsErr TruncLocalFileMsgEx::truncFile(uint16_t targetId, int targetFD,
const Path* chunkDirPath, const std::string& chunkFilePathStr, std::string entryID,
bool hasOrigFeature)
{
const char* logContext = "TruncLocalFileMsg incoming";
App* app = Program::getApp();
FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
int truncRes = MsgHelperIO::truncateAt(targetFD, chunkFilePathStr.c_str(), getFilesize() );
if(!truncRes)
return FhgfsOpsErr_SUCCESS; // truncate succeeded
// file or path just doesn't exist or real error?
int truncErrCode = errno;
if(unlikely(truncErrCode != ENOENT) )
{ // error
clientErrRes = FhgfsOpsErrTk::fromSysErr(truncErrCode);
if (clientErrRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
LogContext(logContext).logErr("Unable to truncate file: " + chunkFilePathStr + ". " +
"SysErr: " + System::getErrString(truncErrCode) );
return clientErrRes;
}
// ENOENT => file (and possibly path to file (dirs) ) doesn't exist
/* note: if the file doesn't exist, it's generally not an error.
but if it should grow to a certain size, we have to create it... */
if(!getFilesize() )
return FhgfsOpsErr_PATHNOTEXISTS; // nothing to be done
// create the file and re-size it
bool useQuota = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_USE_QUOTA);
bool enforceQuota = app->getConfig()->getQuotaEnableEnforcement();
SessionQuotaInfo quotaInfo(useQuota, enforceQuota, getUserID(), getGroupID());
const ExceededQuotaStorePtr exceededQuotaStore = app->getExceededQuotaStores()->get(targetId);
ChunkStore* chunkDirStore = app->getChunkDirStore();
int fd;
int openFlags = TRUNCLOCALFILE_CHUNKOPENLAGS;
FhgfsOpsErr mkChunkRes = chunkDirStore->openChunkFile(targetFD, chunkDirPath, chunkFilePathStr,
hasOrigFeature, openFlags, &fd, &quotaInfo, exceededQuotaStore);
if (unlikely(mkChunkRes == FhgfsOpsErr_NOTOWNER && useQuota) )
{
// it already logs a message, so no need to further check this return value
chunkDirStore->chmodV2ChunkDirPath(targetFD, chunkDirPath, entryID);
mkChunkRes = chunkDirStore->openChunkFile(
targetFD, chunkDirPath, chunkFilePathStr, hasOrigFeature, openFlags, &fd, &quotaInfo,
exceededQuotaStore);
}
if (mkChunkRes != FhgfsOpsErr_SUCCESS)
{
if (mkChunkRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
LogContext(logContext).logErr("Failed to create chunkFile: " + chunkFilePathStr);
return mkChunkRes;
}
// file created => trunc it
int ftruncRes = ftruncate(fd, getFilesize() );
if(unlikely(ftruncRes == -1) )
{ // error
clientErrRes = FhgfsOpsErrTk::fromSysErr(errno);
if (clientErrRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
LogContext(logContext).logErr(
"Unable to truncate file (after creation): " + chunkFilePathStr + ". " +
"Length: " + StringTk::int64ToStr(getFilesize() ) + ". " +
"SysErr: " + System::getErrString() );
}
// close file
int closeRes = close(fd);
if(unlikely(closeRes == -1) )
{ // error
clientErrRes = FhgfsOpsErrTk::fromSysErr(errno);
if (clientErrRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
LogContext(logContext).logErr(
"Unable to close file (after creation/truncation): " + chunkFilePathStr + ". " +
"Length: " + StringTk::int64ToStr(getFilesize() ) + ". " +
"SysErr: " + System::getErrString() );
}
return clientErrRes;
}
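/* note (editor's summary of truncFile above): first try a plain truncate of the chunk file; if
 * it does not exist and the requested size is 0 there is nothing to do (PATHNOTEXISTS is mapped
 * back to SUCCESS by the caller), otherwise the chunk file is created via the chunk store
 * (applying quota info) and then ftruncate()'d to the requested size. */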
bool TruncLocalFileMsgEx::getDynamicAttribsByPath(const int dirFD, const char* path,
uint16_t targetID, std::string fileID, DynamicAttribs& outDynAttribs)
{
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
// note: this is locked because we need to get the filesize together with the storageVersion
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(dirFD, path,
&outDynAttribs.filesize, &outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
&outDynAttribs.lastAccessTimeSecs);
if(getDynAttribsRes)
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
return getDynAttribsRes;
}
/**
* Note: only for fileSize == 0 and if the file does not exist yet
*/
bool TruncLocalFileMsgEx::getFakeDynAttribs(uint16_t targetID, std::string fileID,
DynamicAttribs& outDynAttribs)
{
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
int64_t currentTimeSecs = TimeAbs().getTimeval()->tv_sec;
outDynAttribs.filesize = 0;
outDynAttribs.allocedBlocks = 0;
outDynAttribs.modificationTimeSecs = currentTimeSecs;
outDynAttribs.lastAccessTimeSecs = currentTimeSecs; /* actually not correct, but better than
* 1970 */
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
return true;
}
/**
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
*
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
* case *outChunkLocked==false is guaranteed).
* @throw SocketException if sending of GenericResponseMsg fails.
*/
FhgfsOpsErr TruncLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked)
{
const char* logContext = "TruncLocalFileMsgEx incoming (forward to secondary)";
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
*outChunkLocked = false;
if(!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR) ||
isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
return FhgfsOpsErr_SUCCESS; // nothing to do
// mirrored chunk should be modified, check if resync is in progress and lock chunk
*outChunkLocked = target.getBuddyResyncInProgress();
if(*outChunkLocked)
chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_TruncLocalFileResp);
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
// remove the flag that we just added for secondary
unsetMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
if(unlikely(
(commRes == FhgfsOpsErr_COMMUNICATION) &&
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
}
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"error: " + boost::lexical_cast<std::string>(commRes));
if(*outChunkLocked)
{ // unlock chunk
chunkLockStore->unlockChunk(target.getID(), getEntryID() );
*outChunkLocked = false;
}
std::string genericRespStr = "Communication with secondary failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
return FhgfsOpsErr_COMMUNICATION;
}
TruncLocalFileRespMsg* respMsg = (TruncLocalFileRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr secondaryRes = respMsg->getResult();
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
{
if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage is restarted without a broken secondary target, so we
report success to a client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
if(secondaryRes != FhgfsOpsErr_TOOBIG) // "too big" is a valid error if max filesize exceeded
{
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
boost::lexical_cast<std::string>(secondaryRes) + "; "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
}
return secondaryRes;
}
return FhgfsOpsErr_SUCCESS;
}
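/* note (editor's summary of forwardToSecondary above): on the primary of a buddy group the
 * message forwards itself to the secondary by temporarily adding the BUDDYMIRROR_SECOND flag.
 * An offline or unknown-target secondary only marks the buddy as needing a resync and local
 * processing continues; a generic communication failure is reported to the caller as
 * INDIRECTCOMMERR so the client retries; any other secondary error is passed through. */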

View File

@@ -0,0 +1,37 @@
#pragma once
#include <common/net/message/storage/TruncLocalFileMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/Path.h>
class StorageTarget;
class TruncLocalFileMsgEx : public TruncLocalFileMsg
{
private:
struct DynamicAttribs
{
DynamicAttribs() : filesize(0), allocedBlocks(0), modificationTimeSecs(0),
lastAccessTimeSecs(0), storageVersion(0) {}
int64_t filesize;
int64_t allocedBlocks; // allocated 512byte blocks (relevant for sparse files)
int64_t modificationTimeSecs;
int64_t lastAccessTimeSecs;
uint64_t storageVersion;
};
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
FhgfsOpsErr truncFile(uint16_t targetId, int targetFD, const Path* chunkDirPath,
const std::string& chunkFilePathStr, std::string entryID, bool hasOrigFeature);
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
bool getDynamicAttribsByPath(const int dirFD, const char* path, uint16_t targetID,
std::string fileID, DynamicAttribs& outDynAttribs);
bool getFakeDynAttribs(uint16_t targetID, std::string fileID, DynamicAttribs& outDynAttribs);
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked);
};

View File

@@ -0,0 +1,152 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsRespMsg.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "GetChunkFileAttribsMsgEx.h"
bool GetChunkFileAttribsMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "GetChunkFileAttribsMsg incoming";
App* app = Program::getApp();
std::string entryID(getEntryID() );
FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
int targetFD;
struct stat statbuf{};
uint64_t storageVersion = 0;
// select the right targetID
uint16_t targetID = getTargetID();
if(isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR) )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log a message here; error handling happens below when the target lookup fails
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR))
{ /* buddy mirrored file => fail with GenericResp to make the caller retry.
mgmt will mark this target as (p)offline in a few moments. */
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
return true;
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
{ // get targetFD and check consistency state
bool skipResponse = false;
targetFD = getTargetFD(*target, ctx, &skipResponse);
if(unlikely(targetFD == -1) )
{ // failed => consistency state not good
memset(&statbuf, 0, sizeof(statbuf) ); // (just to mute clang warning)
if(skipResponse)
goto skip_response; // GenericResponseMsg sent
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
{ // valid targetID
SyncedStoragePaths* syncedPaths = app->getSyncedStoragePaths();
int statErrCode = 0;
std::string chunkPath = StorageTk::getFileChunkPath(getPathInfo(), entryID);
uint64_t newStorageVersion = syncedPaths->lockPath(entryID, targetID); // L O C K path
int statRes = fstatat(targetFD, chunkPath.c_str(), &statbuf, 0);
if(statRes)
{ // file doesn't exist or an error occurred
statErrCode = errno;
}
else
{
storageVersion = newStorageVersion;
}
syncedPaths->unlockPath(entryID, targetID); // U N L O C K path
// note: non-existing file is not an error (storage version is 0, so nothing will be
// updated at the metadata node)
if((statRes == -1) && (statErrCode != ENOENT))
{ // error
clientErrRes = FhgfsOpsErr_INTERNAL;
LogContext(logContext).logErr(
"Unable to stat file: " + chunkPath + ". " + "SysErr: "
+ System::getErrString(statErrCode));
}
}
send_response:
ctx.sendResponse(
GetChunkFileAttribsRespMsg(clientErrRes, statbuf.st_size, statbuf.st_blocks,
statbuf.st_mtime, statbuf.st_atime, storageVersion) );
skip_response:
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_GETLOCALFILESIZE, getMsgHeaderUserID() );
return true;
}
/**
* @param outResponseSent true if a response was sent from within this method; can only be true if
* -1 is returned.
* @return -1 if consistency state was not good (in which case a special response is sent within
* this method), otherwise the file descriptor to chunks dir (or mirror dir).
*/
int GetChunkFileAttribsMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
bool* outResponseSent)
{
bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR);
*outResponseSent = false;
// get targetFD and check consistency state
const auto consistencyState = target.getConsistencyState();
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
isBuddyMirrorChunk &&
!isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND) )
{ // this is a msg to a non-good primary
std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
"targetID: " + StringTk::uintToStr(target.getID());
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));
*outResponseSent = true;
return -1;
}
return targetFD;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/net/message/storage/attribs/GetChunkFileAttribsMsg.h>
class StorageTarget;
class GetChunkFileAttribsMsgEx : public GetChunkFileAttribsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
};

View File

@@ -0,0 +1,351 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/MessagingTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "SetLocalAttrMsgEx.h"
#include <utime.h>
#include <boost/lexical_cast.hpp>
bool SetLocalAttrMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "SetLocalAttrMsgEx incoming";
App* app = Program::getApp();
const SettableFileAttribs* attribs = getAttribs();
int validAttribs = getValidAttribs();
uint16_t targetID;
bool chunkLocked = false;
int targetFD;
FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
DynamicFileAttribs currentDynAttribs(0, 0, 0, 0, 0);
StorageTarget* target;
// select the right targetID
targetID = getTargetID();
if(isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR) )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
if(unlikely(!targetID) )
{ // unknown group ID
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{ // unknown targetID
if (isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR))
{ /* buddy mirrored file => fail with GenericResp to make the caller retry.
mgmt will mark this target as (p)offline in a few moments. */
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
return true;
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
{ // get targetFD and check consistency state
bool skipResponse = false;
targetFD = getTargetFD(*target, ctx, &skipResponse);
if(unlikely(targetFD == -1) )
{ // failed => consistency state not good
if(skipResponse)
goto skip_response; // GenericResponseMsg sent
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
// forward to secondary (if appropriate)
clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
{
if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
goto skip_response; // GenericResponseMsg sent
goto send_response;
}
if(validAttribs & (SETATTR_CHANGE_MODIFICATIONTIME | SETATTR_CHANGE_LASTACCESSTIME) )
{ // we only handle access and modification time updates here
struct timespec times[2] = {{0, 0}, {0, 0}};
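// per the utimensat() convention, timestamps that are not being changed are marked with
// UTIME_OMIT so the kernel leaves them untouched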
if (validAttribs & SETATTR_CHANGE_LASTACCESSTIME)
{
times[MsgHelperIO_ATIME_POS].tv_sec = attribs->lastAccessTimeSecs;
times[MsgHelperIO_ATIME_POS].tv_nsec = 0;
}
else
times[MsgHelperIO_ATIME_POS].tv_nsec = UTIME_OMIT;
if (validAttribs & SETATTR_CHANGE_MODIFICATIONTIME)
{
times[MsgHelperIO_MTIME_POS].tv_sec = attribs->modificationTimeSecs;
times[MsgHelperIO_MTIME_POS].tv_nsec = 0;
}
else
times[MsgHelperIO_MTIME_POS].tv_nsec = UTIME_OMIT;
// generate path to chunk file...
std::string pathStr;
pathStr = StorageTk::getFileChunkPath(getPathInfo(), getEntryID() );
// update timestamps...
// in case of a timestamp update we need extra information on the metadata server, namely
// a storageVersion and the current dynamic attribs of the chunk
// => set the new times while holding the lock and return the current attribs and a
// storageVersion in response later
uint64_t storageVersion = Program::getApp()->getSyncedStoragePaths()->lockPath(getEntryID(),
targetID);
int utimeRes = MsgHelperIO::utimensat(targetFD, pathStr.c_str(), times, 0);
if (utimeRes == 0)
{
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(targetFD, pathStr.c_str(),
&currentDynAttribs.fileSize, &currentDynAttribs.numBlocks,
&currentDynAttribs.modificationTimeSecs, &currentDynAttribs.lastAccessTimeSecs);
// If stat failed (after utimensat worked!), something really bad happened, so the
// attribs are definitely invalid. Otherwise set storageVersion in dynAttribs
if (getDynAttribsRes)
currentDynAttribs.storageVersion = storageVersion;
}
else if (errno == ENOENT)
{
// Entry doesn't exist. Not an error, but we need to return fake dynamic attributes so the
// metadata server can calculate the values ("fake" meaning that we send back the timestamps
// we tried to set, but the real file size and numBlocks, i.e. 0).
currentDynAttribs.storageVersion = storageVersion;
currentDynAttribs.fileSize = 0;
currentDynAttribs.numBlocks = 0;
currentDynAttribs.modificationTimeSecs = attribs->modificationTimeSecs;
currentDynAttribs.lastAccessTimeSecs = attribs->lastAccessTimeSecs;
}
else
{ // error
int errCode = errno;
LogContext(logContext).logErr("Unable to change file time: " + pathStr + ". "
"SysErr: " + System::getErrString());
clientErrRes = FhgfsOpsErrTk::fromSysErr(errCode);
}
Program::getApp()->getSyncedStoragePaths()->unlockPath(getEntryID(), targetID);
}
if(isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_USE_QUOTA) &&
(validAttribs & (SETATTR_CHANGE_USERID | SETATTR_CHANGE_GROUPID) ) )
{ // we only handle UID and GID updates here
uid_t uid = -1;
gid_t gid = -1;
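// (uid_t)-1 / (gid_t)-1 tell fchownat() to leave the respective ID unchanged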
if(validAttribs & SETATTR_CHANGE_USERID)
uid = attribs->userID;
if(validAttribs & SETATTR_CHANGE_GROUPID)
gid = attribs->groupID;
// generate path to chunk file...
std::string pathStr;
pathStr = StorageTk::getFileChunkPath(getPathInfo(), getEntryID() );
// update UID and GID...
int chownRes = fchownat(targetFD, pathStr.c_str(), uid, gid, 0);
if(chownRes == -1)
{ // could be an error
int errCode = errno;
if(errCode != ENOENT)
{ // unhandled chown() error
LogContext(logContext).logErr("Unable to change file owner: " + pathStr + ". "
"SysErr: " + System::getErrString() );
clientErrRes = FhgfsOpsErrTk::fromSysErr(errCode);
}
}
}
send_response:
if(chunkLocked) // unlock chunk
app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );
ctx.sendResponse(SetLocalAttrRespMsg(clientErrRes, currentDynAttribs));
skip_response:
// update operation counters...
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_SETLOCALATTR,
getMsgHeaderUserID() );
return true;
}
/**
* @param outResponseSent true if a response was sent from within this method; can only be true if
* -1 is returned.
* @return -1 if consistency state was not good (in which case a special response is sent within
* this method), otherwise the file descriptor to chunks dir (or mirror dir).
*/
int SetLocalAttrMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
bool* outResponseSent)
{
bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR);
*outResponseSent = false;
// get targetFD and check consistency state
const auto consistencyState = target.getConsistencyState();
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
isBuddyMirrorChunk &&
!isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) )
{ // this is a msg to a non-good primary
std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
"targetID: " + StringTk::uintToStr(target.getID());
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));
*outResponseSent = true;
return -1;
}
return targetFD;
}
/**
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
*
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
* case *outChunkLocked==false is guaranteed).
* @throw SocketException if sending of GenericResponseMsg fails.
*/
FhgfsOpsErr SetLocalAttrMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked)
{
const char* logContext = "SetLocalAttrMsg incoming (forward to secondary)";
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
*outChunkLocked = false;
if(!isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR) ||
isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) )
return FhgfsOpsErr_SUCCESS; // nothing to do
// mirrored chunk should be modified, check if resync is in progress and lock chunk
*outChunkLocked = target.getBuddyResyncInProgress();
if(*outChunkLocked)
chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_SetLocalAttrResp);
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
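// resolve the buddy group to its secondary target and send this msg there, waiting for the SetLocalAttrResp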
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
// remove the flag that we just added for secondary
unsetMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);
if(unlikely(
(commRes == FhgfsOpsErr_COMMUNICATION) &&
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
}
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_DEBUG, "Forwarding failed: "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"error: " + boost::lexical_cast<std::string>(commRes));
if(*outChunkLocked)
{ // unlock chunk
chunkLockStore->unlockChunk(target.getID(), getEntryID() );
*outChunkLocked = false;
}
std::string genericRespStr = "Communication with secondary failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
return FhgfsOpsErr_COMMUNICATION;
}
const auto respMsg = (const SetLocalAttrRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr secondaryRes = respMsg->getResult();
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
{
if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage server is restarted and the secondary target is not
actually broken, so we report success to the client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
boost::lexical_cast<std::string>(secondaryRes) + "; "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
return secondaryRes;
}
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,19 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/attribs/SetLocalAttrMsg.h>
class StorageTarget;
class SetLocalAttrMsgEx : public SetLocalAttrMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked);
};

View File

@@ -0,0 +1,26 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "CpChunkPathsMsgEx.h"
bool CpChunkPathsMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "CpChunkPathsMsg incoming";
LogContext(logContext).logErr("This message is not yet implemented. It should relay chunk "
"information from the metadata server to the storage server and trigger a chunk copy operation.");
FhgfsOpsErr cpMsgRes = FhgfsOpsErr_SUCCESS;
ctx.sendResponse(CpChunkPathsRespMsg(cpMsgRes));
return true;
}
ChunkBalancerJob* CpChunkPathsMsgEx::addChunkBalanceJob()
{
std::lock_guard<Mutex> mutexLock(ChunkBalanceJobMutex);
ChunkBalancerJob* chunkBalanceJob = nullptr;
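// note: chunk balancing is not implemented yet, so no job is created and nullptr is returned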
return chunkBalanceJob;
}

View File

@@ -0,0 +1,16 @@
#pragma once
#include <common/net/message/storage/chunkbalancing/CpChunkPathsMsg.h>
class ChunkBalancerJob;
class CpChunkPathsMsgEx : public CpChunkPathsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
Mutex ChunkBalanceJobMutex;
ChunkBalancerJob* addChunkBalanceJob();
};

View File

@@ -0,0 +1,57 @@
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "RmChunkPathsMsgEx.h"
bool RmChunkPathsMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "RmChunkPathsMsg incoming";
App* app = Program::getApp();
ChunkStore* chunkStore = app->getChunkDirStore();
uint16_t targetID;
StringList& relativePaths = getRelativePaths();
StringList failedPaths;
targetID = getTargetID();
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{ // unknown targetID
LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(targetID));
failedPaths = relativePaths;
}
else
{ // valid targetID
const int targetFD = isMsgHeaderFeatureFlagSet(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR)
? *target->getMirrorFD()
: *target->getChunkFD();
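// the given paths are relative to the target's chunk (or mirror) directory, so unlinkat() can
// work directly on the directory FD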
for(StringListIter iter = relativePaths.begin(); iter != relativePaths.end(); iter++)
{
// remove chunk
int unlinkRes = unlinkat(targetFD, (*iter).c_str(), 0);
if ( (unlinkRes != 0) && (errno != ENOENT) )
{
LogContext(logContext).logErr(
"Unable to remove entry: " + *iter + "; error: " + System::getErrString());
failedPaths.push_back(*iter);
continue;
}
// removal succeeded; this might have been the last entry => try to remove parent directory
Path parentDirPath(StorageTk::getPathDirname(*iter));
chunkStore->rmdirChunkDirPath(targetFD, &parentDirPath);
}
}
ctx.sendResponse(RmChunkPathsRespMsg(&failedPaths) );
return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
class RmChunkPathsMsgEx : public RmChunkPathsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,268 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "UnlinkLocalFileMsgEx.h"
#include <boost/lexical_cast.hpp>
bool UnlinkLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "UnlinkChunkFileMsg incoming";
App* app = Program::getApp();
ChunkStore* chunkDirStore = app->getChunkDirStore();
FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
uint16_t targetID;
bool chunkLocked = false;
int targetFD = -1;
Path chunkDirPath;
const PathInfo* pathInfo = getPathInfo();
bool hasOrigFeature = pathInfo->hasOrigFeature();
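// the orig feature flag selects the chunk path layout; it is checked again below before the
// parent chunk dir cleanup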
int unlinkRes = -1;
StorageTarget* target;
// select the right targetID
targetID = getTargetID();
if(isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR) )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
if(unlikely(!targetID) )
{ // unknown target
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR))
{ /* buddy mirrored file => fail with GenericResp to make the caller retry.
mgmt will mark this target as (p)offline in a few moments. */
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
return true;
}
LOG(GENERAL, ERR, "Unknown targetID.", targetID);
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
{ // get targetFD and check consistency state
bool skipResponse = false;
targetFD = getTargetFD(*target, ctx, &skipResponse);
if(unlikely(targetFD == -1) )
{ // failed => consistency state not good
if(skipResponse)
goto skip_response; // GenericResponseMsg sent
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
// forward to secondary (if appropriate)
clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
{
if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
goto skip_response; // GenericResponseMsg sent
goto send_response;
}
{ // valid targetID
// generate path to chunk file...
std::string chunkFilePathStr; // chunkDirPathStr + '/' + entryID
StorageTk::getChunkDirChunkFilePath(pathInfo, getEntryID(), hasOrigFeature, chunkDirPath,
chunkFilePathStr);
unlinkRes = unlinkat(targetFD, chunkFilePathStr.c_str(), 0);
if( (unlinkRes == -1) && (errno != ENOENT) )
{ // error
LogContext(logContext).logErr("Unable to unlink file: " + chunkFilePathStr + ". " +
"SysErr: " + System::getErrString() );
clientErrRes = FhgfsOpsErr_INTERNAL;
}
else
{ // success
LogContext(logContext).log(Log_DEBUG, "File unlinked: " + chunkFilePathStr);
}
}
send_response:
if(chunkLocked) // unlock chunk
app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );
ctx.sendResponse(UnlinkLocalFileRespMsg(clientErrRes) );
skip_response:
// try to rmdir chunkDirPath (in case this was the last chunkfile in a dir)
if (!unlinkRes && hasOrigFeature)
chunkDirStore->rmdirChunkDirPath(targetFD, &chunkDirPath);
// update operation counters...
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_UNLINK,
getMsgHeaderUserID() );
return true;
}
/**
* @param outResponseSent true if a response was sent from within this method; can only be true if
* -1 is returned.
* @return -1 if consistency state was not good (in which case a special response is sent within
* this method), otherwise the file descriptor to chunks dir (or mirror dir).
*/
int UnlinkLocalFileMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
bool* outResponseSent)
{
bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR);
*outResponseSent = false;
// get targetFD and check consistency state
const auto consistencyState = target.getConsistencyState();
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
isBuddyMirrorChunk &&
!isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
{ // this is a msg to a non-good primary
std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
"targetID: " + StringTk::uintToStr(target.getID());
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));
*outResponseSent = true;
return -1;
}
return targetFD;
}
/**
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
*
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
* case *outChunkLocked==false is guaranteed).
* @throw SocketException if sending of GenericResponseMsg fails.
*/
FhgfsOpsErr UnlinkLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked)
{
const char* logContext = "UnlinkLocalFileMsg incoming (forward to secondary)";
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
*outChunkLocked = false;
if(!isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR) ||
isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
return FhgfsOpsErr_SUCCESS; // nothing to do
// mirrored chunk should be modified, check if resync is in progress and lock chunk
*outChunkLocked = target.getBuddyResyncInProgress();
if(*outChunkLocked)
chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_UnlinkLocalFileResp);
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
// remove the flag that we just added for secondary
unsetMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
if(unlikely(
(commRes == FhgfsOpsErr_COMMUNICATION) &&
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
}
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"error: " + boost::lexical_cast<std::string>(commRes));
if(*outChunkLocked)
{ // unlock chunk
chunkLockStore->unlockChunk(target.getID(), getEntryID() );
*outChunkLocked = false;
}
std::string genericRespStr = "Communication with secondary failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
ctx.sendResponse(GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR,
std::move(genericRespStr)));
return FhgfsOpsErr_COMMUNICATION;
}
UnlinkLocalFileRespMsg* respMsg = (UnlinkLocalFileRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr secondaryRes = respMsg->getResult();
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
{
if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage server is restarted and the secondary target is not
actually broken, so we report success to the client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
boost::lexical_cast<std::string>(secondaryRes) + "; "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
return secondaryRes;
}
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,17 @@
#pragma once
#include <common/net/message/storage/creating/UnlinkLocalFileMsg.h>
class StorageTarget;
class UnlinkLocalFileMsgEx : public UnlinkLocalFileMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
private:
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked);
};

Some files were not shown because too many files have changed in this diff.