New upstream version 8.1.0
1530
storage/source/app/App.cpp
Normal file
File diff suppressed because it is too large
424
storage/source/app/App.h
Normal file
@@ -0,0 +1,424 @@
#pragma once

#include <app/config/Config.h>
#include <common/app/log/LogContext.h>
#include <common/app/log/Logger.h>
#include <common/app/AbstractApp.h>
#include <common/components/streamlistenerv2/ConnAcceptor.h>
#include <common/components/streamlistenerv2/StreamListenerV2.h>
#include <common/components/worker/queue/MultiWorkQueue.h>
#include <common/components/worker/Worker.h>
#include <common/components/TimerQueue.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/nodes/TargetStateStore.h>
#include <common/storage/Path.h>
#include <common/storage/Storagedata.h>
#include <common/toolkit/AcknowledgmentStore.h>
#include <common/storage/quota/ExceededQuotaStore.h>
#include <common/toolkit/NetFilter.h>
#include <common/Common.h>
#include <components/benchmarker/StorageBenchOperator.h>
#include <components/buddyresyncer/BuddyResyncer.h>
#include <components/chunkfetcher/ChunkFetcher.h>
#include <components/DatagramListener.h>
#include <components/InternodeSyncer.h>
#include <components/StorageStatsCollector.h>
#include <net/message/NetMessageFactory.h>
#include <nodes/StorageNodeOpStats.h>
#include <session/SessionStore.h>
#include <storage/ChunkLockStore.h>
#include <storage/ChunkStore.h>
#include <storage/SyncedStoragePaths.h>
#include <storage/StorageTargets.h>
#include <toolkit/QuotaTk.h>


#ifndef BEEGFS_VERSION
#error BEEGFS_VERSION undefined
#endif

// program return codes
#define APPCODE_NO_ERROR               0
#define APPCODE_INVALID_CONFIG         1
#define APPCODE_INITIALIZATION_ERROR   2
#define APPCODE_RUNTIME_ERROR          3


typedef std::list<Worker*> WorkerList;
typedef WorkerList::iterator WorkerListIter;

typedef std::vector<StreamListenerV2*> StreamLisVec;
typedef StreamLisVec::iterator StreamLisVecIter;


// forward declarations
class LogContext;

class App : public AbstractApp
{
   public:
      App(int argc, char** argv);
      virtual ~App();

      virtual void run() override;

      virtual void stopComponents() override;
      virtual void handleComponentException(std::exception& e) override;
      virtual void handleNetworkInterfaceFailure(const std::string& devname) override;

      void handleNetworkInterfacesChanged(NicAddressList nicList);

   private:
      int appResult;
      int argc;
      char** argv;

      Config* cfg;
      LogContext* log;
      std::list<std::string> allowedInterfaces;

      LockFD pidFileLockFD;
      std::vector<LockFD> storageTargetLocks;

      NetFilter* netFilter; // empty filter means "all nets allowed"
      NetFilter* tcpOnlyFilter; // for IPs that allow only plain TCP (no RDMA etc)
      std::shared_ptr<Node> localNode;

      NodeStoreServers* mgmtNodes;
      NodeStoreServers* metaNodes; // needed for backward communication introduced with GAM integration
      NodeStoreServers* storageNodes;

      TargetMapper* targetMapper;
      MirrorBuddyGroupMapper* mirrorBuddyGroupMapper; // maps targets to mirrorBuddyGroups
      TargetStateStore* targetStateStore; // map storage targets to a state

      MultiWorkQueueMap workQueueMap; // maps targetIDs to WorkQueues
      SessionStore* sessions;
      StorageNodeOpStats* nodeOperationStats; // file system operation statistics
      AcknowledgmentStore* ackStore;
      NetMessageFactory* netMessageFactory;

      StorageTargets* storageTargets; // target IDs and corresponding storage paths
      SyncedStoragePaths* syncedStoragePaths; // serializes access to paths (=> entryIDs)
      StorageBenchOperator* storageBenchOperator; // benchmark for the storage

      DatagramListener* dgramListener;
      ConnAcceptor* connAcceptor;
      StatsCollector* statsCollector;
      InternodeSyncer* internodeSyncer;
      TimerQueue* timerQueue;

      ChunkFetcher* chunkFetcher;

      unsigned numStreamListeners; // value copied from cfg (for performance)
      StreamLisVec streamLisVec;

      WorkerList workerList;
      bool workersRunning;
      Mutex mutexWorkersRunning;

      ChunkStore* chunkDirStore;

      unsigned nextNumaBindTarget; // the numa node to which we will bind the next component thread

      ExceededQuotaPerTarget exceededQuotaStores;

      BuddyResyncer* buddyResyncer;
      ChunkLockStore* chunkLockStore;

      std::unique_ptr<StoragePoolStore> storagePoolStore;

      void* dlOpenHandleLibZfs; // handle of the libzfs from dlopen
      bool libZfsErrorReported;

      void runNormal();

      void streamListenersInit();
      void streamListenersStart();
      void streamListenersStop();
      void streamListenersDelete();
      void streamListenersJoin();

      void workersInit();
      void workersStart();
      void workersStop();
      void workersDelete();
      void workersJoin();

      void initLogging();
      void initDataObjects();
      void initBasicNetwork();
      void initLocalNodeIDs(NumNodeID& outLocalNodeNumID);
      void initLocalNode(NumNodeID localNodeNumID);
      void initLocalNodeNumIDFile(NumNodeID localNodeNumID);
      void preinitStorage();
      void checkTargetsUUIDs();
      void initStorage();
      void initPostTargetRegistration();
      void initComponents();

      void startComponents();
      void joinComponents();

      bool waitForMgmtNode();
      bool preregisterNode(NumNodeID& outLocalNodeNumID);
      boost::optional<std::map<uint16_t, std::unique_ptr<StorageTarget>>> preregisterTargets(
         const NumNodeID localNodeNumID);
      bool preregisterTarget(Node& mgmtNode, std::string targetID, uint16_t targetNumID,
         uint16_t* outNewTargetNumID);
      bool registerAndDownloadMgmtInfo();

      void logInfos();

      void setUmask();

      void daemonize();

      void registerSignalHandler();
      static void signalHandler(int sig);

      bool restoreSessions();
      bool storeSessions();
      bool deleteSessionFiles();

      bool openLibZfs();
      bool closeLibZfs();


   public:
      /**
       * Get one of the available stream listeners based on the socket file descriptor number.
       * This is to load-balance the sockets over all available stream listeners and ensure that
       * sockets are not bouncing between different stream listeners.
       *
       * Note that IB connections eat two fd numbers, so 2 and multiples of 2 might not be a good
       * value for number of stream listeners.
       */
      virtual StreamListenerV2* getStreamListenerByFD(int fd) override
      {
         return streamLisVec[fd % numStreamListeners];
      }

      // getters & setters
      virtual const ICommonConfig* getCommonConfig() const override
      {
         return cfg;
      }

      virtual const NetFilter* getNetFilter() const override
      {
         return netFilter;
      }

      virtual const NetFilter* getTcpOnlyFilter() const override
      {
         return tcpOnlyFilter;
      }

      virtual const AbstractNetMessageFactory* getNetMessageFactory() const override
      {
         return netMessageFactory;
      }

      AcknowledgmentStore* getAckStore() const
      {
         return ackStore;
      }

      Config* getConfig() const
      {
         return cfg;
      }

      void updateLocalNicList(NicAddressList& localNicList);

      Node& getLocalNode() const
      {
         return *localNode;
      }

      NodeStoreServers* getMgmtNodes() const
      {
         return mgmtNodes;
      }

      NodeStoreServers* getMetaNodes() const
      {
         return metaNodes;
      }

      NodeStoreServers* getStorageNodes() const
      {
         return storageNodes;
      }

      TargetMapper* getTargetMapper() const
      {
         return targetMapper;
      }

      MirrorBuddyGroupMapper* getMirrorBuddyGroupMapper() const
      {
         return mirrorBuddyGroupMapper;
      }

      TargetStateStore* getTargetStateStore() const
      {
         return targetStateStore;
      }

      MultiWorkQueue* getWorkQueue(uint16_t targetID) const
      {
         MultiWorkQueueMapCIter iter = workQueueMap.find(targetID);

         if(iter != workQueueMap.end() )
            return iter->second;

         /* note: it's not unusual to not find given targetID, e.g.
            - when per-target queues are disabled
            - or when server restarted without one of its targets (and clients don't know that)
            - or if client couldn't provide targetID because it's not a target message */

         return workQueueMap.begin()->second;
      }

      MultiWorkQueueMap* getWorkQueueMap()
      {
         return &workQueueMap;
      }

      SessionStore* getSessions() const
      {
         return sessions;
      }

      StorageNodeOpStats* getNodeOpStats() const
      {
         return nodeOperationStats;
      }

      StorageTargets* getStorageTargets() const
      {
         return storageTargets;
      }

      SyncedStoragePaths* getSyncedStoragePaths() const
      {
         return syncedStoragePaths;
      }

      StorageBenchOperator* getStorageBenchOperator() const
      {
         return this->storageBenchOperator;
      }

      DatagramListener* getDatagramListener() const
      {
         return dgramListener;
      }

      const StreamLisVec* getStreamListenerVec() const
      {
         return &streamLisVec;
      }

      StatsCollector* getStatsCollector() const
      {
         return statsCollector;
      }

      InternodeSyncer* getInternodeSyncer() const
      {
         return internodeSyncer;
      }

      TimerQueue* getTimerQueue() const
      {
         return timerQueue;
      }

      int getAppResult() const
      {
         return appResult;
      }

      bool getWorkersRunning()
      {
         const std::lock_guard<Mutex> lock(mutexWorkersRunning);
         return this->workersRunning;
      }

      ChunkStore* getChunkDirStore() const
      {
         return this->chunkDirStore;
      }

      ChunkFetcher* getChunkFetcher() const
      {
         return this->chunkFetcher;
      }

      const ExceededQuotaPerTarget* getExceededQuotaStores() const
      {
         return &exceededQuotaStores;
      }

      BuddyResyncer* getBuddyResyncer() const
      {
         return this->buddyResyncer;
      }

      ChunkLockStore* getChunkLockStore() const
      {
         return chunkLockStore;
      }

      WorkerList* getWorkers()
      {
         return &workerList;
      }

      StoragePoolStore* getStoragePoolStore() const
      {
         return storagePoolStore.get();
      }

      void setLibZfsErrorReported(bool isReported)
      {
         libZfsErrorReported = isReported;
      }

      void* getDlOpenHandleLibZfs()
      {
         if(dlOpenHandleLibZfs)
            return dlOpenHandleLibZfs;
         else
         if(cfg->getQuotaDisableZfsSupport() )
         {
            if(!libZfsErrorReported)
            {
               LOG(QUOTA, ERR, "Quota support for ZFS is disabled.");
               libZfsErrorReported = true;
            }
         }
         else
         if(!libZfsErrorReported)
            openLibZfs();

         return dlOpenHandleLibZfs;
      }

      bool isDlOpenHandleLibZfsValid()
      {
         if(dlOpenHandleLibZfs)
            return true;

         return false;
      }

      void findAllowedInterfaces(NicAddressList& outList) const;
      void findAllowedRDMAInterfaces(NicAddressList& outList) const;

};
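Editor's note: the fd-based selection in getStreamListenerByFD() above maps each accepted socket to streamLisVec[fd % numStreamListeners]. The following minimal standalone C++ sketch (hypothetical values, not part of the upstream sources) illustrates why the header comment warns against 2 or multiples of 2 when connections consume two fd numbers each:

#include <cstdio>

int main()
{
   const unsigned numListeners = 2; // hypothetical tuneNumStreamListeners value
   // connections that each consume two consecutive fd numbers (e.g. IB/RDMA)
   for (int fd = 10; fd < 18; fd += 2)
      std::printf("fd %d -> listener %u\n", fd, fd % numListeners); // always listener 0
   return 0;
}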
256
storage/source/app/config/Config.cpp
Normal file
@@ -0,0 +1,256 @@
#include <common/toolkit/StringTk.h>
#include <common/toolkit/UnitTk.h>
#include "Config.h"

#define CONFIG_DEFAULT_CFGFILENAME "/etc/beegfs/beegfs-storage.conf"

#define CONFIG_STORAGETARGETS_DELIMITER ','


Config::Config(int argc, char** argv) :
   AbstractConfig(argc, argv)
{
   initConfig(argc, argv, true);
}

/**
 * Sets the default values for each configurable in the configMap.
 *
 * @param addDashes currently unused
 */
void Config::loadDefaults(bool addDashes)
{
   AbstractConfig::loadDefaults();

   // re-definitions
   configMapRedefine("cfgFile", "");

   // own definitions
   configMapRedefine("connInterfacesFile", "");
   configMapRedefine("connInterfacesList", "");

   configMapRedefine("storeStorageDirectory", "");
   configMapRedefine("storeFsUUID", "");
   configMapRedefine("storeAllowFirstRunInit", "true");

   configMapRedefine("tuneNumStreamListeners", "1");
   configMapRedefine("tuneNumWorkers", "8");
   configMapRedefine("tuneWorkerBufSize", "4m");
   configMapRedefine("tuneProcessFDLimit", "50000");
   configMapRedefine("tuneWorkerNumaAffinity", "false");
   configMapRedefine("tuneListenerNumaAffinity", "false");
   configMapRedefine("tuneListenerPrioShift", "-1");
   configMapRedefine("tuneBindToNumaZone", "");
   configMapRedefine("tuneFileReadSize", "32k");
   configMapRedefine("tuneFileReadAheadTriggerSize", "4m");
   configMapRedefine("tuneFileReadAheadSize", "0");
   configMapRedefine("tuneFileWriteSize", "64k");
   configMapRedefine("tuneFileWriteSyncSize", "0");
   configMapRedefine("tuneUsePerUserMsgQueues", "false");
   configMapRedefine("tuneDirCacheLimit", "1024");
   configMapRedefine("tuneEarlyStat", "false");
   configMapRedefine("tuneNumResyncSlaves", "12");
   configMapRedefine("tuneNumResyncGatherSlaves", "6");
   configMapRedefine("tuneUseAggressiveStreamPoll", "false");
   configMapRedefine("tuneUsePerTargetWorkers", "true");

   configMapRedefine("quotaEnableEnforcement", "false");
   configMapRedefine("quotaDisableZfsSupport", "false");

   configMapRedefine("sysResyncSafetyThresholdMins", "10");
   configMapRedefine("sysTargetOfflineTimeoutSecs", "180");

   configMapRedefine("runDaemonized", "false");

   configMapRedefine("pidFile", "");
}

/**
 * @param addDashes currently unused
 */
void Config::applyConfigMap(bool enableException, bool addDashes)
{
   AbstractConfig::applyConfigMap(false);

   for (StringMapIter iter = configMap.begin(); iter != configMap.end();)
   {
      bool unknownElement = false;

      if (iter->first == std::string("logType"))
      {
         if (iter->second == "syslog")
         {
            logType = LogType_SYSLOG;
         }
         else if (iter->second == "logfile")
         {
            logType = LogType_LOGFILE;
         }
         else
         {
            throw InvalidConfigException("The value of config argument logType is invalid.");
         }
      }
      else if (iter->first == std::string("connInterfacesFile"))
         connInterfacesFile = iter->second;
      else if (iter->first == std::string("connInterfacesList"))
         connInterfacesList = iter->second;
      else if (iter->first == std::string("storeStorageDirectory"))
      {
         storageDirectories.clear();

         std::list<std::string> split;

         StringTk::explode(iter->second, CONFIG_STORAGETARGETS_DELIMITER, &split);

         std::transform(
               split.begin(), split.end(),
               std::back_inserter(storageDirectories),
               [] (const std::string& p) {
                  return Path(StringTk::trim(p));
               });
         storageDirectories.remove_if(std::mem_fn(&Path::empty));
      }
      else if (iter->first == std::string("storeFsUUID"))
      {
         storeFsUUID.clear();

         std::list<std::string> split;

         StringTk::explode(iter->second, CONFIG_STORAGETARGETS_DELIMITER, &split);

         std::transform(
               split.begin(), split.end(),
               std::back_inserter(storeFsUUID),
               [] (const std::string& p) {
                  return StringTk::trim(p);
               });
         storeFsUUID.remove_if(std::mem_fn(&std::string::empty));
      }
      else if (iter->first == std::string("storeAllowFirstRunInit"))
         storeAllowFirstRunInit = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("tuneNumStreamListeners"))
         tuneNumStreamListeners = StringTk::strToUInt(iter->second);
      else if (iter->first == std::string("tuneNumWorkers"))
         tuneNumWorkers = StringTk::strToUInt(iter->second);
      else if (iter->first == std::string("tuneWorkerBufSize"))
         tuneWorkerBufSize = UnitTk::strHumanToInt64(iter->second);
      else if (iter->first == std::string("tuneProcessFDLimit"))
         tuneProcessFDLimit = StringTk::strToUInt(iter->second);
      else if (iter->first == std::string("tuneWorkerNumaAffinity"))
         tuneWorkerNumaAffinity = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("tuneListenerNumaAffinity"))
         tuneListenerNumaAffinity = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("tuneBindToNumaZone"))
      {
         if (iter->second.empty()) // not defined => disable
            tuneBindToNumaZone = -1; // -1 means disable binding
         else
            tuneBindToNumaZone = StringTk::strToInt(iter->second);
      }
      else if (iter->first == std::string("tuneListenerPrioShift"))
         tuneListenerPrioShift = StringTk::strToInt(iter->second);
      else if (iter->first == std::string("tuneFileReadSize"))
         tuneFileReadSize = UnitTk::strHumanToInt64(iter->second);
      else if (iter->first == std::string("tuneFileReadAheadTriggerSize"))
         tuneFileReadAheadTriggerSize = UnitTk::strHumanToInt64(iter->second);
      else if (iter->first == std::string("tuneFileReadAheadSize"))
         tuneFileReadAheadSize = UnitTk::strHumanToInt64(iter->second);
      else if (iter->first == std::string("tuneFileWriteSize"))
         tuneFileWriteSize = UnitTk::strHumanToInt64(iter->second);
      else if (iter->first == std::string("tuneFileWriteSyncSize"))
         tuneFileWriteSyncSize = UnitTk::strHumanToInt64(iter->second);
      else if (iter->first == std::string("tuneUsePerUserMsgQueues"))
         tuneUsePerUserMsgQueues = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("tuneDirCacheLimit"))
         tuneDirCacheLimit = StringTk::strToUInt(iter->second);
      else if (iter->first == std::string("tuneEarlyStat"))
         this->tuneEarlyStat = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("tuneNumResyncGatherSlaves"))
         this->tuneNumResyncGatherSlaves = StringTk::strToUInt(iter->second);
      else if (iter->first == std::string("tuneNumResyncSlaves"))
         this->tuneNumResyncSlaves = StringTk::strToUInt(iter->second);
      else if (iter->first == std::string("tuneUseAggressiveStreamPoll"))
         tuneUseAggressiveStreamPoll = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("tuneUsePerTargetWorkers"))
         tuneUsePerTargetWorkers = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("quotaEnableEnforcement"))
         quotaEnableEnforcement = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("quotaDisableZfsSupport"))
         quotaDisableZfsSupport = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("sysResyncSafetyThresholdMins"))
         sysResyncSafetyThresholdMins = StringTk::strToInt64(iter->second);
      else if (iter->first == std::string("sysTargetOfflineTimeoutSecs"))
      {
         sysTargetOfflineTimeoutSecs = StringTk::strToUInt(iter->second);

         if (sysTargetOfflineTimeoutSecs < 30)
         {
            throw InvalidConfigException("Invalid sysTargetOfflineTimeoutSecs value "
               + iter->second + " (must be at least 30)");
         }
      }
      else if (iter->first == std::string("runDaemonized"))
         runDaemonized = StringTk::strToBool(iter->second);
      else if (iter->first == std::string("pidFile"))
         pidFile = iter->second;
      else
      {
         // unknown element occurred
         unknownElement = true;

         if (enableException)
         {
            throw InvalidConfigException("The config argument '" + iter->first + "' is invalid");
         }
      }

      if (unknownElement)
      {
         // just skip the unknown element
         iter++;
      }
      else
      {
         // remove this element from the map
         iter = eraseFromConfigMap(iter);
      }
   }
}

void Config::initImplicitVals()
{
   // tuneFileReadAheadTriggerSize (should be ">= tuneFileReadAheadSize")
   if(tuneFileReadAheadTriggerSize < tuneFileReadAheadSize)
      tuneFileReadAheadTriggerSize = tuneFileReadAheadSize;

   // connInterfacesList(/File)
   AbstractConfig::initInterfacesList(connInterfacesFile, connInterfacesList);

   AbstractConfig::initSocketBufferSizes();

   // check if sync_file_range was enabled on a distro that doesn't support it
#ifndef CONFIG_DISTRO_HAS_SYNC_FILE_RANGE
   if(tuneFileWriteSyncSize)
   {
      throw InvalidConfigException(
         "Config option is not supported for this distribution: 'tuneFileWriteSyncSize'");
   }
#endif

   // connAuthHash
   AbstractConfig::initConnAuthHash(connAuthFile, &connAuthHash);
}

std::string Config::createDefaultCfgFilename() const
{
   struct stat statBuf;

   const int statRes = stat(CONFIG_DEFAULT_CFGFILENAME, &statBuf);

   if(!statRes && S_ISREG(statBuf.st_mode) )
      return CONFIG_DEFAULT_CFGFILENAME; // there appears to be a config file

   return ""; // no default file otherwise
}
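Editor's note: applyConfigMap() above converts each raw key/value string from the config map into a typed member and consumes known keys, while unknown keys optionally raise an InvalidConfigException. The self-contained C++ sketch below (standard library only, simplified key set, not BeeGFS code) mirrors that dispatch-and-consume pattern:

#include <map>
#include <stdexcept>
#include <string>

struct TuningConfig
{
   unsigned numWorkers = 8; // stand-in for tuneNumWorkers
   bool daemonize = false;  // stand-in for runDaemonized
};

// simplified stand-in for Config::applyConfigMap(): known keys are parsed into
// typed fields, unknown keys raise an error (as with enableException == true)
TuningConfig applyMap(const std::map<std::string, std::string>& cfgMap)
{
   TuningConfig out;
   for (const auto& kv : cfgMap)
   {
      if (kv.first == "tuneNumWorkers")
         out.numWorkers = static_cast<unsigned>(std::stoul(kv.second));
      else if (kv.first == "runDaemonized")
         out.daemonize = (kv.second == "true");
      else
         throw std::invalid_argument("The config argument '" + kv.first + "' is invalid");
   }
   return out;
}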
229
storage/source/app/config/Config.h
Normal file
@@ -0,0 +1,229 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/app/config/AbstractConfig.h>
|
||||
|
||||
/**
|
||||
* Find out whether this distro has sync_file_range() support (added in linux-2.6.17, glibc 2.6).
|
||||
* Note: Problem is that RHEL 5 defines SYNC_FILE_RANGE_WRITE, but uses glibc 2.5 which has no
|
||||
* sync_file_range support, so linker complains about undefined reference.
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
#include <features.h>
|
||||
#include <fcntl.h>
|
||||
#if __GLIBC_PREREQ(2, 6) && defined(SYNC_FILE_RANGE_WRITE)
|
||||
#define CONFIG_DISTRO_HAS_SYNC_FILE_RANGE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
class Config : public AbstractConfig
|
||||
{
|
||||
public:
|
||||
Config(int argc, char** argv);
|
||||
|
||||
private:
|
||||
|
||||
// configurables
|
||||
|
||||
std::string connInterfacesFile; // implicitly generates connInterfacesList
|
||||
std::string connInterfacesList; // comma-separated list
|
||||
|
||||
std::list<Path> storageDirectories;
|
||||
std::list<std::string> storeFsUUID;
|
||||
bool storeAllowFirstRunInit;
|
||||
|
||||
unsigned tuneNumStreamListeners;
|
||||
unsigned tuneNumWorkers;
|
||||
unsigned tuneWorkerBufSize;
|
||||
unsigned tuneProcessFDLimit; // 0 means "don't touch limit"
|
||||
bool tuneWorkerNumaAffinity;
|
||||
bool tuneListenerNumaAffinity;
|
||||
int tuneBindToNumaZone; // bind all threads to this zone, -1 means no binding
|
||||
int tuneListenerPrioShift;
|
||||
ssize_t tuneFileReadSize;
|
||||
ssize_t tuneFileReadAheadTriggerSize; // after how much seq read to start read-ahead
|
||||
ssize_t tuneFileReadAheadSize; // read-ahead with posix_fadvise(..., POSIX_FADV_WILLNEED)
|
||||
ssize_t tuneFileWriteSize;
|
||||
ssize_t tuneFileWriteSyncSize; // after how many of per session data to sync_file_range()
|
||||
bool tuneUsePerUserMsgQueues; // true to use UserWorkContainer for MultiWorkQueue
|
||||
unsigned tuneDirCacheLimit;
|
||||
bool tuneEarlyStat; // stat the chunk file before closing it
|
||||
unsigned tuneNumResyncGatherSlaves;
|
||||
unsigned tuneNumResyncSlaves;
|
||||
bool tuneUseAggressiveStreamPoll; // true to not sleep on epoll in streamlisv2
|
||||
bool tuneUsePerTargetWorkers; // true to have tuneNumWorkers separate for each target
|
||||
|
||||
bool quotaEnableEnforcement;
|
||||
bool quotaDisableZfsSupport;
|
||||
|
||||
int64_t sysResyncSafetyThresholdMins; // minutes to add to last buddy comm timestamp
|
||||
unsigned sysTargetOfflineTimeoutSecs;
|
||||
|
||||
bool runDaemonized;
|
||||
|
||||
std::string pidFile;
|
||||
|
||||
|
||||
// internals
|
||||
|
||||
virtual void loadDefaults(bool addDashes) override;
|
||||
virtual void applyConfigMap(bool enableException, bool addDashes) override;
|
||||
virtual void initImplicitVals() override;
|
||||
std::string createDefaultCfgFilename() const;
|
||||
|
||||
public:
|
||||
// getters & setters
|
||||
const std::string& getConnInterfacesList() const
|
||||
{
|
||||
return connInterfacesList;
|
||||
}
|
||||
|
||||
const std::list<Path>& getStorageDirectories() const { return storageDirectories; }
|
||||
|
||||
const std::list<std::string>& getStoreFsUUID() const
|
||||
{
|
||||
return storeFsUUID;
|
||||
}
|
||||
|
||||
bool getStoreAllowFirstRunInit() const
|
||||
{
|
||||
return storeAllowFirstRunInit;
|
||||
}
|
||||
|
||||
unsigned getTuneNumStreamListeners() const
|
||||
{
|
||||
return tuneNumStreamListeners;
|
||||
}
|
||||
|
||||
unsigned getTuneNumWorkers() const
|
||||
{
|
||||
return tuneNumWorkers;
|
||||
}
|
||||
|
||||
unsigned getTuneWorkerBufSize() const
|
||||
{
|
||||
return tuneWorkerBufSize;
|
||||
}
|
||||
|
||||
unsigned getTuneProcessFDLimit() const
|
||||
{
|
||||
return tuneProcessFDLimit;
|
||||
}
|
||||
|
||||
bool getTuneWorkerNumaAffinity() const
|
||||
{
|
||||
return tuneWorkerNumaAffinity;
|
||||
}
|
||||
|
||||
bool getTuneListenerNumaAffinity() const
|
||||
{
|
||||
return tuneListenerNumaAffinity;
|
||||
}
|
||||
|
||||
int getTuneBindToNumaZone() const
|
||||
{
|
||||
return tuneBindToNumaZone;
|
||||
}
|
||||
|
||||
int getTuneListenerPrioShift() const
|
||||
{
|
||||
return tuneListenerPrioShift;
|
||||
}
|
||||
|
||||
ssize_t getTuneFileReadSize() const
|
||||
{
|
||||
return tuneFileReadSize;
|
||||
}
|
||||
|
||||
ssize_t getTuneFileReadAheadTriggerSize() const
|
||||
{
|
||||
return tuneFileReadAheadTriggerSize;
|
||||
}
|
||||
|
||||
ssize_t getTuneFileReadAheadSize() const
|
||||
{
|
||||
return tuneFileReadAheadSize;
|
||||
}
|
||||
|
||||
ssize_t getTuneFileWriteSize() const
|
||||
{
|
||||
return tuneFileWriteSize;
|
||||
}
|
||||
|
||||
ssize_t getTuneFileWriteSyncSize() const
|
||||
{
|
||||
return this->tuneFileWriteSyncSize;
|
||||
}
|
||||
|
||||
bool getTuneUsePerUserMsgQueues() const
|
||||
{
|
||||
return tuneUsePerUserMsgQueues;
|
||||
}
|
||||
|
||||
bool getRunDaemonized() const
|
||||
{
|
||||
return runDaemonized;
|
||||
}
|
||||
|
||||
const std::string& getPIDFile() const
|
||||
{
|
||||
return pidFile;
|
||||
}
|
||||
|
||||
unsigned getTuneDirCacheLimit() const
|
||||
{
|
||||
return tuneDirCacheLimit;
|
||||
}
|
||||
|
||||
bool getTuneEarlyStat() const
|
||||
{
|
||||
return this->tuneEarlyStat;
|
||||
}
|
||||
|
||||
bool getQuotaEnableEnforcement() const
|
||||
{
|
||||
return quotaEnableEnforcement;
|
||||
}
|
||||
|
||||
void setQuotaEnableEnforcement(bool doQuotaEnforcement)
|
||||
{
|
||||
quotaEnableEnforcement = doQuotaEnforcement;
|
||||
}
|
||||
|
||||
bool getQuotaDisableZfsSupport() const
|
||||
{
|
||||
return quotaDisableZfsSupport;
|
||||
}
|
||||
|
||||
unsigned getTuneNumResyncGatherSlaves() const
|
||||
{
|
||||
return tuneNumResyncGatherSlaves;
|
||||
}
|
||||
|
||||
unsigned getTuneNumResyncSlaves() const
|
||||
{
|
||||
return tuneNumResyncSlaves;
|
||||
}
|
||||
|
||||
bool getTuneUseAggressiveStreamPoll() const
|
||||
{
|
||||
return tuneUseAggressiveStreamPoll;
|
||||
}
|
||||
|
||||
bool getTuneUsePerTargetWorkers() const
|
||||
{
|
||||
return tuneUsePerTargetWorkers;
|
||||
}
|
||||
|
||||
int64_t getSysResyncSafetyThresholdMins() const
|
||||
{
|
||||
return sysResyncSafetyThresholdMins;
|
||||
}
|
||||
|
||||
unsigned getSysTargetOfflineTimeoutSecs() const
|
||||
{
|
||||
return sysTargetOfflineTimeoutSecs;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
61
storage/source/components/DatagramListener.cpp
Normal file
@@ -0,0 +1,61 @@
#include "DatagramListener.h"

#include <common/net/message/NetMessageTypes.h>

DatagramListener::DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
   AcknowledgmentStore* ackStore, unsigned short udpPort, bool restrictOutboundInterfaces):
   AbstractDatagramListener("DGramLis", netFilter, localNicList, ackStore, udpPort,
      restrictOutboundInterfaces)
{
}

DatagramListener::~DatagramListener()
{
}

void DatagramListener::handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg)
{
   HighResolutionStats stats; // currently ignored
   std::shared_ptr<StandardSocket> sock = findSenderSock(fromAddr->sin_addr);
   if (sock == nullptr)
   {
      log.log(Log_WARNING, "Could not handle incoming message: no socket");
      return;
   }

   NetMessage::ResponseContext rctx(fromAddr, sock.get(), sendBuf, DGRAMMGR_SENDBUF_SIZE, &stats);

   const auto messageType = netMessageTypeToStr(msg->getMsgType());

   switch(msg->getMsgType() )
   {
      // valid messages within this context
      case NETMSGTYPE_Ack:
      case NETMSGTYPE_Dummy:
      case NETMSGTYPE_HeartbeatRequest:
      case NETMSGTYPE_Heartbeat:
      case NETMSGTYPE_MapTargets:
      case NETMSGTYPE_PublishCapacities:
      case NETMSGTYPE_RemoveNode:
      case NETMSGTYPE_RefreshStoragePools:
      case NETMSGTYPE_RefreshTargetStates:
      case NETMSGTYPE_SetMirrorBuddyGroup:
      {
         if(!msg->processIncoming(rctx) )
         {
            LOG(GENERAL, WARNING,
               "Problem encountered during handling of incoming message.", messageType);
         }
      } break;

      default:
      { // valid, but not within this context
         log.logErr(
            "Received a message that is invalid within the current context "
            "from: " + Socket::ipaddrToStr(fromAddr->sin_addr) + "; "
            "type: " + messageType );
      } break;
   };
}
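Editor's note: handleIncomingMsg() above accepts only a whitelist of datagram message types on the UDP path and logs everything else as invalid in this context. A compact C++ sketch of that guard pattern, using a hypothetical enum instead of the BeeGFS NETMSGTYPE_* constants (illustrative only, not upstream code):

#include <cstdio>
#include <set>

enum MsgType { MsgAck, MsgHeartbeat, MsgRemoveNode, MsgWriteData };

// only control messages are valid on the datagram path; everything else is rejected
bool handleDatagram(MsgType type)
{
   static const std::set<MsgType> allowed = { MsgAck, MsgHeartbeat, MsgRemoveNode };
   if (!allowed.count(type))
   {
      std::fprintf(stderr, "Received a message that is invalid within this context\n");
      return false;
   }
   return true; // the real listener would call msg->processIncoming(rctx) here
}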
20
storage/source/components/DatagramListener.h
Normal file
@@ -0,0 +1,20 @@
#pragma once

#include <common/components/AbstractDatagramListener.h>

class DatagramListener : public AbstractDatagramListener
{
   public:
      DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
         AcknowledgmentStore* ackStore, unsigned short udpPort,
         bool restrictOutboundInterfaces);
      virtual ~DatagramListener();


   protected:
      virtual void handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg);

   private:
};
1185
storage/source/components/InternodeSyncer.cpp
Normal file
File diff suppressed because it is too large
144
storage/source/components/InternodeSyncer.h
Normal file
@@ -0,0 +1,144 @@
#pragma once

#include <common/app/log/LogContext.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/components/ComponentInitException.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/threading/PThread.h>
#include <common/Common.h>

class StorageTarget;

class InternodeSyncer : public PThread
{
   public:
      InternodeSyncer();
      virtual ~InternodeSyncer();

      static bool downloadAndSyncTargetStates(UInt16List& outTargetIDs,
         UInt8List& outReachabilityStates, UInt8List& outConsistencyStates);
      static bool downloadAndSyncNodes();
      static bool downloadAndSyncTargetMappings();
      static bool downloadAndSyncMirrorBuddyGroups();
      static bool downloadAndSyncStoragePools();

      static bool downloadAllExceededQuotaLists(
         const std::map<uint16_t, std::unique_ptr<StorageTarget>>& targets);
      static bool downloadExceededQuotaList(uint16_t targetId, QuotaDataType idType,
         QuotaLimitType exType, UIntList* outIDList, FhgfsOpsErr& error);

      static void syncClientSessions(const std::vector<NodeHandle>& clientsList);

      void publishTargetState(uint16_t targetID, TargetConsistencyState targetState);

      bool publishLocalTargetStateChanges(const TargetStateMap& oldStates,
         const TargetStateMap& changes);

      static bool registerNode(AbstractDatagramListener* dgramLis);
      static bool registerTargetMappings();
      static void requestBuddyTargetStates();

   private:
      LogContext log;

      Mutex forceTargetStatesUpdateMutex;
      Mutex forcePublishCapacitiesMutex;
      Mutex forceStoragePoolsUpdateMutex;
      Mutex forceCheckNetworkMutex;
      bool forceTargetStatesUpdate; // true to force update of target states
      bool forcePublishCapacities; // true to force publishing target capacities
      bool forceStoragePoolsUpdate; // true to force update of storage pools
      bool forceCheckNetwork; // true to force update of network interfaces

      virtual void run();
      void syncLoop();

      // returns true if the local interfaces have changed
      bool checkNetwork();
      void dropIdleConns();
      unsigned dropIdleConnsByStore(NodeStoreServers* nodes);

      void updateTargetStatesAndBuddyGroups();
      void publishTargetCapacities();

      void forceMgmtdPoolsRefresh();

      static void printSyncNodesResults(NodeType nodeType, NumNodeIDList* addedNodes,
         NumNodeIDList* removedNodes);

      bool publishTargetStateChanges(UInt16List& targetIDs, UInt8List& oldStates,
         UInt8List& newStates);

      static bool downloadAllExceededQuotaLists(uint16_t targetId);

   public:
      // inliners
      void setForceTargetStatesUpdate()
      {
         std::lock_guard<Mutex> safeLock(forceTargetStatesUpdateMutex);
         this->forceTargetStatesUpdate = true;
      }

      void setForcePublishCapacities()
      {
         std::lock_guard<Mutex> safeLock(forcePublishCapacitiesMutex);
         this->forcePublishCapacities = true;
      }

      void setForceStoragePoolsUpdate()
      {
         std::lock_guard<Mutex> lock(forceStoragePoolsUpdateMutex);
         forceStoragePoolsUpdate = true;
      }

      void setForceCheckNetwork()
      {
         std::lock_guard<Mutex> lock(forceCheckNetworkMutex);
         forceCheckNetwork = true;
      }

   private:
      // inliners
      bool getAndResetForceTargetStatesUpdate()
      {
         std::lock_guard<Mutex> safeLock(forceTargetStatesUpdateMutex);

         bool retVal = this->forceTargetStatesUpdate;

         this->forceTargetStatesUpdate = false;

         return retVal;
      }

      bool getAndResetForcePublishCapacities()
      {
         std::lock_guard<Mutex> safeLock(forcePublishCapacitiesMutex);

         bool retVal = this->forcePublishCapacities;

         this->forcePublishCapacities = false;

         return retVal;
      }

      bool getAndResetForceStoragePoolsUpdate()
      {
         std::lock_guard<Mutex> lock(forceStoragePoolsUpdateMutex);

         bool retVal = forceStoragePoolsUpdate;
         forceStoragePoolsUpdate = false;

         return retVal;
      }

      bool getAndResetForceCheckNetwork()
      {
         std::lock_guard<Mutex> lock(forceCheckNetworkMutex);

         bool retVal = forceCheckNetwork;
         forceCheckNetwork = false;

         return retVal;
      }
};
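Editor's note: the setForce*/getAndResetForce* inliners above implement a latched-request flag: setters latch a request under a mutex, and the sync loop consumes it atomically so a request is neither lost nor handled twice. A generic C++ sketch of the same idea using std::mutex (illustrative only; the real class uses the project's own Mutex wrapper):

#include <mutex>

class LatchedFlag
{
   public:
      // called by any thread to request work from the consumer loop
      void set()
      {
         std::lock_guard<std::mutex> lock(mutex);
         flag = true;
      }

      // reads and clears the flag in one critical section
      bool getAndReset()
      {
         std::lock_guard<std::mutex> lock(mutex);
         bool val = flag;
         flag = false;
         return val;
      }

   private:
      std::mutex mutex;
      bool flag = false;
};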
47
storage/source/components/StorageStatsCollector.cpp
Normal file
@@ -0,0 +1,47 @@
#include <app/App.h>
#include <program/Program.h>
#include "StorageStatsCollector.h"

/**
 * Note: Unlike the common StatsCollector::collectStats(), this method can handle multiple work
 * queues.
 */
void StorageStatsCollector::collectStats()
{
   App* app = Program::getApp();
   MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();

   HighResolutionStats newStats;

   const std::lock_guard<Mutex> lock(mutex);

   // get stats from first queue as basis

   MultiWorkQueueMapIter iter = workQueueMap->begin();

   iter->second->getAndResetStats(&newStats);

   // add the stat values from following queues

   iter++;

   for( ; iter != workQueueMap->end(); iter++)
   {
      HighResolutionStats currentStats;

      iter->second->getAndResetStats(&currentStats);

      HighResolutionStatsTk::addHighResRawStats(currentStats, newStats);
      HighResolutionStatsTk::addHighResIncStats(currentStats, newStats);
   }

   // set current stats time
   newStats.rawVals.statsTimeMS = TimeAbs().getTimeMS();

   // take care of max history length
   if(statsList.size() == historyLength)
      statsList.pop_back();

   // push new stats to front
   statsList.push_front(newStats);
}
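Editor's note: collectStats() above folds the per-queue statistics into a single sample, using the first queue as the basis and adding the remaining queues onto it. A reduced C++ sketch of that fold over a plain vector, with a hypothetical Stats type rather than the BeeGFS HighResolutionStats API:

#include <cstdint>
#include <vector>

struct Stats { uint64_t readBytes = 0; uint64_t writeBytes = 0; };

// sum the per-queue samples into one aggregated sample, as collectStats() does
Stats aggregate(const std::vector<Stats>& perQueue)
{
   Stats total;
   for (const Stats& s : perQueue)
   {
      total.readBytes += s.readBytes;
      total.writeBytes += s.writeBytes;
   }
   return total;
}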
25
storage/source/components/StorageStatsCollector.h
Normal file
@@ -0,0 +1,25 @@
#pragma once

#include <common/components/StatsCollector.h>

/**
 * Common StatsCollector cannot handle multiple work queues, so this derived class overrides
 * the collectStats() method to handle multiple work queues.
 */
class StorageStatsCollector : public StatsCollector
{
   public:
      StorageStatsCollector(unsigned collectIntervalMS, unsigned historyLength):
         StatsCollector(NULL, collectIntervalMS, historyLength)
      {
         // nothing to be done here
      }

      virtual ~StorageStatsCollector() {}


   protected:
      virtual void collectStats();
};
@@ -0,0 +1,38 @@
#include "StorageBenchOperator.h"


int StorageBenchOperator::initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize,
   int64_t size, int threads, bool odirect, StorageBenchType type)
{
   return this->slave.initAndStartStorageBench(targetIDs, blocksize, size, threads, odirect, type);
}

int StorageBenchOperator::cleanup(UInt16List* targetIDs)
{
   return this->slave.cleanup(targetIDs);
}

int StorageBenchOperator::stopBenchmark()
{
   return this->slave.stopBenchmark();
}

StorageBenchStatus StorageBenchOperator::getStatusWithResults(UInt16List* targetIDs,
   StorageBenchResultsMap* outResults)
{
   return this->slave.getStatusWithResults(targetIDs, outResults);
}

void StorageBenchOperator::shutdownBenchmark()
{
   this->slave.shutdownBenchmark();
}

void StorageBenchOperator::waitForShutdownBenchmark()
{
   this->slave.waitForShutdownBenchmark();
}
45
storage/source/components/benchmarker/StorageBenchOperator.h
Normal file
@@ -0,0 +1,45 @@
#pragma once


#include "StorageBenchSlave.h"


class StorageBenchOperator
{
   public:
      StorageBenchOperator() {}

      int initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
         int threads, bool odirect, StorageBenchType type);

      int cleanup(UInt16List* targetIDs);
      int stopBenchmark();
      StorageBenchStatus getStatusWithResults(UInt16List* targetIDs,
         StorageBenchResultsMap* outResults);
      void shutdownBenchmark();
      void waitForShutdownBenchmark();

   private:
      StorageBenchSlave slave;

   protected:

   public:
      // inliners

      StorageBenchStatus getStatus()
      {
         return this->slave.getStatus();
      }

      StorageBenchType getType()
      {
         return this->slave.getType();
      }

      int getLastRunErrorCode()
      {
         return this->slave.getLastRunErrorCode();
      }
};
832
storage/source/components/benchmarker/StorageBenchSlave.cpp
Normal file
@@ -0,0 +1,832 @@
|
||||
#include <common/system/System.h>
|
||||
#include <common/toolkit/StorageTk.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <components/worker/StorageBenchWork.h>
|
||||
#include <program/Program.h>
|
||||
#include "StorageBenchSlave.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#define STORAGEBENCH_STORAGE_SUBDIR_NAME "benchmark"
|
||||
#define STORAGEBENCH_READ_PIPE_TIMEOUT_MS 2000
|
||||
|
||||
|
||||
/*
 * initializes and starts the storage benchmark with the given information
 *
 * @param targetIDs a list with the targetIDs which the benchmark tests
 * @param blocksize the blocksize for the benchmark
 * @param size the size for the benchmark
 * @param threads the number of threads (simulated clients) for the benchmark
 * @param type the type of the benchmark
 * @return the error code, 0 if the benchmark was initialized successfully (STORAGEBENCH_ERROR..)
 */
|
||||
int StorageBenchSlave::initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize,
|
||||
int64_t size, int threads, bool odirect, StorageBenchType type)
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (init)";
|
||||
|
||||
int lastError = STORAGEBENCH_ERROR_NO_ERROR;
|
||||
int retVal = STORAGEBENCH_ERROR_NO_ERROR;
|
||||
|
||||
this->resetSelfTerminate();
|
||||
|
||||
const std::lock_guard<Mutex> lock(statusMutex);
|
||||
|
||||
if (STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
std::string("Benchmark is already running. It's not possible to start a benchmark if a"
|
||||
"benchmark is running."));
|
||||
|
||||
retVal = STORAGEBENCH_ERROR_RUNTIME_IS_RUNNING;
|
||||
}
|
||||
else
|
||||
{
|
||||
retVal = initStorageBench(targetIDs, blocksize, size, threads, odirect, type);
|
||||
}
|
||||
|
||||
if(retVal == STORAGEBENCH_ERROR_NO_ERROR)
|
||||
{
|
||||
if (this->status != StorageBenchStatus_INITIALISED)
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
std::string("Benchmark not correctly initialized."));
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_UNINITIALIZED;
|
||||
this->status = StorageBenchStatus_ERROR;
|
||||
|
||||
retVal = STORAGEBENCH_ERROR_UNINITIALIZED;
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
this->start();
|
||||
this->status = StorageBenchStatus_RUNNING;
|
||||
lastError = this->lastRunErrorCode;
|
||||
}
|
||||
catch(PThreadCreateException& e)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("Unable to start thread: ") + e.what() );
|
||||
this->status = StorageBenchStatus_ERROR;
|
||||
lastError = this->lastRunErrorCode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(lastError != STORAGEBENCH_ERROR_NO_ERROR)
|
||||
{
|
||||
retVal = lastError;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/*
 * initializes the storage benchmark with the given information
 *
 * @param targetIDs a list with the targetIDs which the benchmark tests
 * @param blocksize the blocksize for the benchmark
 * @param size the size for the benchmark
 * @param threads the number of threads (simulated clients) for the benchmark
 * @param type the type of the benchmark
 * @return the error code, 0 if the benchmark was initialized successfully (STORAGEBENCH_ERROR..)
 */
|
||||
int StorageBenchSlave::initStorageBench(UInt16List* targetIDs, int64_t blocksize,
|
||||
int64_t size, int threads, bool odirect, StorageBenchType type)
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (init)";
|
||||
LogContext(logContext).log(Log_DEBUG, "Initializing benchmark ...");
|
||||
|
||||
this->benchType = type;
|
||||
this->targetIDs = new auto(*targetIDs);
|
||||
this->blocksize = blocksize;
|
||||
this->size = size;
|
||||
this->numThreads = threads;
|
||||
this->odirect = odirect;
|
||||
this->numThreadsDone = 0;
|
||||
|
||||
initThreadData();
|
||||
|
||||
if (!initTransferData())
|
||||
{
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_TRANSFER_DATA;
|
||||
this->status = StorageBenchStatus_ERROR;
|
||||
return STORAGEBENCH_ERROR_INIT_TRANSFER_DATA;
|
||||
}
|
||||
|
||||
if (this->benchType == StorageBenchType_READ)
|
||||
{
|
||||
if (!checkReadData())
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
std::string("No (or not enough) data for read benchmark available. "
|
||||
"Start a write benchmark with the same size parameter before the read benchmark.") );
|
||||
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_READ_DATA;
|
||||
this->status = StorageBenchStatus_ERROR;
|
||||
return STORAGEBENCH_ERROR_INIT_READ_DATA;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (this->benchType == StorageBenchType_WRITE)
|
||||
{
|
||||
if (!createBenchmarkFolder() )
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
std::string("Couldn't create the benchmark folder."));
|
||||
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_CREATE_BENCH_FOLDER;
|
||||
this->status = StorageBenchStatus_ERROR;
|
||||
return STORAGEBENCH_ERROR_INIT_CREATE_BENCH_FOLDER;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LogContext(logContext).logErr(std::string(
|
||||
"Unknown benchmark type: " + StringTk::uintToStr(this->benchType) ) );
|
||||
return STORAGEBENCH_ERROR_INITIALIZATION_ERROR;
|
||||
}
|
||||
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_NO_ERROR;
|
||||
this->status = StorageBenchStatus_INITIALISED;
|
||||
|
||||
LogContext(logContext).log(Log_DEBUG, std::string("Benchmark initialized."));
|
||||
|
||||
return STORAGEBENCH_ERROR_NO_ERROR;
|
||||
}
|
||||
|
||||
/*
 * initializes the data which will be written to the disk; the size of the transfer data is equal
 * to the blocksize and it is filled with random characters
 *
 * @return true if the random data were initialized,
 *         false if an error occurred
 */
|
||||
bool StorageBenchSlave::initTransferData()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (init buf)";
|
||||
LogContext(logContext).log(Log_DEBUG, std::string("Initializing random data..."));
|
||||
|
||||
void* rawTransferData;
|
||||
if (posix_memalign(&rawTransferData, 4096, blocksize) != 0)
|
||||
return false;
|
||||
transferData.reset(static_cast<char*>(rawTransferData));
|
||||
|
||||
Random randomizer = Random();
|
||||
|
||||
for (int64_t counter = 0; counter < this->blocksize; counter++)
|
||||
{
|
||||
this->transferData[counter] = randomizer.getNextInt();
|
||||
}
|
||||
|
||||
LogContext(logContext).log(Log_DEBUG, std::string("Random data initialized."));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* frees the transfer data
|
||||
*/
|
||||
void StorageBenchSlave::freeTransferData()
|
||||
{
|
||||
transferData.reset();
|
||||
}
|
||||
|
||||
/*
 * initializes the information about the threads
 */
|
||||
void StorageBenchSlave::initThreadData()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (init)";
|
||||
LogContext(logContext).log(Log_DEBUG, std::string("Initializing thread data..."));
|
||||
|
||||
this->threadData.clear();
|
||||
|
||||
int allThreadCounter = 0;
|
||||
for (UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
|
||||
{
|
||||
for (int threadCount = 0; threadCount < this->numThreads; threadCount++)
|
||||
{
|
||||
StorageBenchThreadData data;
|
||||
data.targetID = *iter;
|
||||
data.targetThreadID = threadCount;
|
||||
data.engagedSize = 0;
|
||||
data.fileDescriptor = 0;
|
||||
data.neededTime = 0;
|
||||
|
||||
|
||||
this->threadData[allThreadCounter] = data;
|
||||
allThreadCounter++;
|
||||
}
|
||||
}
|
||||
|
||||
LogContext(logContext).log(Log_DEBUG, "Thread data initialized.");
|
||||
}
|
||||
|
||||
/*
|
||||
* starts the benchmark, a read or a write benchmark
|
||||
*
|
||||
*/
|
||||
void StorageBenchSlave::run()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (run)";
|
||||
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark started..."));
|
||||
|
||||
App* app = Program::getApp();
|
||||
|
||||
bool openRes = openFiles();
|
||||
if (openRes)
|
||||
{
|
||||
this->startTime.setToNow();
|
||||
|
||||
// add a work package into the worker queue for every thread
|
||||
for(StorageBenchThreadDataMapIter iter = threadData.begin();
|
||||
iter != threadData.end();
|
||||
iter++)
|
||||
{
|
||||
LOG_DEBUG(logContext, Log_DEBUG, std::string("Add work for target: ") +
|
||||
StringTk::uintToStr(iter->second.targetID) );
|
||||
LOG_DEBUG(logContext, Log_DEBUG, std::string("- threadID: ") +
|
||||
StringTk::intToStr(iter->first) );
|
||||
LOG_DEBUG(logContext, Log_DEBUG, std::string("- type: ") +
|
||||
StringTk::intToStr(this->benchType) );
|
||||
|
||||
StorageBenchWork* work = new StorageBenchWork(iter->second.targetID, iter->first,
|
||||
iter->second.fileDescriptor, this->benchType, getNextPackageSize(iter->first),
|
||||
this->threadCommunication, this->transferData.get());
|
||||
|
||||
app->getWorkQueue(iter->second.targetID)->addIndirectWork(work);
|
||||
}
|
||||
|
||||
while(getStatus() == StorageBenchStatus_RUNNING)
|
||||
{
|
||||
int threadID = 0;
|
||||
|
||||
if (this->threadCommunication->waitForIncomingData(STORAGEBENCH_READ_PIPE_TIMEOUT_MS))
|
||||
{
|
||||
this->threadCommunication->getReadFD()->readExact(&threadID, sizeof(int));
|
||||
}
|
||||
else
|
||||
{
|
||||
threadID = STORAGEBENCH_ERROR_COM_TIMEOUT;
|
||||
}
|
||||
|
||||
if (this->getSelfTerminate())
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("Abort benchmark."));
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_ABORT_BENCHMARK;
|
||||
setStatus(StorageBenchStatus_STOPPING);
|
||||
|
||||
if (threadID != STORAGEBENCH_ERROR_COM_TIMEOUT)
|
||||
{
|
||||
this->threadData[threadID].neededTime = this->startTime.elapsedMS();
|
||||
this->numThreadsDone++;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
else
|
||||
if (threadID == STORAGEBENCH_ERROR_WORKER_ERROR)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("I/O operation on disk failed."));
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_WORKER_ERROR;
|
||||
setStatus(StorageBenchStatus_STOPPING);
|
||||
|
||||
// increment the thread counter, because the thread which sent this error hasn't a
|
||||
// work package in the queue of the workers but the response from the other threads
|
||||
// must be collected
|
||||
this->numThreadsDone++;
|
||||
|
||||
break;
|
||||
}
|
||||
else
|
||||
if (threadID == STORAGEBENCH_ERROR_COM_TIMEOUT)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else
|
||||
if ( (threadID < -1) || ( ( (unsigned)threadID) >= this->threadData.size() ) )
|
||||
{ // error if the worker reports an unknown threadID
|
||||
std::string errorMessage("Unknown thread ID: " + StringTk::intToStr(threadID) + "; "
|
||||
"map size: " + StringTk::uintToStr(this->threadData.size() ) );
|
||||
|
||||
LogContext(logContext).logErr(errorMessage);
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_ERROR;
|
||||
setStatus(StorageBenchStatus_STOPPING);
|
||||
|
||||
// increment the thread counter, because the thread which sent this error hasn't a
|
||||
// work package in the queue of the workers but the response from the other threads
|
||||
// must be collected
|
||||
this->numThreadsDone++;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
StorageBenchThreadData* currentData = &this->threadData[threadID];
|
||||
int64_t workSize = getNextPackageSize(threadID);
|
||||
|
||||
// add a new work package into the workers queue for the reported thread only if the
// data size for the thread is bigger than 0
|
||||
if (workSize != 0)
|
||||
{
|
||||
StorageBenchWork* work = new StorageBenchWork(currentData->targetID, threadID,
|
||||
currentData->fileDescriptor, this->benchType, workSize, this->threadCommunication,
|
||||
this->transferData.get());
|
||||
app->getWorkQueue(currentData->targetID)->addIndirectWork(work);
|
||||
}
|
||||
else
|
||||
{
|
||||
// the thread has finished his work
|
||||
currentData->neededTime = this->startTime.elapsedMS();
|
||||
this->numThreadsDone++;
|
||||
}
|
||||
|
||||
if (this->numThreadsDone >= this->threadData.size())
|
||||
{
|
||||
setStatus(StorageBenchStatus_FINISHING);
|
||||
}
|
||||
}
|
||||
|
||||
//collect all responses from the worker
|
||||
while ( (this->numThreadsDone < this->threadData.size()) && app->getWorkersRunning() )
|
||||
{
|
||||
int threadID = 0;
|
||||
|
||||
if (this->threadCommunication->waitForIncomingData(STORAGEBENCH_READ_PIPE_TIMEOUT_MS))
|
||||
{
|
||||
this->threadCommunication->getReadFD()->readExact(&threadID, sizeof(int));
|
||||
}
|
||||
else
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG_DEBUG(logContext, Log_DEBUG, std::string("Collect response from worker."));
|
||||
|
||||
if(threadID >= 0)
|
||||
this->threadData[threadID].neededTime = this->startTime.elapsedMS();
|
||||
|
||||
this->numThreadsDone++;
|
||||
}
|
||||
|
||||
// all workers finished/stopped ==> close all files
|
||||
closeFiles();
|
||||
freeTransferData();
|
||||
|
||||
// all threads have finished the work or the benchmark was stopped, set new status
|
||||
if (this->getStatus() == StorageBenchStatus_FINISHING)
|
||||
{
|
||||
this->setStatus(StorageBenchStatus_FINISHED);
|
||||
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark finished."));
|
||||
}
|
||||
else
|
||||
if (this->getStatus() == StorageBenchStatus_STOPPING)
|
||||
{
|
||||
if (this->lastRunErrorCode != STORAGEBENCH_ERROR_NO_ERROR)
|
||||
{
|
||||
this->setStatus(StorageBenchStatus_ERROR);
|
||||
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark stopped with errors."));
|
||||
}
|
||||
else
|
||||
{
|
||||
this->setStatus(StorageBenchStatus_STOPPED);
|
||||
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark stopped."));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_OPEN_FILES;
|
||||
setStatus(StorageBenchStatus_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * checks the size of the benchmark files; the benchmark files must be big enough for the
 * read benchmark
 *
 * @return true if data for a read benchmark exists,
 *         false if the files are too small or an error occurred
 */
|
||||
bool StorageBenchSlave::checkReadData()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (check)";
|
||||
|
||||
for (StorageBenchThreadDataMapIter iter = threadData.begin();
|
||||
iter != threadData.end(); iter++)
|
||||
{
|
||||
auto* const target = Program::getApp()->getStorageTargets()->getTarget(iter->second.targetID);
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("TargetID unknown."));
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string path = target->getPath().str();
|
||||
|
||||
path = path + "/" + STORAGEBENCH_STORAGE_SUBDIR_NAME + "/" +
|
||||
StringTk::uintToStr(iter->second.targetThreadID);
|
||||
|
||||
int error = -1;
|
||||
struct stat fileStat;
|
||||
error = stat(path.c_str(), &fileStat);
|
||||
|
||||
if (error != -1)
|
||||
{
|
||||
if (fileStat.st_size < this->size)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("Existing benchmark file too small. "
|
||||
"Requested file size: " + StringTk::int64ToStr(this->size) + " "
|
||||
"File size: " + StringTk::intToStr(fileStat.st_size)));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("Couldn't stat() benchmark file. SysErr: ") +
|
||||
System::getErrString() );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* creates the benchmark folder in the storage target folder
|
||||
*
|
||||
* @return true if all benchmark folders are created,
|
||||
* false if an error occurred
|
||||
*
|
||||
*/
|
||||
bool StorageBenchSlave::createBenchmarkFolder()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (mkdir)";
|
||||
|
||||
for(UInt16ListIter iter = this->targetIDs->begin(); iter != this->targetIDs->end(); iter++)
|
||||
{
|
||||
auto* const target = Program::getApp()->getStorageTargets()->getTarget(*iter);
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).logErr("TargetID unknown: " + StringTk::uintToStr(*iter) );
|
||||
return false;
|
||||
}
|
||||
|
||||
Path currentPath(target->getPath() / STORAGEBENCH_STORAGE_SUBDIR_NAME);
|
||||
if(!StorageTk::createPathOnDisk(currentPath, false))
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
std::string("Unable to create benchmark directory: " + currentPath.str() ) );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* opens all needed files for the benchmark. This method will be executed at the start
|
||||
* of the benchmark
|
||||
*
|
||||
* @return true if all files are opened,
|
||||
* false if an error occurred
|
||||
*
|
||||
*/
|
||||
bool StorageBenchSlave::openFiles()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (open)";
|
||||
mode_t openMode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
|
||||
|
||||
for(StorageBenchThreadDataMapIter iter = threadData.begin();
|
||||
iter != threadData.end();
|
||||
iter++)
|
||||
{
|
||||
auto* const target = Program::getApp()->getStorageTargets()->getTarget(iter->second.targetID);
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
"TargetID unknown: " + StringTk::uintToStr(iter->second.targetID) );
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string path = target->getPath().str();
|
||||
|
||||
path = path + "/" STORAGEBENCH_STORAGE_SUBDIR_NAME "/" +
|
||||
StringTk::uintToStr(iter->second.targetThreadID);
|
||||
|
||||
int fileDescriptor = -1;
|
||||
|
||||
// open file
|
||||
|
||||
int directFlag = this->odirect ? O_DIRECT : 0;
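// note (added remark): with O_DIRECT the kernel imposes alignment restrictions on the user
// buffer, the transfer size and the file offset (typically the logical block size of the
// underlying device), so the transfer buffer has to be allocated with matching alignment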
|
||||
if(this->benchType == StorageBenchType_READ)
|
||||
fileDescriptor = open(path.c_str(), O_RDONLY | directFlag);
|
||||
else
|
||||
fileDescriptor = open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC | directFlag, openMode);
|
||||
|
||||
if (fileDescriptor != -1)
|
||||
iter->second.fileDescriptor = fileDescriptor;
|
||||
else
|
||||
{ // open failed
|
||||
LogContext(logContext).logErr("Couldn't open benchmark file: " + path + "; "
|
||||
"SysErr: " + System::getErrString() );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StorageBenchSlave::closeFiles()
|
||||
{
|
||||
const char* logContext = "Storage Benchmark (close)";
|
||||
|
||||
bool retVal = true;
|
||||
|
||||
for(StorageBenchThreadDataMapIter iter = threadData.begin();
|
||||
iter != threadData.end();
|
||||
iter++)
|
||||
{
|
||||
int tmpRetVal = close(iter->second.fileDescriptor);
|
||||
|
||||
if (tmpRetVal != 0)
|
||||
{
|
||||
int closeErrno = errno;
|
||||
|
||||
auto* const target = Program::getApp()->getStorageTargets()->getTarget(
|
||||
iter->second.targetID);
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
"TargetID unknown: " + StringTk::uintToStr(iter->second.targetID) );
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string path = target->getPath().str();
|
||||
|
||||
path = path + "/" + STORAGEBENCH_STORAGE_SUBDIR_NAME + "/" +
|
||||
StringTk::uintToStr(iter->second.targetThreadID);
|
||||
|
||||
LogContext(logContext).logErr("Couldn't close file: " + path + "; "
|
||||
"SysErr: " + System::getErrString(closeErrno) );
|
||||
|
||||
retVal = false;
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/*
|
||||
* calculates the size (bytes) of the data which will be written on the disk by the worker with
|
||||
* the next work package for the given thread
|
||||
*
|
||||
* @param threadID the threadID
|
||||
* @return the size of the data for the next work package in bytes,
|
||||
* a value of 0 means the given thread has written all of its data
|
||||
*
|
||||
*/
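// illustrative example (not taken from the code): with size = 100 MiB and blocksize = 1 MiB,
// each call returns 1 MiB until engagedSize reaches 100 MiB; after that it returns 0, which
// signals that the thread has no more data to process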
|
||||
int64_t StorageBenchSlave::getNextPackageSize(int threadID)
|
||||
{
|
||||
int64_t retVal = BEEGFS_MIN(this->blocksize,
|
||||
this->size - this->threadData[threadID].engagedSize);
|
||||
this->threadData[threadID].engagedSize += retVal;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* calculates the throughput (kB/s) of the given target
|
||||
*
|
||||
* @param targetID the targetID
|
||||
* @return the throughput of the given target in kilobytes per second
|
||||
*
|
||||
*/
|
||||
int64_t StorageBenchSlave::getResult(uint16_t targetID)
|
||||
{
|
||||
int64_t size = 0;
|
||||
int64_t time = 0;
|
||||
|
||||
for(StorageBenchThreadDataMapIter iter = this->threadData.begin();
|
||||
iter != this->threadData.end();
|
||||
iter++)
|
||||
{
|
||||
if (iter->second.targetID == targetID)
|
||||
{
|
||||
// sum up the sizes processed by the different threads which worked on this target
|
||||
size += iter->second.engagedSize;
|
||||
|
||||
// search the thread with the longest runtime
|
||||
if (time < this->threadData[iter->first].neededTime)
|
||||
time = this->threadData[iter->first].neededTime;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// if the threads have not finished yet, use the time elapsed so far
|
||||
if (time == 0)
|
||||
time = this->startTime.elapsedMS();
|
||||
|
||||
// if no results available return zero
|
||||
if ( (size == 0) || (time == 0) )
|
||||
return 0;
|
||||
|
||||
// input: size in bytes, time in milliseconds,
|
||||
// output: in kilobytes per second
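// illustrative example of the conversion below: size = 1073741824 bytes (1 GiB),
// time = 10000 ms => (1073741824 * 1000) / (10000 * 1024) = 104857 kB/s (~102 MiB/s)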
|
||||
return ( (size * 1000) / (time * 1024) );
|
||||
}
|
||||
|
||||
/*
|
||||
* calculates the throughput (kB/s) of the given targets
|
||||
*
|
||||
* @param targetIDs the list of targetIDs
|
||||
* @param outResults an initialized map for the results, which contains the results after
|
||||
* execution of the method
|
||||
*
|
||||
*/
|
||||
void StorageBenchSlave::getResults(UInt16List* targetIDs, StorageBenchResultsMap* outResults)
|
||||
{
|
||||
for (UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
|
||||
{
|
||||
(*outResults)[*iter] = getResult(*iter);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* calculates the throughput (kB/s) of all targets
|
||||
*
|
||||
* @param outResults an initialized map for the results, which contains the results after
|
||||
* execution of the method
|
||||
*
|
||||
*/
|
||||
void StorageBenchSlave::getAllResults(StorageBenchResultsMap* outResults)
|
||||
{
|
||||
for (UInt16ListIter iter = this->targetIDs->begin(); iter != this->targetIDs->end(); iter++)
|
||||
{
|
||||
(*outResults)[*iter] = getResult(*iter);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* calculates the throughput (kB/s) of the given targets and returns the status of the benchmark
|
||||
*
|
||||
* @param targetIDs the list of targetIDs
|
||||
* @param outResults an initialized map for the results, which contains the results after
|
||||
* execution of the method
|
||||
* @return the status of the benchmark
|
||||
*
|
||||
*/
|
||||
StorageBenchStatus StorageBenchSlave::getStatusWithResults(UInt16List* targetIDs,
|
||||
StorageBenchResultsMap* outResults)
|
||||
{
|
||||
getResults(targetIDs, outResults);
|
||||
return getStatus();
|
||||
}
|
||||
|
||||
/*
|
||||
* stop the benchmark
|
||||
*
|
||||
* @return the error code; 0 if the benchmark will be stopped (STORAGEBENCH_ERROR_...)
|
||||
*
|
||||
*/
|
||||
int StorageBenchSlave::stopBenchmark()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(statusMutex);
|
||||
|
||||
if (this->status == StorageBenchStatus_RUNNING)
|
||||
{
|
||||
this->status = StorageBenchStatus_STOPPING;
|
||||
return STORAGEBENCH_ERROR_NO_ERROR;
|
||||
}
|
||||
else
|
||||
if(this->status == StorageBenchStatus_FINISHING || this->status == StorageBenchStatus_STOPPING)
|
||||
{
|
||||
return STORAGEBENCH_ERROR_NO_ERROR;
|
||||
}
|
||||
|
||||
return STORAGEBENCH_ERROR_NO_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* deletes all files in the benchmark folder of the given targets
|
||||
*
|
||||
* @param targetIDs the list of targetIDs which will be cleaned
|
||||
* @return the error code; 0 if the cleanup was successful (STORAGEBENCH_ERROR_...)
|
||||
*
|
||||
*/
|
||||
int StorageBenchSlave::cleanup(UInt16List* targetIDs)
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(statusMutex);
|
||||
const char* logContext = "Storage Benchmark (cleanup)";
|
||||
|
||||
//cleanup only possible if no benchmark is running
|
||||
if (STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
|
||||
{
|
||||
LogContext(logContext).logErr("Cleanup not possible; a benchmark is currently running");
|
||||
|
||||
return STORAGEBENCH_ERROR_RUNTIME_CLEANUP_JOB_ACTIVE;
|
||||
}
|
||||
|
||||
for(UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
|
||||
{
|
||||
auto* const target = Program::getApp()->getStorageTargets()->getTarget(*iter);
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("TargetID unknown."));
|
||||
return STORAGEBENCH_ERROR_RUNTIME_UNKNOWN_TARGET;
|
||||
}
|
||||
|
||||
std::string path = target->getPath().str();
|
||||
|
||||
path.append("/");
|
||||
path.append(STORAGEBENCH_STORAGE_SUBDIR_NAME);
|
||||
path.append("/");
|
||||
|
||||
DIR* dir = opendir(path.c_str());
|
||||
if (dir == NULL)
|
||||
{
|
||||
int openDirErrno = errno;
|
||||
int errRetVal;
|
||||
|
||||
if (openDirErrno == ENOENT)
|
||||
{ // benchmark directory doesn't exist, no benchmark data for cleanup
|
||||
errRetVal = STORAGEBENCH_ERROR_NO_ERROR;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
|
||||
errRetVal = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
|
||||
|
||||
LogContext(logContext).logErr("Unable to delete files in benchmark directory: " + path +
|
||||
"; failed with SysErr: " + System::getErrString(errno));
|
||||
}
|
||||
|
||||
return errRetVal;
|
||||
}
|
||||
|
||||
struct dirent* dirEntry = StorageTk::readdirFiltered(dir);
|
||||
|
||||
while (dirEntry)
|
||||
{
|
||||
struct stat statData;
|
||||
std::string filePath(path + dirEntry->d_name);
|
||||
|
||||
int retVal = stat(filePath.c_str(), &statData);
|
||||
if ((retVal == 0) && (S_ISREG(statData.st_mode)) )
|
||||
{
|
||||
|
||||
int error = unlink(filePath.c_str());
|
||||
|
||||
if(error != 0)
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
std::string("Unable to delete files in benchmark directory: "
|
||||
+ path));
|
||||
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
|
||||
|
||||
closedir(dir);
|
||||
return STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
|
||||
}
|
||||
}
|
||||
else
|
||||
if(!S_ISREG(statData.st_mode))
|
||||
LogContext(logContext).logErr("Unable to delete files in benchmark directory: " +
|
||||
path + " It's not a regular file.");
|
||||
else
|
||||
LogContext(logContext).logErr("Unable to delete files in benchmark directory: " + path);
|
||||
|
||||
dirEntry = StorageTk::readdirFiltered(dir);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
return STORAGEBENCH_ERROR_NO_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* aborts the benchmark; used if a SIGINT is received
|
||||
*
|
||||
*/
|
||||
void StorageBenchSlave::shutdownBenchmark()
|
||||
{
|
||||
this->selfTerminate();
|
||||
}
|
||||
|
||||
void StorageBenchSlave::waitForShutdownBenchmark()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(statusMutex);
|
||||
|
||||
while(STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
|
||||
{
|
||||
this->statusChangeCond.wait(&this->statusMutex);
|
||||
}
|
||||
}
|
||||
145
storage/source/components/benchmarker/StorageBenchSlave.h
Normal file
@@ -0,0 +1,145 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/benchmark/StorageBench.h>
|
||||
#include <common/threading/Condition.h>
|
||||
#include <common/threading/PThread.h>
|
||||
#include <common/toolkit/Pipe.h>
|
||||
#include <common/toolkit/TimeFine.h>
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
// struct for the information about a thread which simulates a client
|
||||
struct StorageBenchThreadData
|
||||
{
|
||||
uint16_t targetID;
|
||||
int targetThreadID;
|
||||
int64_t engagedSize; // amount of data which was submitted for write/read
|
||||
int fileDescriptor;
|
||||
int64_t neededTime;
|
||||
};
|
||||
|
||||
// deleter functor for transferData
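// (added remark: the buffer is released with free(), i.e. it is expected to come from malloc()
// or an aligned allocator such as posix_memalign() rather than new[]; aligned allocation
// matters if the buffer is used for O_DIRECT I/O)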
|
||||
struct TransferDataDeleter {
|
||||
void operator()(char* transferData) { free(transferData); }
|
||||
};
|
||||
|
||||
// map for the information about a thread; key: virtual threadID, value: information about thread
|
||||
typedef std::map<int, StorageBenchThreadData> StorageBenchThreadDataMap;
|
||||
typedef StorageBenchThreadDataMap::iterator StorageBenchThreadDataMapIter;
|
||||
typedef StorageBenchThreadDataMap::const_iterator StorageBenchThreadDataMapCIter;
|
||||
typedef StorageBenchThreadDataMap::value_type StorageBenchThreadDataMapVal;
|
||||
|
||||
|
||||
|
||||
class StorageBenchSlave : public PThread
|
||||
{
|
||||
public:
|
||||
StorageBenchSlave()
|
||||
: PThread("StorageBenchSlave"),
|
||||
threadCommunication(new Pipe(false, false) ),
|
||||
log("Storage Benchmark"),
|
||||
lastRunErrorCode(STORAGEBENCH_ERROR_NO_ERROR),
|
||||
status(StorageBenchStatus_UNINITIALIZED),
|
||||
benchType(StorageBenchType_NONE),
|
||||
blocksize(1), // useless defaults
|
||||
size(1), // useless defaults
|
||||
numThreads(1), // useless defaults
|
||||
numThreadsDone(0),
|
||||
targetIDs(NULL),
|
||||
transferData(nullptr)
|
||||
{ }
|
||||
|
||||
virtual ~StorageBenchSlave()
|
||||
{
|
||||
SAFE_DELETE(this->threadCommunication);
|
||||
SAFE_DELETE(this->targetIDs);
|
||||
}
|
||||
|
||||
int initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
|
||||
int threads, bool odirect, StorageBenchType type);
|
||||
|
||||
int cleanup(UInt16List* targetIDs);
|
||||
int stopBenchmark();
|
||||
StorageBenchStatus getStatusWithResults(UInt16List* targetIDs,
|
||||
StorageBenchResultsMap* outResults);
|
||||
void shutdownBenchmark();
|
||||
void waitForShutdownBenchmark();
|
||||
|
||||
protected:
|
||||
|
||||
private:
|
||||
Pipe* threadCommunication;
|
||||
Mutex statusMutex;
|
||||
Condition statusChangeCond;
|
||||
|
||||
LogContext log;
|
||||
int lastRunErrorCode; // STORAGEBENCH_ERROR_...
|
||||
|
||||
StorageBenchStatus status;
|
||||
StorageBenchType benchType;
|
||||
int64_t blocksize;
|
||||
int64_t size;
|
||||
int numThreads;
|
||||
bool odirect;
|
||||
unsigned int numThreadsDone;
|
||||
|
||||
UInt16List* targetIDs;
|
||||
StorageBenchThreadDataMap threadData;
|
||||
std::unique_ptr<char[], TransferDataDeleter> transferData;
|
||||
|
||||
TimeFine startTime;
|
||||
|
||||
|
||||
virtual void run();
|
||||
|
||||
int initStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
|
||||
int threads, bool odirect, StorageBenchType type);
|
||||
bool initTransferData(void);
|
||||
void initThreadData();
|
||||
void freeTransferData();
|
||||
|
||||
bool checkReadData(void);
|
||||
bool createBenchmarkFolder(void);
|
||||
bool openFiles(void);
|
||||
bool closeFiles(void);
|
||||
|
||||
int64_t getNextPackageSize(int threadID);
|
||||
int64_t getResult(uint16_t targetID);
|
||||
void getResults(UInt16List* targetIDs, StorageBenchResultsMap* outResults);
|
||||
void getAllResults(StorageBenchResultsMap* outResults);
|
||||
|
||||
void setStatus(StorageBenchStatus newStatus)
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(statusMutex);
|
||||
|
||||
this->status = newStatus;
|
||||
this->statusChangeCond.broadcast();
|
||||
}
|
||||
|
||||
public:
|
||||
//public inliners
|
||||
int getLastRunErrorCode()
|
||||
{
|
||||
return this->lastRunErrorCode;
|
||||
}
|
||||
|
||||
StorageBenchStatus getStatus()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(statusMutex);
|
||||
|
||||
return this->status;
|
||||
}
|
||||
|
||||
StorageBenchType getType()
|
||||
{
|
||||
return this->benchType;
|
||||
}
|
||||
|
||||
UInt16List* getTargetIDs()
|
||||
{
|
||||
return this->targetIDs;
|
||||
}
|
||||
};
|
||||
|
||||
745
storage/source/components/buddyresyncer/BuddyResyncJob.cpp
Normal file
@@ -0,0 +1,745 @@
|
||||
#include <program/Program.h>
|
||||
|
||||
#include <common/components/worker/IncSyncedCounterWork.h>
|
||||
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
|
||||
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
|
||||
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
|
||||
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include "BuddyResyncJob.h"
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#define BUDDYRESYNCJOB_MAXDIRWALKDEPTH 2
|
||||
|
||||
BuddyResyncJob::BuddyResyncJob(uint16_t targetID) :
|
||||
PThread("BuddyResyncJob_" + StringTk::uintToStr(targetID)),
|
||||
targetID(targetID),
|
||||
status(BuddyResyncJobState_NOTSTARTED),
|
||||
startTime(0), endTime(0)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
unsigned numGatherSlaves = app->getConfig()->getTuneNumResyncGatherSlaves();
|
||||
unsigned numSyncSlavesTotal = app->getConfig()->getTuneNumResyncSlaves();
|
||||
unsigned numFileSyncSlaves = BEEGFS_MAX((numSyncSlavesTotal / 2), 1);
|
||||
unsigned numDirSyncSlaves = BEEGFS_MAX((numSyncSlavesTotal / 2), 1);
|
||||
|
||||
// prepare slaves (vectors) and result vector
|
||||
gatherSlaveVec.resize(numGatherSlaves);
|
||||
fileSyncSlaveVec.resize(numFileSyncSlaves);
|
||||
dirSyncSlaveVec.resize(numDirSyncSlaves);
|
||||
}
|
||||
|
||||
BuddyResyncJob::~BuddyResyncJob()
|
||||
{
|
||||
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
|
||||
iter++)
|
||||
{
|
||||
BuddyResyncerGatherSlave* slave = *iter;
|
||||
SAFE_DELETE(slave);
|
||||
}
|
||||
|
||||
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
|
||||
iter != fileSyncSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerFileSyncSlave* slave = *iter;
|
||||
SAFE_DELETE(slave);
|
||||
}
|
||||
|
||||
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
|
||||
iter != dirSyncSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerDirSyncSlave* slave = *iter;
|
||||
SAFE_DELETE(slave);
|
||||
}
|
||||
}
|
||||
|
||||
void BuddyResyncJob::run()
|
||||
{
|
||||
// make sure only one job at a time can run!
|
||||
{
|
||||
std::lock_guard<Mutex> mutexLock(statusMutex);
|
||||
|
||||
if (status == BuddyResyncJobState_RUNNING)
|
||||
{
|
||||
LogContext(__func__).logErr("Refusing to run same BuddyResyncJob twice!");
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
status = BuddyResyncJobState_RUNNING;
|
||||
startTime = time(NULL);
|
||||
endTime = 0;
|
||||
}
|
||||
}
|
||||
|
||||
App* app = Program::getApp();
|
||||
StorageTargets* storageTargets = app->getStorageTargets();
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
|
||||
TargetMapper* targetMapper = app->getTargetMapper();
|
||||
NodeStoreServers* storageNodes = app->getStorageNodes();
|
||||
WorkerList* workerList = app->getWorkers();
|
||||
|
||||
bool startGatherSlavesRes;
|
||||
bool startSyncSlavesRes;
|
||||
|
||||
std::string targetPath;
|
||||
std::string chunksPath;
|
||||
|
||||
bool buddyCommIsOverride = false; // treat errors during lastbuddycomm read as "0, no override"
|
||||
int64_t lastBuddyCommTimeSecs;
|
||||
int64_t lastBuddyCommSafetyThresholdSecs;
|
||||
bool checkTopLevelDirRes;
|
||||
bool walkRes;
|
||||
|
||||
auto& target = *storageTargets->getTargets().at(targetID);
|
||||
|
||||
shallAbort.setZero();
|
||||
targetWasOffline = false;
|
||||
|
||||
// delete sync candidates and gather queue; just in case there was something from a previous run
|
||||
syncCandidates.clear();
|
||||
gatherSlavesWorkQueue.clear();
|
||||
|
||||
target.setBuddyResyncInProgress(true);
|
||||
|
||||
LogContext(__func__).log(Log_NOTICE,
|
||||
"Started resync of targetID " + StringTk::uintToStr(targetID));
|
||||
|
||||
// before starting the threads make sure every worker knows about the resync (the current work
|
||||
// package must be finished), for that we use a dummy package
|
||||
Mutex mutex;
|
||||
Condition counterIncrementedCond;
|
||||
|
||||
SynchronizedCounter numReadyWorkers;
|
||||
size_t numWorkers = workerList->size();
|
||||
for (WorkerListIter iter = workerList->begin(); iter != workerList->end(); iter++)
|
||||
{
|
||||
Worker* worker = *iter;
|
||||
PersonalWorkQueue* personalQueue = worker->getPersonalWorkQueue();
|
||||
MultiWorkQueue* workQueue = worker->getWorkQueue();
|
||||
IncSyncedCounterWork* incCounterWork = new IncSyncedCounterWork(&numReadyWorkers);
|
||||
|
||||
workQueue->addPersonalWork(incCounterWork, personalQueue);
|
||||
}
|
||||
|
||||
numReadyWorkers.waitForCount(numWorkers);
|
||||
|
||||
// notify buddy that the resync started and wait for confirmation
|
||||
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
|
||||
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
|
||||
auto buddyNode = storageNodes->referenceNode(buddyNodeID);
|
||||
StorageResyncStartedMsg storageResyncStartedMsg(buddyTargetID);
|
||||
const auto respMsg = MessagingTk::requestResponse(*buddyNode, storageResyncStartedMsg,
|
||||
NETMSGTYPE_StorageResyncStartedResp);
|
||||
|
||||
std::pair<bool, std::chrono::system_clock::time_point> lastBuddyComm;
|
||||
|
||||
if (!respMsg)
|
||||
{
|
||||
LOG(MIRRORING, ERR, "Unable to notify buddy about resync attempt. Resync will not start.",
|
||||
targetID, buddyTargetID);
|
||||
setStatus(BuddyResyncJobState_FAILURE);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
startGatherSlavesRes = startGatherSlaves(target);
|
||||
if (!startGatherSlavesRes)
|
||||
{
|
||||
setStatus(BuddyResyncJobState_FAILURE);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
startSyncSlavesRes = startSyncSlaves();
|
||||
if (!startSyncSlavesRes)
|
||||
{
|
||||
setStatus(BuddyResyncJobState_FAILURE);
|
||||
|
||||
// terminate gather slaves
|
||||
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
gatherSlaveVec[i]->selfTerminate();
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
numDirsDiscovered.setZero();
|
||||
numDirsMatched.setZero();
|
||||
|
||||
// walk over the directories until we reach a certain level and then pass the directories to
|
||||
// gather slaves to parallelize it
|
||||
targetPath = target.getPath().str();
|
||||
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
|
||||
|
||||
lastBuddyComm = target.getLastBuddyComm();
|
||||
buddyCommIsOverride = lastBuddyComm.first;
|
||||
lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
|
||||
|
||||
lastBuddyCommSafetyThresholdSecs = app->getConfig()->getSysResyncSafetyThresholdMins()*60;
|
||||
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
|
||||
lastBuddyCommTimeSecs = 0;
|
||||
else
|
||||
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
|
||||
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
|
||||
|
||||
checkTopLevelDirRes = checkTopLevelDir(chunksPath, lastBuddyCommTimeSecs);
|
||||
if (!checkTopLevelDirRes)
|
||||
{
|
||||
setStatus(BuddyResyncJobState_FAILURE);
|
||||
|
||||
// terminate gather slaves
|
||||
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
gatherSlaveVec[i]->selfTerminate();
|
||||
|
||||
// terminate sync slaves
|
||||
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
|
||||
fileSyncSlaveVec[i]->selfTerminate();
|
||||
|
||||
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
|
||||
dirSyncSlaveVec[i]->selfTerminate();
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
walkRes = walkDirs(chunksPath, "", 0, lastBuddyCommTimeSecs);
|
||||
if (!walkRes)
|
||||
{
|
||||
setStatus(BuddyResyncJobState_FAILURE);
|
||||
|
||||
// terminate gather slaves
|
||||
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
gatherSlaveVec[i]->selfTerminate();
|
||||
|
||||
// terminate sync slaves
|
||||
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
|
||||
fileSyncSlaveVec[i]->selfTerminate();
|
||||
|
||||
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
|
||||
dirSyncSlaveVec[i]->selfTerminate();
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// all directories are read => tell gather slaves to stop when the work queue is empty and wait for
|
||||
// all to stop
|
||||
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
{
|
||||
if (likely(shallAbort.read() == 0))
|
||||
gatherSlaveVec[i]->setOnlyTerminateIfIdle(true);
|
||||
else
|
||||
gatherSlaveVec[i]->setOnlyTerminateIfIdle(false);
|
||||
|
||||
gatherSlaveVec[i]->selfTerminate();
|
||||
}
|
||||
|
||||
joinGatherSlaves();
|
||||
|
||||
// gather slaves have finished => tell sync slaves to stop when work packages are empty and wait
|
||||
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
|
||||
{
|
||||
if (likely(shallAbort.read() == 0))
|
||||
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
|
||||
else
|
||||
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
|
||||
|
||||
fileSyncSlaveVec[i]->selfTerminate();
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
|
||||
{
|
||||
if (likely(shallAbort.read() == 0))
|
||||
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
|
||||
else
|
||||
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
|
||||
|
||||
dirSyncSlaveVec[i]->selfTerminate();
|
||||
}
|
||||
|
||||
joinSyncSlaves();
|
||||
|
||||
cleanup:
|
||||
// wait for gather slaves to stop
|
||||
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin();
|
||||
iter != gatherSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerGatherSlave* slave = *iter;
|
||||
if(slave)
|
||||
{
|
||||
std::lock_guard<Mutex> safeLock(slave->statusMutex);
|
||||
while (slave->isRunning)
|
||||
slave->isRunningChangeCond.wait(&(slave->statusMutex));
|
||||
}
|
||||
}
|
||||
|
||||
bool syncErrors = false;
|
||||
|
||||
// wait for sync slaves to stop and record whether any errors occurred
|
||||
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
|
||||
iter != fileSyncSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerFileSyncSlave* slave = *iter;
|
||||
if(slave)
|
||||
{
|
||||
{
|
||||
std::lock_guard<Mutex> safeLock(slave->statusMutex);
|
||||
while (slave->isRunning)
|
||||
slave->isRunningChangeCond.wait(&(slave->statusMutex));
|
||||
}
|
||||
|
||||
if (slave->getErrorCount() != 0)
|
||||
syncErrors = true;
|
||||
}
|
||||
}
|
||||
|
||||
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
|
||||
iter != dirSyncSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerDirSyncSlave* slave = *iter;
|
||||
if(slave)
|
||||
{
|
||||
{
|
||||
std::lock_guard<Mutex> safeLock(slave->statusMutex);
|
||||
while (slave->isRunning)
|
||||
slave->isRunningChangeCond.wait(&(slave->statusMutex));
|
||||
}
|
||||
|
||||
if (slave->getErrorCount() != 0)
|
||||
syncErrors = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (getStatus() == BuddyResyncJobState_RUNNING) // status not set to anything special
|
||||
{ // (e.g. FAILURE)
|
||||
if (shallAbort.read() != 0) // job aborted?
|
||||
{
|
||||
setStatus(BuddyResyncJobState_INTERRUPTED);
|
||||
informBuddy();
|
||||
}
|
||||
else if (syncErrors || targetWasOffline.read()) // any sync errors or success?
|
||||
{
|
||||
// we must set the buddy BAD if it has been offline during any period of time during which
|
||||
// the resync was also running. we implicitly do this during resync proper, since resync
|
||||
// slaves abort with errors if the target is offline. if the target goes offline *after*
|
||||
// the last proper resync message has been sent and comes *back* before we try to inform
|
||||
// it we will never detect that it has been offline at all. concurrently executing
|
||||
// messages (eg TruncFile) may run between our opportunities to detect the offline state
|
||||
// and may fail to forward their actions *even though they should forward*. this would
|
||||
// lead to an inconsistent secondary. since the target has gone offline, the only
|
||||
// reasonable course of action is to fail the resync entirely.
|
||||
setStatus(BuddyResyncJobState_ERRORS);
|
||||
informBuddy();
|
||||
}
|
||||
else
|
||||
{
|
||||
setStatus(BuddyResyncJobState_SUCCESS);
|
||||
// unset timestamp override file if an override was set
|
||||
target.setLastBuddyComm(std::chrono::system_clock::from_time_t(0), true);
|
||||
// so the target went offline between the previous check "syncErrors || targetWasOffline".
|
||||
// any message that has tried to forward itself in the intervening time will have seen the
|
||||
// offline state, but will have been unable to set the buddy to needs-resync because it
|
||||
// still *is* needs-resync. the resync itself has been perfectly successful, but we have
|
||||
// to start another one anyway once the target comes back to ensure that no information
|
||||
// was lost.
|
||||
target.setBuddyNeedsResync(targetWasOffline.read());
|
||||
informBuddy();
|
||||
|
||||
if (targetWasOffline.read())
|
||||
LOG(MIRRORING, WARNING,
|
||||
"Resync successful, but target went offline during finalization. "
|
||||
"Setting target to needs-resync again.", targetID);
|
||||
}
|
||||
}
|
||||
|
||||
target.setBuddyResyncInProgress(false);
|
||||
endTime = time(NULL);
|
||||
}
|
||||
|
||||
void BuddyResyncJob::abort()
|
||||
{
|
||||
shallAbort.set(1); // tell the file walk in this class to abort
|
||||
|
||||
// set setOnlyTerminateIfIdle on the slaves to false; they will be stopped by the main loop then
|
||||
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
|
||||
iter++)
|
||||
{
|
||||
BuddyResyncerGatherSlave* slave = *iter;
|
||||
if(slave)
|
||||
{
|
||||
slave->setOnlyTerminateIfIdle(false);
|
||||
}
|
||||
}
|
||||
|
||||
// stop sync slaves
|
||||
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
|
||||
iter != fileSyncSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerFileSyncSlave* slave = *iter;
|
||||
if(slave)
|
||||
{
|
||||
slave->setOnlyTerminateIfIdle(false);
|
||||
}
|
||||
}
|
||||
|
||||
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
|
||||
iter != dirSyncSlaveVec.end(); iter++)
|
||||
{
|
||||
BuddyResyncerDirSyncSlave* slave = *iter;
|
||||
if(slave)
|
||||
{
|
||||
slave->setOnlyTerminateIfIdle(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool BuddyResyncJob::startGatherSlaves(const StorageTarget& target)
|
||||
{
|
||||
// create gather slaves if they don't exist yet and start them
|
||||
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
{
|
||||
if(!gatherSlaveVec[i])
|
||||
gatherSlaveVec[i] = new BuddyResyncerGatherSlave(target, &syncCandidates,
|
||||
&gatherSlavesWorkQueue, i);
|
||||
|
||||
try
|
||||
{
|
||||
gatherSlaveVec[i]->resetSelfTerminate();
|
||||
gatherSlaveVec[i]->start();
|
||||
gatherSlaveVec[i]->setIsRunning(true);
|
||||
}
|
||||
catch (PThreadCreateException& e)
|
||||
{
|
||||
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BuddyResyncJob::startSyncSlaves()
|
||||
{
|
||||
// create sync slaves and start them
|
||||
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
|
||||
{
|
||||
if(!fileSyncSlaveVec[i])
|
||||
fileSyncSlaveVec[i] = new BuddyResyncerFileSyncSlave(targetID, &syncCandidates, i);
|
||||
|
||||
try
|
||||
{
|
||||
fileSyncSlaveVec[i]->resetSelfTerminate();
|
||||
fileSyncSlaveVec[i]->start();
|
||||
fileSyncSlaveVec[i]->setIsRunning(true);
|
||||
}
|
||||
catch (PThreadCreateException& e)
|
||||
{
|
||||
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
|
||||
|
||||
// stop already started sync slaves
|
||||
for(size_t j = 0; j < i; j++)
|
||||
fileSyncSlaveVec[j]->selfTerminate();
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
|
||||
{
|
||||
if(!dirSyncSlaveVec[i])
|
||||
dirSyncSlaveVec[i] = new BuddyResyncerDirSyncSlave(targetID, &syncCandidates, i);
|
||||
|
||||
try
|
||||
{
|
||||
dirSyncSlaveVec[i]->resetSelfTerminate();
|
||||
dirSyncSlaveVec[i]->start();
|
||||
dirSyncSlaveVec[i]->setIsRunning(true);
|
||||
}
|
||||
catch (PThreadCreateException& e)
|
||||
{
|
||||
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
|
||||
|
||||
// stop already started sync slaves
|
||||
for (size_t j = 0; j < fileSyncSlaveVec.size(); j++)
|
||||
fileSyncSlaveVec[j]->selfTerminate();
|
||||
|
||||
for (size_t j = 0; j < i; j++)
|
||||
dirSyncSlaveVec[j]->selfTerminate();
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void BuddyResyncJob::joinGatherSlaves()
|
||||
{
|
||||
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
gatherSlaveVec[i]->join();
|
||||
}
|
||||
|
||||
void BuddyResyncJob::joinSyncSlaves()
|
||||
{
|
||||
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
|
||||
fileSyncSlaveVec[i]->join();
|
||||
|
||||
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
|
||||
dirSyncSlaveVec[i]->join();
|
||||
}
|
||||
|
||||
void BuddyResyncJob::getJobStats(StorageBuddyResyncJobStatistics& outStats)
|
||||
{
|
||||
uint64_t discoveredFiles = 0;
|
||||
uint64_t matchedFiles = 0;
|
||||
uint64_t discoveredDirs = numDirsDiscovered.read();
|
||||
uint64_t matchedDirs = numDirsMatched.read();
|
||||
uint64_t syncedFiles = 0;
|
||||
uint64_t syncedDirs = 0;
|
||||
uint64_t errorFiles = 0;
|
||||
uint64_t errorDirs = 0;
|
||||
|
||||
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
|
||||
{
|
||||
BuddyResyncerGatherSlave* slave = gatherSlaveVec[i];
|
||||
if(slave)
|
||||
{
|
||||
uint64_t tmpDiscoveredFiles = 0;
|
||||
uint64_t tmpMatchedFiles = 0;
|
||||
uint64_t tmpDiscoveredDirs = 0;
|
||||
uint64_t tmpMatchedDirs = 0;
|
||||
slave->getCounters(tmpDiscoveredFiles, tmpMatchedFiles, tmpDiscoveredDirs, tmpMatchedDirs);
|
||||
|
||||
discoveredFiles += tmpDiscoveredFiles;
|
||||
matchedFiles += tmpMatchedFiles;
|
||||
discoveredDirs += tmpDiscoveredDirs;
|
||||
matchedDirs += tmpMatchedDirs;
|
||||
}
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
|
||||
{
|
||||
BuddyResyncerFileSyncSlave* slave = fileSyncSlaveVec[i];
|
||||
if(slave)
|
||||
{
|
||||
syncedFiles += slave->getNumChunksSynced();
|
||||
errorFiles += slave->getErrorCount();
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
|
||||
{
|
||||
BuddyResyncerDirSyncSlave* slave = dirSyncSlaveVec[i];
|
||||
if (slave)
|
||||
{
|
||||
syncedDirs += slave->getNumDirsSynced();
|
||||
discoveredDirs += slave->getNumAdditionalDirsMatched();
|
||||
matchedDirs += slave->getNumAdditionalDirsMatched();
|
||||
errorDirs += slave->getErrorCount();
|
||||
}
|
||||
}
|
||||
|
||||
outStats = StorageBuddyResyncJobStatistics(status, startTime, endTime, discoveredFiles,
|
||||
discoveredDirs, matchedFiles, matchedDirs, syncedFiles, syncedDirs, errorFiles, errorDirs);
|
||||
}
|
||||
|
||||
void BuddyResyncJob::informBuddy()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
NodeStore* storageNodes = app->getStorageNodes();
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
|
||||
TargetMapper* targetMapper = app->getTargetMapper();
|
||||
|
||||
BuddyResyncJobState status = getStatus();
|
||||
TargetConsistencyState newTargetState;
|
||||
if ( (status == BuddyResyncJobState_ERRORS) || (status == BuddyResyncJobState_INTERRUPTED))
|
||||
newTargetState = TargetConsistencyState_BAD;
|
||||
else
|
||||
if (status == BuddyResyncJobState_SUCCESS)
|
||||
newTargetState = TargetConsistencyState_GOOD;
|
||||
else
|
||||
{
|
||||
LogContext(__func__).log(Log_NOTICE, "Refusing to set a state for buddy target, because "
|
||||
"resync status isn't well-defined. "
|
||||
"localTargetID: " + StringTk::uintToStr(targetID) + "; "
|
||||
"resyncState: " + StringTk::intToStr(status));
|
||||
return;
|
||||
}
|
||||
|
||||
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
|
||||
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
|
||||
auto storageNode = storageNodes->referenceNode(buddyNodeID);
|
||||
|
||||
if (!storageNode)
|
||||
{
|
||||
LogContext(__func__).logErr(
|
||||
"Unable to inform buddy about finished resync. TargetID: " + StringTk::uintToStr(targetID)
|
||||
+ "; buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; buddyNodeID: "
|
||||
+ buddyNodeID.str() + "; error: unknown storage node");
|
||||
return;
|
||||
}
|
||||
|
||||
SetTargetConsistencyStatesRespMsg* respMsgCast;
|
||||
FhgfsOpsErr result;
|
||||
UInt16List targetIDs;
|
||||
UInt8List states;
|
||||
|
||||
targetIDs.push_back(buddyTargetID);
|
||||
states.push_back(newTargetState);
|
||||
|
||||
SetTargetConsistencyStatesMsg msg(NODETYPE_Storage, &targetIDs, &states, false);
|
||||
|
||||
const auto respMsg = MessagingTk::requestResponse(*storageNode, msg,
|
||||
NETMSGTYPE_SetTargetConsistencyStatesResp);
|
||||
if (!respMsg)
|
||||
{
|
||||
LogContext(__func__).logErr(
|
||||
"Unable to inform buddy about finished resync. "
|
||||
"targetID: " + StringTk::uintToStr(targetID) + "; "
|
||||
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
|
||||
"buddyNodeID: " + buddyNodeID.str() + "; "
|
||||
"error: Communication error");
|
||||
return;
|
||||
}
|
||||
|
||||
respMsgCast = (SetTargetConsistencyStatesRespMsg*) respMsg.get();
|
||||
result = respMsgCast->getResult();
|
||||
|
||||
if(result != FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
LogContext(__func__).logErr(
|
||||
"Error while informing buddy about finished resync. "
|
||||
"targetID: " + StringTk::uintToStr(targetID) + "; "
|
||||
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
|
||||
"buddyNodeID: " + buddyNodeID.str() + "; "
|
||||
"error: " + boost::lexical_cast<std::string>(result));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* check the CONFIG_BUDDYMIRROR_SUBDIR_NAME directory
|
||||
*/
|
||||
bool BuddyResyncJob::checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs)
|
||||
{
|
||||
struct stat statBuf;
|
||||
int statRes = stat(path.c_str(), &statBuf);
|
||||
|
||||
if(statRes != 0)
|
||||
{
|
||||
LogContext(__func__).log(Log_WARNING,
|
||||
"Couldn't stat chunks directory; resync job can't run. targetID: "
|
||||
+ StringTk::uintToStr(targetID) + "; path: " + path
|
||||
+ "; Error: " + System::getErrString(errno));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
numDirsDiscovered.increase();
|
||||
int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
|
||||
if(dirMTime > lastBuddyCommTimeSecs)
|
||||
{ // sync candidate
|
||||
ChunkSyncCandidateDir candidate("", targetID);
|
||||
syncCandidates.add(candidate, this);
|
||||
numDirsMatched.increase();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* recursively walk through the buddy mirror directory until a depth of BUDDYRESYNCJOB_MAXDIRWALKDEPTH is
|
||||
* reached; everything with a greater depth gets passed to the GatherSlaves to work on it in
|
||||
* parallel
|
||||
*/
|
||||
bool BuddyResyncJob::walkDirs(std::string chunksPath, std::string relPath, int level,
|
||||
int64_t lastBuddyCommTimeSecs)
|
||||
{
|
||||
bool retVal = true;
|
||||
|
||||
DIR* dirHandle;
|
||||
struct dirent* dirEntry;
|
||||
|
||||
dirHandle = opendir(std::string(chunksPath + "/" + relPath).c_str());
|
||||
|
||||
if(!dirHandle)
|
||||
{
|
||||
LogContext(__func__).logErr("Unable to open path. "
|
||||
"targetID: " + StringTk::uintToStr(targetID) + "; "
|
||||
"Rel. path: " + relPath + "; "
|
||||
"Error: " + System::getErrString(errno) );
|
||||
return false;
|
||||
}
|
||||
|
||||
while ((dirEntry = StorageTk::readdirFiltered(dirHandle)) != NULL)
|
||||
{
|
||||
if(shallAbort.read() != 0)
|
||||
break;
|
||||
|
||||
// get stat info
|
||||
std::string currentRelPath;
|
||||
if(unlikely(relPath.empty()))
|
||||
currentRelPath = dirEntry->d_name;
|
||||
else
|
||||
currentRelPath = relPath + "/" + dirEntry->d_name;
|
||||
|
||||
std::string currentFullPath = chunksPath + "/" + currentRelPath;
|
||||
struct stat statBuf;
|
||||
int statRes = stat(currentFullPath.c_str(), &statBuf);
|
||||
|
||||
if(statRes != 0)
|
||||
{
|
||||
LogContext(__func__).log(Log_WARNING,
|
||||
"Couldn't stat directory, which was discovered previously. Resync job might not be "
|
||||
"complete. targetID " + StringTk::uintToStr(targetID) + "; "
|
||||
"Rel. path: " + relPath + "; "
|
||||
"Error: " + System::getErrString(errno));
|
||||
|
||||
retVal = false;
|
||||
|
||||
break; // => one error aborts it all
|
||||
}
|
||||
|
||||
if(S_ISDIR(statBuf.st_mode))
|
||||
{
|
||||
// if level of dir is smaller than max, take care of it and recurse into it
|
||||
if(level < BUDDYRESYNCJOB_MAXDIRWALKDEPTH)
|
||||
{
|
||||
numDirsDiscovered.increase();
|
||||
int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
|
||||
if(dirMTime > lastBuddyCommTimeSecs)
|
||||
{ // sync candidate
|
||||
ChunkSyncCandidateDir candidate(currentRelPath, targetID);
|
||||
syncCandidates.add(candidate, this);
|
||||
numDirsMatched.increase();
|
||||
}
|
||||
|
||||
bool walkRes = walkDirs(chunksPath, currentRelPath, level+1, lastBuddyCommTimeSecs);
|
||||
|
||||
if (!walkRes)
|
||||
retVal = false;
|
||||
}
|
||||
else
|
||||
// otherwise pass it to the slaves; NOTE: gather slave takes full path
|
||||
gatherSlavesWorkQueue.add(currentFullPath, this);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(__func__, Log_WARNING, "Found a file in directory structure");
|
||||
}
|
||||
}
|
||||
|
||||
if(!dirEntry && errno) // error occurred
|
||||
{
|
||||
LogContext(__func__).logErr(
|
||||
"Unable to read all directories; chunksPath: " + chunksPath + "; relativePath: " + relPath
|
||||
+ "; SysErr: " + System::getErrString(errno));
|
||||
|
||||
retVal = false;
|
||||
}
|
||||
|
||||
int closedirRes = closedir(dirHandle);
|
||||
if (closedirRes != 0)
|
||||
LOG_DEBUG(__func__, Log_WARNING,
|
||||
"Unable to open path. targetID " + StringTk::uintToStr(targetID) + "; Rel. path: "
|
||||
+ relPath + "; Error: " + System::getErrString(errno));
|
||||
|
||||
return retVal;
|
||||
}
|
||||
90
storage/source/components/buddyresyncer/BuddyResyncJob.h
Normal file
@@ -0,0 +1,90 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/storage/mirroring/BuddyResyncJobStatistics.h>
|
||||
#include <components/buddyresyncer/BuddyResyncerDirSyncSlave.h>
|
||||
#include <components/buddyresyncer/BuddyResyncerFileSyncSlave.h>
|
||||
#include <components/buddyresyncer/BuddyResyncerGatherSlave.h>
|
||||
|
||||
#define GATHERSLAVEQUEUE_MAXSIZE 5000
|
||||
|
||||
class BuddyResyncJob : public PThread
|
||||
{
|
||||
friend class GenericDebugMsgEx;
|
||||
|
||||
public:
|
||||
BuddyResyncJob(uint16_t targetID);
|
||||
virtual ~BuddyResyncJob();
|
||||
|
||||
virtual void run();
|
||||
|
||||
void abort();
|
||||
void getJobStats(StorageBuddyResyncJobStatistics& outStats);
|
||||
|
||||
private:
|
||||
uint16_t targetID;
|
||||
Mutex statusMutex;
|
||||
BuddyResyncJobState status;
|
||||
|
||||
int64_t startTime;
|
||||
int64_t endTime;
|
||||
|
||||
ChunkSyncCandidateStore syncCandidates;
|
||||
BuddyResyncerGatherSlaveWorkQueue gatherSlavesWorkQueue;
|
||||
|
||||
BuddyResyncerGatherSlaveVec gatherSlaveVec;
|
||||
BuddyResyncerFileSyncSlaveVec fileSyncSlaveVec;
|
||||
BuddyResyncerDirSyncSlaveVec dirSyncSlaveVec;
|
||||
|
||||
// this thread walks over the top dir structures itself, so we need to track that
|
||||
AtomicUInt64 numDirsDiscovered;
|
||||
AtomicUInt64 numDirsMatched;
|
||||
|
||||
AtomicInt16 shallAbort; // quasi-boolean
|
||||
AtomicInt16 targetWasOffline;
|
||||
|
||||
bool checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs);
|
||||
bool walkDirs(std::string chunksPath, std::string relPath, int level,
|
||||
int64_t lastBuddyCommTimeSecs);
|
||||
|
||||
bool startGatherSlaves(const StorageTarget& target);
|
||||
bool startSyncSlaves();
|
||||
void joinGatherSlaves();
|
||||
void joinSyncSlaves();
|
||||
|
||||
public:
|
||||
uint16_t getTargetID() const
|
||||
{
|
||||
return targetID;
|
||||
}
|
||||
|
||||
BuddyResyncJobState getStatus()
|
||||
{
|
||||
std::lock_guard<Mutex> mutexLock(statusMutex);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool isRunning()
|
||||
{
|
||||
std::lock_guard<Mutex> mutexLock(statusMutex);
|
||||
return status == BuddyResyncJobState_RUNNING;
|
||||
}
|
||||
|
||||
void setTargetOffline()
|
||||
{
|
||||
targetWasOffline.set(1);
|
||||
}
|
||||
|
||||
private:
|
||||
void setStatus(BuddyResyncJobState status)
|
||||
{
|
||||
std::lock_guard<Mutex> mutexLock(statusMutex);
|
||||
this->status = status;
|
||||
}
|
||||
|
||||
void informBuddy();
|
||||
};
|
||||
|
||||
typedef std::map<uint16_t, BuddyResyncJob*> BuddyResyncJobMap; //mapping: targetID, job
|
||||
typedef BuddyResyncJobMap::iterator BuddyResyncJobMapIter;
|
||||
|
||||
|
||||
40
storage/source/components/buddyresyncer/BuddyResyncer.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#include <program/Program.h>
|
||||
|
||||
#include "BuddyResyncer.h"
|
||||
|
||||
BuddyResyncer::~BuddyResyncer()
|
||||
{
|
||||
// delete remaining jobs
|
||||
for (BuddyResyncJobMapIter iter = resyncJobMap.begin(); iter != resyncJobMap.end(); iter++)
|
||||
{
|
||||
BuddyResyncJob* job = iter->second;
|
||||
if( job->isRunning() )
|
||||
{
|
||||
job->abort();
|
||||
job->join();
|
||||
}
|
||||
|
||||
SAFE_DELETE(job);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return FhgfsOpsErr_SUCCESS if everything was successfully started, FhgfsOpsErr_INUSE if already
|
||||
* running
|
||||
*/
|
||||
FhgfsOpsErr BuddyResyncer::startResync(uint16_t targetID)
|
||||
{
|
||||
bool isNewJob;
|
||||
|
||||
// add a resync job for this target; if one already exists, we get the existing job
|
||||
BuddyResyncJob* resyncJob = addResyncJob(targetID, isNewJob);
|
||||
|
||||
// Job already exists *and* is already running:
|
||||
if (!isNewJob && resyncJob->isRunning() )
|
||||
return FhgfsOpsErr_INUSE;
|
||||
|
||||
// job is ready and not running
|
||||
resyncJob->start();
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
59
storage/source/components/buddyresyncer/BuddyResyncer.h
Normal file
@@ -0,0 +1,59 @@
|
||||
#pragma once
|
||||
|
||||
#include <components/buddyresyncer/BuddyResyncJob.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
/**
|
||||
* This is not a component that represents a separate thread by itself. Instead, it is the
|
||||
* controlling frontend for slave threads, which are started and stopped on request (i.e. it is not
|
||||
* automatically started when the app is started).
|
||||
*
|
||||
* Callers should only use methods in this controlling frontend and not access the slave's methods
|
||||
* directly.
|
||||
*/
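/*
 * Usage sketch (illustrative only; based on the methods declared below, not on any particular
 * caller in this codebase):
 *
 *    BuddyResyncer* resyncer = ...; // obtained from wherever the app keeps its resyncer
 *    FhgfsOpsErr startRes = resyncer->startResync(targetID);
 *    if (startRes == FhgfsOpsErr_INUSE)
 *    { // a resync job for this target is already running
 *    }
 *
 *    BuddyResyncJob* job = resyncer->getResyncJob(targetID); // may return NULL
 */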
|
||||
class BuddyResyncer
|
||||
{
|
||||
public:
|
||||
~BuddyResyncer();
|
||||
|
||||
FhgfsOpsErr startResync(uint16_t targetID);
|
||||
|
||||
private:
|
||||
BuddyResyncJobMap resyncJobMap;
|
||||
Mutex resyncJobMapMutex;
|
||||
|
||||
public:
|
||||
BuddyResyncJob* getResyncJob(uint16_t targetID)
|
||||
{
|
||||
std::lock_guard<Mutex> mutexLock(resyncJobMapMutex);
|
||||
|
||||
BuddyResyncJobMapIter iter = resyncJobMap.find(targetID);
|
||||
if (iter != resyncJobMap.end())
|
||||
return iter->second;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
BuddyResyncJob* addResyncJob(uint16_t targetID, bool& outIsNew)
|
||||
{
|
||||
|
||||
std::lock_guard<Mutex> mutexLock(resyncJobMapMutex);
|
||||
|
||||
BuddyResyncJobMapIter iter = resyncJobMap.find(targetID);
|
||||
if (iter != resyncJobMap.end())
|
||||
{
|
||||
outIsNew = false;
|
||||
return iter->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
BuddyResyncJob* job = new BuddyResyncJob(targetID);
|
||||
resyncJobMap.insert(BuddyResyncJobMap::value_type(targetID, job) );
|
||||
outIsNew = true;
|
||||
return job;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -0,0 +1,395 @@
|
||||
#include <app/App.h>
|
||||
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
|
||||
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
|
||||
#include <common/net/message/storage/listing/ListChunkDirIncrementalMsg.h>
|
||||
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
|
||||
#include <toolkit/StorageTkEx.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
#include "BuddyResyncerDirSyncSlave.h"
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#define CHECK_AT_ONCE 50
|
||||
|
||||
BuddyResyncerDirSyncSlave::BuddyResyncerDirSyncSlave(uint16_t targetID,
|
||||
ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
|
||||
PThread("BuddyResyncerDirSyncSlave_" + StringTk::uintToStr(targetID) + "-"
|
||||
+ StringTk::uintToStr(slaveID))
|
||||
{
|
||||
this->isRunning = false;
|
||||
this->targetID = targetID;
|
||||
this->syncCandidates = syncCandidates;
|
||||
}
|
||||
|
||||
BuddyResyncerDirSyncSlave::~BuddyResyncerDirSyncSlave()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a component, which is started through its control frontend on-demand at
|
||||
* runtime and terminates when it's done.
|
||||
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
|
||||
* of this thread running at the same time.
|
||||
*/
|
||||
void BuddyResyncerDirSyncSlave::run()
|
||||
{
|
||||
setIsRunning(true);
|
||||
|
||||
try
|
||||
{
|
||||
LogContext(__func__).log(Log_DEBUG, "Component started.");
|
||||
|
||||
registerSignalHandler();
|
||||
|
||||
numAdditionalDirsMatched.setZero();
|
||||
numDirsSynced.setZero();
|
||||
errorCount.setZero();
|
||||
|
||||
syncLoop();
|
||||
|
||||
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
PThread::getCurrentThreadApp()->handleComponentException(e);
|
||||
}
|
||||
|
||||
setIsRunning(false);
|
||||
}
|
||||
|
||||
void BuddyResyncerDirSyncSlave::syncLoop()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
|
||||
|
||||
while (! getSelfTerminateNotIdle())
|
||||
{
|
||||
if((syncCandidates->isDirsEmpty()) && (getSelfTerminate()))
|
||||
break;
|
||||
|
||||
ChunkSyncCandidateDir candidate;
|
||||
|
||||
syncCandidates->fetch(candidate, this);
|
||||
|
||||
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
|
||||
continue;
|
||||
|
||||
std::string relativePath = candidate.getRelativePath();
|
||||
uint16_t localTargetID = candidate.getTargetID();
|
||||
|
||||
// get buddy targetID
|
||||
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
|
||||
// perform sync
|
||||
FhgfsOpsErr resyncRes = doSync(relativePath, localTargetID, buddyTargetID);
|
||||
if (resyncRes == FhgfsOpsErr_SUCCESS)
|
||||
numDirsSynced.increase();
|
||||
else if (resyncRes != FhgfsOpsErr_INTERRUPTED)
|
||||
errorCount.increase(); // increment error count if an error occurred; note: if the slaves
|
||||
// were interrupted from the outside (e.g. ctl) this is not an error
|
||||
}
|
||||
}
|
||||
|
||||
FhgfsOpsErr BuddyResyncerDirSyncSlave::doSync(const std::string& dirPath, uint16_t localTargetID,
|
||||
uint16_t buddyTargetID)
|
||||
{
|
||||
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
|
||||
|
||||
App* app = Program::getApp();
|
||||
TargetMapper* targetMapper = app->getTargetMapper();
|
||||
NodeStoreServers* storageNodes = app->getStorageNodes();
|
||||
|
||||
// try to find the node with the buddyTargetID
|
||||
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
|
||||
auto node = storageNodes->referenceNode(buddyNodeID);
|
||||
|
||||
if(!node)
|
||||
{
|
||||
LogContext(__func__).logErr(
|
||||
"Storage node does not exist; nodeID " + buddyNodeID.str());
|
||||
|
||||
return FhgfsOpsErr_UNKNOWNNODE;
|
||||
}
|
||||
|
||||
int64_t offset = 0;
|
||||
unsigned entriesFetched;
|
||||
|
||||
do
|
||||
{
|
||||
int64_t newOffset;
|
||||
StringList names;
|
||||
IntList entryTypes;
|
||||
|
||||
FhgfsOpsErr listRes = getBuddyDirContents(*node, dirPath, buddyTargetID, offset, names,
|
||||
entryTypes, newOffset);
|
||||
|
||||
if(listRes != FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
retVal = listRes;
|
||||
break;
|
||||
}
|
||||
|
||||
offset = newOffset;
|
||||
entriesFetched = names.size();
|
||||
|
||||
// match locally
|
||||
FhgfsOpsErr findRes = findChunks(localTargetID, dirPath, names, entryTypes);
|
||||
|
||||
if(findRes != FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
retVal = findRes;
|
||||
break;
|
||||
}
|
||||
|
||||
// delete the remaining chunks/dirs on the buddy
|
||||
StringList rmPaths;
|
||||
for (StringListIter iter = names.begin(); iter != names.end(); iter++)
|
||||
{
|
||||
std::string path = dirPath + "/" + *iter;
|
||||
rmPaths.push_back(path);
|
||||
}
|
||||
|
||||
FhgfsOpsErr rmRes = removeBuddyChunkPaths(*node, localTargetID, buddyTargetID, rmPaths);
|
||||
|
||||
if (rmRes != FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
retVal = rmRes;
|
||||
break;
|
||||
}
|
||||
|
||||
if (getSelfTerminateNotIdle())
|
||||
{
|
||||
retVal = FhgfsOpsErr_INTERRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
} while (entriesFetched == CHECK_AT_ONCE);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||

FhgfsOpsErr BuddyResyncerDirSyncSlave::getBuddyDirContents(Node& node, const std::string& dirPath,
   uint16_t targetID, int64_t offset, StringList& outNames, IntList& outEntryTypes,
   int64_t& outNewOffset)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   unsigned msgRetryIntervalMS = 5000;

   // get a part of the dir contents from the buddy target
   ListChunkDirIncrementalMsg listMsg(targetID, true, dirPath, offset, CHECK_AT_ONCE, false, true);
   listMsg.setMsgHeaderTargetID(targetID);

   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);

   // send request to node and receive response
   std::unique_ptr<NetMessage> respMsg;

   while ( (!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
   {
      respMsg = MessagingTk::requestResponse(node, listMsg, NETMSGTYPE_ListChunkDirIncrementalResp);

      if (!respMsg)
      {
         LOG_DEBUG(__func__, Log_NOTICE,
            "Unable to communicate, but target is not offline; sleeping "
            + StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
            + StringTk::uintToStr(targetID));

         PThread::sleepMS(msgRetryIntervalMS);

         // if thread shall terminate, break loop here
         if ( getSelfTerminateNotIdle() )
            break;

         getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
      }
   }

   if (!respMsg)
   { // communication error
      LogContext(__func__).logErr(
         "Communication with storage node failed: " + node.getTypedNodeID());

      retVal = FhgfsOpsErr_COMMUNICATION;
   }
   else
   if(!getStateRes)
   {
      LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );

      retVal = FhgfsOpsErr_INTERNAL;
   }
   else
   {
      // correct response type received
      ListChunkDirIncrementalRespMsg* respMsgCast = (ListChunkDirIncrementalRespMsg*) respMsg.get();

      FhgfsOpsErr listRes = respMsgCast->getResult();

      if (listRes == FhgfsOpsErr_SUCCESS)
      {
         outNewOffset = respMsgCast->getNewOffset();
         respMsgCast->getNames().swap(outNames);
         respMsgCast->getEntryTypes().swap(outEntryTypes);
      }
      else
      if (listRes != FhgfsOpsErr_PATHNOTEXISTS)
      { // a non-existing path is ok, because it might have been deleted
         LogContext(__func__).log(Log_WARNING, "Error listing chunks dir; "
            "dirPath: " + dirPath + "; "
            "targetID: " + StringTk::uintToStr(targetID) + "; "
            "node: " + node.getTypedNodeID() + "; "
            "Error: " + boost::lexical_cast<std::string>(listRes));

         retVal = listRes;
      }
   }

   return retVal;
}
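The retry loop above (re-send via MessagingTk::requestResponse() while the buddy target is not reported OFFLINE) recurs in several methods of this file and in the file sync slave below. As a purely illustrative sketch, the pattern could be factored into a helper like the following; the helper name, the default retry interval and the exact parameter types are assumptions inferred from the calls above, and the per-thread termination check is left to the caller:

// Illustrative sketch, not part of the upstream sources.
static std::unique_ptr<NetMessage> requestResponseWhileOnline(Node& node, NetMessage& requestMsg,
   unsigned respMsgType, uint16_t targetID, unsigned retryIntervalMS = 5000)
{
   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);

   std::unique_ptr<NetMessage> respMsg;

   while ( (!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
   {
      respMsg = MessagingTk::requestResponse(node, requestMsg, respMsgType);

      if (!respMsg)
      { // communication failed, but the target is not offline => wait and retry
         PThread::sleepMS(retryIntervalMS);
         getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
      }
   }

   return respMsg; // empty pointer means communication error or target went offline
}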

FhgfsOpsErr BuddyResyncerDirSyncSlave::findChunks(uint16_t targetID, const std::string& dirPath,
   StringList& inOutNames, IntList& inOutEntryTypes)
{
   App* app = Program::getApp();
   ChunkLockStore* chunkLockStore = app->getChunkLockStore();

   const auto& target = app->getStorageTargets()->getTargets().at(targetID);

   const int targetFD = *target->getMirrorFD();

   StringListIter namesIter = inOutNames.begin();
   IntListIter typesIter = inOutEntryTypes.begin();
   while (namesIter != inOutNames.end())
   {
      std::string entryID = *namesIter;
      DirEntryType entryType = (DirEntryType)*typesIter;

      std::string entryPath;
      if (likely(!dirPath.empty()))
         entryPath = dirPath + "/" + entryID;
      else
         entryPath = entryID;

      if (DirEntryType_ISDIR(entryType))
      {
         bool entryExists = StorageTk::pathExists(targetFD, entryPath);

         if (!entryExists)
         {
            // dir not found, so we didn't know about it yet => add it to the sync candidate store,
            // so that it gets checked and we get a list of its contents
            ChunkSyncCandidateDir syncCandidate(entryPath, targetID);
            syncCandidates->add(syncCandidate, this);
            numAdditionalDirsMatched.increase();
         }

         // no matter if found or not: remove it from the list, because we do not explicitly
         // delete directories on the buddy
         namesIter = inOutNames.erase(namesIter);
         typesIter = inOutEntryTypes.erase(typesIter);
      }
      else
      {
         // need to lock the chunk to check it
         chunkLockStore->lockChunk(targetID, entryID);

         bool entryExists = StorageTk::pathExists(targetFD, entryPath);

         if (entryExists)
         {
            // chunk found => delete it from the list and unlock it
            namesIter = inOutNames.erase(namesIter);
            typesIter = inOutEntryTypes.erase(typesIter);
            chunkLockStore->unlockChunk(targetID, entryID);
         }
         else
         {
            // chunk not found => keep lock; will be unlocked after removal
            namesIter++;
            typesIter++;
         }

      }
   }

   return FhgfsOpsErr_SUCCESS;
}
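findChunks() above walks two parallel lists and erases matched entries from both as it goes. A self-contained sketch of that erase-while-iterating idiom on std::list; the predicate is a hypothetical stand-in for the pathExists() check above:

#include <list>
#include <string>

// Sketch of the parallel-list filtering used in findChunks() above.
// keepEntry() stands in for "chunk is missing locally => keep it for removal on the buddy".
static void filterParallelLists(std::list<std::string>& names, std::list<int>& types,
   bool (*keepEntry)(const std::string& name, int type))
{
   auto namesIter = names.begin();
   auto typesIter = types.begin();

   while (namesIter != names.end())
   {
      if (keepEntry(*namesIter, *typesIter))
      { // keep both entries and advance
         ++namesIter;
         ++typesIter;
      }
      else
      { // erase from both lists; std::list::erase() returns an iterator to the next element
         namesIter = names.erase(namesIter);
         typesIter = types.erase(typesIter);
      }
   }
}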

FhgfsOpsErr BuddyResyncerDirSyncSlave::removeBuddyChunkPaths(Node& node, uint16_t localTargetID,
   uint16_t buddyTargetID, StringList& paths)
{
   unsigned msgRetryIntervalMS = 5000;

   ChunkLockStore* chunkLockStore = Program::getApp()->getChunkLockStore();
   RmChunkPathsMsg rmMsg(buddyTargetID, &paths);
   rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
   rmMsg.setMsgHeaderTargetID(buddyTargetID);

   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);

   // send request to node and receive response
   std::unique_ptr<NetMessage> respMsg;

   while ((!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE))
   {
      respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);

      if (!respMsg)
      {
         LOG_DEBUG(__func__, Log_NOTICE,
            "Unable to communicate, but target is not offline; sleeping "
            + StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
            + StringTk::uintToStr(targetID));
         PThread::sleepMS(msgRetryIntervalMS);

         // if thread shall terminate, break loop here
         if ( getSelfTerminateNotIdle() )
            break;

         getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
      }
   }

   // no matter if that succeeded or not we unlock all chunks here first
   for (StringListIter iter = paths.begin(); iter != paths.end(); iter++)
   {
      std::string entryID = StorageTk::getPathBasename(*iter);
      chunkLockStore->unlockChunk(localTargetID, entryID);
   }

   if (!respMsg)
   { // communication error
      LogContext(__func__).logErr(
         "Communication with storage node failed: " + node.getTypedNodeID());

      return FhgfsOpsErr_COMMUNICATION;
   }
   else
   if(!getStateRes)
   {
      LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );

      return FhgfsOpsErr_INTERNAL;
   }
   else
   {
      // correct response type received
      RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
      StringList& failedPaths = respMsgCast->getFailedPaths();

      for(StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
      {
         LogContext(__func__).logErr("Chunk path could not be deleted; "
            "path: " + *iter + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "node: " + node.getTypedNodeID());
      }
   }

   return FhgfsOpsErr_SUCCESS;
}
@@ -0,0 +1,106 @@
#pragma once

#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <components/buddyresyncer/SyncCandidate.h>

class BuddyResyncerDirSyncSlave : public PThread
{
   friend class BuddyResyncer; // (to grant access to internal mutex)
   friend class BuddyResyncJob; // (to grant access to internal mutex)

   public:
      BuddyResyncerDirSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
         uint8_t slaveID);
      virtual ~BuddyResyncerDirSyncSlave();

   private:
      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond;

      AtomicSizeT onlyTerminateIfIdle;

      AtomicUInt64 numDirsSynced;
      AtomicUInt64 numAdditionalDirsMatched;
      AtomicUInt64 errorCount;

      bool isRunning; // true if an instance of this component is currently running

      uint16_t targetID;
      ChunkSyncCandidateStore* syncCandidates;

      virtual void run();
      void syncLoop();

      FhgfsOpsErr doSync(const std::string& dirPath, uint16_t localTargetID,
         uint16_t buddyTargetID);
      FhgfsOpsErr getBuddyDirContents(Node& node, const std::string& dirPath, uint16_t targetID,
         int64_t offset, StringList& outNames, IntList& outEntryTypes, int64_t& outNewOffset);
      FhgfsOpsErr findChunks(uint16_t targetID, const std::string& dirPath, StringList& inOutNames,
         IntList& inOutEntryTypes);
      FhgfsOpsErr removeBuddyChunkPaths(Node& node, uint16_t localTargetID, uint16_t buddyTargetID,
         StringList& paths);

   public:
      // getters & setters
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         return this->isRunning;
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         if (value)
            onlyTerminateIfIdle.set(1);
         else
            onlyTerminateIfIdle.setZero();
      }

      bool getOnlyTerminateIfIdle()
      {
         if (onlyTerminateIfIdle.read() == 0)
            return false;
         else
            return true;
      }

      uint64_t getNumDirsSynced()
      {
         return numDirsSynced.read();
      }

      uint64_t getNumAdditionalDirsMatched()
      {
         return numAdditionalDirsMatched.read();
      }

      uint64_t getErrorCount()
      {
         return errorCount.read();
      }

   private:
      // getters & setters

      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      bool getSelfTerminateNotIdle()
      {
         return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
      }
};

typedef std::list<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveList;
typedef BuddyResyncerDirSyncSlaveList::iterator BuddyResyncerDirSyncSlaveListIter;
typedef std::vector<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveVec;
typedef BuddyResyncerDirSyncSlaveVec::iterator BuddyResyncerDirSyncSlaveVecIter;
@@ -0,0 +1,471 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>

#include "BuddyResyncerFileSyncSlave.h"

#include <boost/lexical_cast.hpp>

#define PROCESS_AT_ONCE 1
#define SYNC_BLOCK_SIZE (1024*1024) // 1M

BuddyResyncerFileSyncSlave::BuddyResyncerFileSyncSlave(uint16_t targetID,
   ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
   PThread("BuddyResyncerFileSyncSlave_" + StringTk::uintToStr(targetID) + "-"
      + StringTk::uintToStr(slaveID))
{
   this->isRunning = false;
   this->syncCandidates = syncCandidates;
   this->targetID = targetID;
}

BuddyResyncerFileSyncSlave::~BuddyResyncerFileSyncSlave()
{
}

/**
 * This is a component, which is started through its control frontend on-demand at
 * runtime and terminates when it's done.
 * We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
 * of this thread running at the same time.
 */
void BuddyResyncerFileSyncSlave::run()
{
   setIsRunning(true);

   try
   {
      LogContext(__func__).log(Log_DEBUG, "Component started.");

      registerSignalHandler();

      numChunksSynced.setZero();
      errorCount.setZero();

      syncLoop();

      LogContext(__func__).log(Log_DEBUG, "Component stopped.");
   }
   catch (std::exception& e)
   {
      PThread::getCurrentThreadApp()->handleComponentException(e);
   }

   setIsRunning(false);
}
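As the comment above run() notes, this component is started on demand and must never run twice concurrently. A hedged sketch of how a controlling component might drive it, using only the interface declared in the header further below (start() and selfTerminate() come from PThread; the real wiring lives in BuddyResyncJob and may differ):

// Illustrative only; selfTerminate() may require the friend access that the header
// grants to BuddyResyncJob.
void startFileSyncSlaveIfIdle(BuddyResyncerFileSyncSlave& slave)
{
   if (slave.getIsRunning())
      return; // never start a second instance of this component

   slave.setOnlyTerminateIfIdle(true); // keep working until the candidate store is drained
   slave.start();                      // spawns the thread; run() flips isRunning to true
}

void stopFileSyncSlave(BuddyResyncerFileSyncSlave& slave)
{
   slave.selfTerminate(); // ask the thread to stop

   while (slave.getIsRunning())
      PThread::sleepMS(100); // wait until run() has flipped isRunning back to false
}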

void BuddyResyncerFileSyncSlave::syncLoop()
{
   App* app = Program::getApp();
   MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();

   while (! getSelfTerminateNotIdle())
   {
      if((syncCandidates->isFilesEmpty()) && (getSelfTerminate()))
         break;

      ChunkSyncCandidateFile candidate;

      syncCandidates->fetch(candidate, this);

      if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
         continue;

      std::string relativePath = candidate.getRelativePath();
      uint16_t localTargetID = candidate.getTargetID();

      // get buddy targetID
      uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
      // perform sync
      FhgfsOpsErr resyncRes = doResync(relativePath, localTargetID, buddyTargetID);
      if (resyncRes == FhgfsOpsErr_SUCCESS)
         numChunksSynced.increase();
      else
      if (resyncRes != FhgfsOpsErr_INTERRUPTED)
         errorCount.increase();
   }
}

FhgfsOpsErr BuddyResyncerFileSyncSlave::doResync(std::string& chunkPathStr, uint16_t localTargetID,
   uint16_t buddyTargetID)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   unsigned msgRetryIntervalMS = 5000;

   App* app = Program::getApp();
   TargetMapper* targetMapper = app->getTargetMapper();
   NodeStoreServers* storageNodes = app->getStorageNodes();
   ChunkLockStore* chunkLockStore = app->getChunkLockStore();

   std::string entryID = StorageTk::getPathBasename(chunkPathStr);

   // try to find the node with the buddyTargetID
   NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);

   auto node = storageNodes->referenceNode(buddyNodeID);

   if(!node)
   {
      LogContext(__func__).log(Log_WARNING,
         "Storage node does not exist; nodeID " + buddyNodeID.str());

      return FhgfsOpsErr_UNKNOWNNODE;
   }

   int64_t offset = 0;
   ssize_t readRes = 0;
   unsigned resyncMsgFlags = 0;
   resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR;

   LogContext(__func__).log(Log_DEBUG,
      "File sync started. chunkPath: " + chunkPathStr + "; localTargetID: "
      + StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
      + StringTk::uintToStr(buddyTargetID));

   do
   {
      boost::scoped_array<char> data(new char[SYNC_BLOCK_SIZE]);

      const auto& target = app->getStorageTargets()->getTargets().at(localTargetID);

      // lock the chunk
      chunkLockStore->lockChunk(localTargetID, entryID);

      const int fd = openat(*target->getMirrorFD(), chunkPathStr.c_str(), O_RDONLY | O_NOATIME);

      if (fd == -1)
      {
         int errCode = errno;

         if(errCode == ENOENT)
         { // chunk was deleted => no error
            // delete the mirror chunk and return
            bool rmRes = removeBuddyChunkUnlocked(*node, buddyTargetID, chunkPathStr);

            if (!rmRes) // rm failed; stop resync
            {
               LogContext(__func__).log(Log_WARNING,
                  "File sync not started. chunkPath: " + chunkPathStr + "; localTargetID: "
                  + StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
                  + StringTk::uintToStr(buddyTargetID));

               retVal = FhgfsOpsErr_INTERNAL;
            }
         }
         else // error => log and return
         {
            LogContext(__func__).logErr(
               "Open of chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
               + StringTk::uintToStr(localTargetID) + "; Error: "
               + System::getErrString(errCode));

            retVal = FhgfsOpsErr_INTERNAL;
         }

         chunkLockStore->unlockChunk(localTargetID, entryID);

         goto cleanup;
      }

      int seekRes = lseek(fd, offset, SEEK_SET);

      if (seekRes == -1)
      {
         LogContext(__func__).logErr(
            "Seeking in chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
            + StringTk::uintToStr(localTargetID) + "; offset: " + StringTk::int64ToStr(offset));

         chunkLockStore->unlockChunk(localTargetID, entryID);

         goto cleanup;
      }

      readRes = read(fd, data.get(), SYNC_BLOCK_SIZE);

      if( readRes == -1)
      {
         LogContext(__func__).logErr("Error during read; "
            "chunkPath: " + chunkPathStr + "; "
            "targetID: " + StringTk::uintToStr(localTargetID) + "; "
            "BuddyNode: " + node->getTypedNodeID() + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "Error: " + System::getErrString(errno));

         retVal = FhgfsOpsErr_INTERNAL;

         goto end_of_loop;
      }

      if(readRes > 0)
      {
         const char zeroBuf[RESYNCER_SPARSE_BLOCK_SIZE] = { 0 };

         // check if sparse blocks are in the buffer
         ssize_t bufPos = 0;
         bool dataFound = false;
         while (bufPos < readRes)
         {
            size_t cmpLen = BEEGFS_MIN(readRes-bufPos, RESYNCER_SPARSE_BLOCK_SIZE);

            int cmpRes = memcmp(data.get() + bufPos, zeroBuf, cmpLen);
            if(cmpRes != 0)
               dataFound = true;
            else // sparse area detected
            {
               if(dataFound) // had data before
               {
                  resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE; // let the receiver do a check
                  break; // and stop checking here
               }
            }

            bufPos += cmpLen;
         }

         // this inner loop is over and there are only sparse areas

         /* make sure we always send a msg at offset==0 to truncate the file and allow concurrent
            writers in a big initial sparse area */
         if(offset && (readRes > 0) && (readRes == SYNC_BLOCK_SIZE) && !dataFound)
         {
            goto end_of_loop;
            // => no transfer needed
         }

         /* let the receiver do a check, because we might be sending a sparse block at beginning or
            end of file */
         if(!dataFound)
            resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE;
      }

      {
         ResyncLocalFileMsg resyncMsg(data.get(), chunkPathStr, buddyTargetID, offset, readRes);

         if (!readRes || (readRes < SYNC_BLOCK_SIZE) ) // last iteration, set attribs and trunc buddy chunk
         {
            struct stat statBuf;
            int statRes = fstat(fd, &statBuf);

            if (statRes == 0)
            {
               if(statBuf.st_size < offset)
               { // in case someone truncated the file while we're reading at a high offset
                  offset = statBuf.st_size;
                  resyncMsg.setOffset(offset);
               }
               else
               if(offset && !readRes)
                  resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_TRUNC;

               int mode = statBuf.st_mode;
               unsigned userID = statBuf.st_uid;
               unsigned groupID = statBuf.st_gid;
               int64_t mtimeSecs = statBuf.st_mtim.tv_sec;
               int64_t atimeSecs = statBuf.st_atim.tv_sec;
               SettableFileAttribs chunkAttribs = {mode, userID, groupID, mtimeSecs, atimeSecs};
               resyncMsg.setChunkAttribs(chunkAttribs);
               resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_SETATTRIBS;
            }
            else
            {
               LogContext(__func__).logErr("Error getting chunk attributes; "
                  "chunkPath: " + chunkPathStr + "; "
                  "targetID: " + StringTk::uintToStr(localTargetID) + "; "
                  "BuddyNode: " + node->getTypedNodeID() + "; "
                  "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
                  "Error: " + System::getErrString(errno));
            }
         }

         resyncMsg.setMsgHeaderFeatureFlags(resyncMsgFlags);
         resyncMsg.setMsgHeaderTargetID(buddyTargetID);

         CombinedTargetState state;
         bool getStateRes =
            Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);

         // send request to node and receive response
         std::unique_ptr<NetMessage> respMsg;

         while ( (!respMsg) && (getStateRes)
            && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
         {
            respMsg = MessagingTk::requestResponse(*node, resyncMsg,
               NETMSGTYPE_ResyncLocalFileResp);

            if (!respMsg)
            {
               LOG_DEBUG(__func__, Log_NOTICE,
                  "Unable to communicate, but target is not offline; sleeping "
                  + StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
                  + StringTk::uintToStr(targetID));

               PThread::sleepMS(msgRetryIntervalMS);

               // if thread shall terminate, break loop here
               if ( getSelfTerminateNotIdle() )
                  break;

               getStateRes =
                  Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
            }
         }

         if (!respMsg)
         { // communication error
            LogContext(__func__).log(Log_WARNING,
               "Communication with storage node failed: " + node->getTypedNodeID());

            retVal = FhgfsOpsErr_COMMUNICATION;

            // set readRes to non-zero to force exiting loop
            readRes = -2;
         }
         else
         if(!getStateRes)
         {
            LogContext(__func__).log(Log_WARNING,
               "No valid state for node ID: " + node->getTypedNodeID());

            retVal = FhgfsOpsErr_INTERNAL;

            // set readRes to non-zero to force exiting loop
            readRes = -2;
         }
         else
         {
            // correct response type received
            ResyncLocalFileRespMsg* respMsgCast = (ResyncLocalFileRespMsg*) respMsg.get();

            FhgfsOpsErr syncRes = respMsgCast->getResult();

            if(syncRes != FhgfsOpsErr_SUCCESS)
            {
               LogContext(__func__).log(Log_WARNING, "Error during resync; "
                  "chunkPath: " + chunkPathStr + "; "
                  "targetID: " + StringTk::uintToStr(localTargetID) + "; "
                  "BuddyNode: " + node->getTypedNodeID() + "; "
                  "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
                  "Error: " + boost::lexical_cast<std::string>(syncRes));

               retVal = syncRes;

               // set readRes to non-zero to force exiting loop
               readRes = -2;
            }
         }
      }

end_of_loop:
      int closeRes = close(fd);
      if (closeRes == -1)
      {
         LogContext(__func__).log(Log_WARNING, "Error closing file descriptor; "
            "chunkPath: " + chunkPathStr + "; "
            "targetID: " + StringTk::uintToStr(localTargetID) + "; "
            "BuddyNode: " + node->getTypedNodeID() + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "Error: " + System::getErrString(errno));
      }
      // unlock the chunk
      chunkLockStore->unlockChunk(localTargetID, entryID);

      // increment offset for next iteration
      offset += readRes;

      if ( getSelfTerminateNotIdle() )
      {
         retVal = FhgfsOpsErr_INTERRUPTED;
         break;
      }

   } while (readRes == SYNC_BLOCK_SIZE);

cleanup:
   LogContext(__func__).log(Log_DEBUG, "File sync finished. chunkPath: " + chunkPathStr);

   return retVal;
}
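The sparse-block handling inside doResync() above scans the read buffer in RESYNCER_SPARSE_BLOCK_SIZE steps against a zero buffer. A standalone sketch of the same check, with the block size chosen arbitrarily for illustration:

#include <algorithm>
#include <cstddef>
#include <cstring>

// Returns true if buf contains any non-zero byte; scans in fixed-size steps like the
// RESYNCER_SPARSE_BLOCK_SIZE loop in doResync() above.
static bool bufferContainsData(const char* buf, size_t len)
{
   static const char zeroBlock[4096] = { 0 }; // illustrative block size

   size_t pos = 0;
   while (pos < len)
   {
      const size_t cmpLen = std::min(len - pos, sizeof(zeroBlock));

      if (std::memcmp(buf + pos, zeroBlock, cmpLen) != 0)
         return true; // found data => not a pure sparse area

      pos += cmpLen;
   }

   return false; // all zeroes => the caller may skip the transfer or set CHECK_SPARSE
}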

/**
 * Note: Chunk has to be locked by caller.
 */
bool BuddyResyncerFileSyncSlave::removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID,
   std::string& pathStr)
{
   bool retVal = true;
   unsigned msgRetryIntervalMS = 5000;

   std::string entryID = StorageTk::getPathBasename(pathStr);
   StringList rmPaths;
   rmPaths.push_back(pathStr);

   RmChunkPathsMsg rmMsg(buddyTargetID, &rmPaths);
   rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
   rmMsg.setMsgHeaderTargetID(buddyTargetID);

   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);

   // send request to node and receive response
   std::unique_ptr<NetMessage> respMsg;

   while ( (!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
   {
      respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);

      if (!respMsg)
      {
         LOG_DEBUG(__func__, Log_NOTICE,
            "Unable to communicate, but target is not offline; "
            "sleeping " + StringTk::uintToStr(msgRetryIntervalMS) + " ms before retry. "
            "targetID: " + StringTk::uintToStr(targetID) );

         PThread::sleepMS(msgRetryIntervalMS);

         // if thread shall terminate, break loop here
         if ( getSelfTerminateNotIdle() )
            break;

         getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
      }
   }

   if (!respMsg)
   { // communication error
      LogContext(__func__).logErr(
         "Communication with storage node failed: " + node.getTypedNodeID() );

      return false;
   }
   else
   if(!getStateRes)
   {
      LogContext(__func__).log(Log_WARNING,
         "No valid state for node ID: " + node.getTypedNodeID() );

      return false;
   }
   else
   {
      // correct response type received
      RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
      StringList& failedPaths = respMsgCast->getFailedPaths();

      for (StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
      {
         LogContext(__func__).logErr("Chunk path could not be deleted; "
            "path: " + *iter + "; "
            "targetID: " + StringTk::uintToStr(targetID) + "; "
            "node: " + node.getTypedNodeID());
         retVal = false;
      }
   }

   return retVal;
}
@@ -0,0 +1,98 @@
#pragma once

#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>

#include <mutex>

class BuddyResyncerFileSyncSlave : public PThread
{
   friend class BuddyResyncer; // (to grant access to internal mutex)
   friend class BuddyResyncJob; // (to grant access to internal mutex)

   public:
      BuddyResyncerFileSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
         uint8_t slaveID);
      virtual ~BuddyResyncerFileSyncSlave();

   private:
      AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool

      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond;

      AtomicUInt64 numChunksSynced;
      AtomicUInt64 errorCount;

      bool isRunning; // true if an instance of this component is currently running

      uint16_t targetID;

      ChunkSyncCandidateStore* syncCandidates;

      virtual void run();
      void syncLoop();
      FhgfsOpsErr doResync(std::string& chunkPathStr, uint16_t localTargetID,
         uint16_t buddyTargetID);
      bool removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID, std::string& pathStr);

   public:
      // getters & setters
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         return this->isRunning;
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         if (value)
            onlyTerminateIfIdle.set(1);
         else
            onlyTerminateIfIdle.setZero();
      }

      bool getOnlyTerminateIfIdle()
      {
         if (onlyTerminateIfIdle.read() == 0)
            return false;
         else
            return true;
      }

      uint64_t getNumChunksSynced()
      {
         return numChunksSynced.read();
      }

      uint64_t getErrorCount()
      {
         return errorCount.read();
      }

   private:
      // getters & setters

      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      bool getSelfTerminateNotIdle()
      {
         return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
      }
};

typedef std::list<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveList;
typedef BuddyResyncerFileSyncSlaveList::iterator BuddyResyncerFileSyncSlaveListIter;

typedef std::vector<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveVec;
typedef BuddyResyncerFileSyncSlaveVec::iterator BuddyResyncerFileSyncSlaveVecIter;
@@ -0,0 +1,162 @@
#include <app/App.h>
#include <toolkit/StorageTkEx.h>
#include <storage/StorageTargets.h>

#include <program/Program.h>

#include <mutex>

#include "BuddyResyncerGatherSlave.h"

Mutex BuddyResyncerGatherSlave::staticGatherSlavesMutex;
std::map<std::string, BuddyResyncerGatherSlave*> BuddyResyncerGatherSlave::staticGatherSlaves;

BuddyResyncerGatherSlave::BuddyResyncerGatherSlave(const StorageTarget& target,
   ChunkSyncCandidateStore* syncCandidates, BuddyResyncerGatherSlaveWorkQueue* workQueue,
   uint8_t slaveID) :
   PThread("BuddyResyncerGatherSlave_" + StringTk::uintToStr(target.getID()) + "-" +
      StringTk::uintToStr(slaveID)),
   target(target)
{
   this->isRunning = false;
   this->syncCandidates = syncCandidates;
   this->workQueue = workQueue;

   const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);

   staticGatherSlaves[this->getName()] = this;
}

BuddyResyncerGatherSlave::~BuddyResyncerGatherSlave()
{
}


/**
 * This is a component, which is started through its control frontend on-demand at
 * runtime and terminates when it's done.
 * We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
 * of this thread running at the same time.
 */
void BuddyResyncerGatherSlave::run()
{
   setIsRunning(true);

   numChunksDiscovered.setZero();
   numChunksMatched.setZero();
   numDirsDiscovered.setZero();
   numDirsMatched.setZero();

   try
   {
      LogContext(__func__).log(Log_DEBUG, "Component started.");

      registerSignalHandler();

      workLoop();

      LogContext(__func__).log(Log_DEBUG, "Component stopped.");
   }
   catch(std::exception& e)
   {
      PThread::getCurrentThreadApp()->handleComponentException(e);
   }

   setIsRunning(false);
}

void BuddyResyncerGatherSlave::workLoop()
{
   const unsigned maxOpenFDsNum = 20; // max open FDs => max path sub-depth for efficient traversal

   while (!getSelfTerminateNotIdle())
   {
      if ((workQueue->queueEmpty()) && (getSelfTerminate()))
         break;

      // get a directory to scan
      std::string pathStr = workQueue->fetch(this);

      if(unlikely(pathStr.empty()))
         continue;

      int nftwRes = nftw(pathStr.c_str(), handleDiscoveredEntry, maxOpenFDsNum, FTW_ACTIONRETVAL);
      if(nftwRes == -1)
      { // error occurred
         LogContext(__func__).logErr("Error during chunks walk. SysErr: " + System::getErrString());
      }
   }
}

int BuddyResyncerGatherSlave::handleDiscoveredEntry(const char* path,
   const struct stat* statBuf, int ftwEntryType, struct FTW* ftwBuf)
{
   std::string chunksPath;

   BuddyResyncerGatherSlave* thisStatic = nullptr;
   {
      const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);

      thisStatic = staticGatherSlaves[PThread::getCurrentThreadName()];
   }

   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   const auto& targetPath = thisStatic->target.getPath().str();
   chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;

   if (strlen(path) <= chunksPath.length())
      return FTW_CONTINUE;

   std::string relPathStr = path + chunksPath.size() + 1;

   if ( relPathStr.empty() )
      return FTW_CONTINUE;

   const auto lastBuddyComm = thisStatic->target.getLastBuddyComm();

   const bool buddyCommIsOverride = lastBuddyComm.first;
   int64_t lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
   int64_t lastBuddyCommSafetyThresholdSecs = cfg->getSysResyncSafetyThresholdMins()*60;
   if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
      lastBuddyCommTimeSecs = 0;
   else
   if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
      lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;

   if(ftwEntryType == FTW_D) // directory
   {
      thisStatic->numDirsDiscovered.increase();

      int64_t dirModificationTime = (int64_t)statBuf->st_mtim.tv_sec;

      if(dirModificationTime > lastBuddyCommTimeSecs)
      { // sync candidate
         ChunkSyncCandidateDir candidate(relPathStr, thisStatic->target.getID());
         thisStatic->syncCandidates->add(candidate, thisStatic);
         thisStatic->numDirsMatched.increase();
      }
   }
   else
   if(ftwEntryType == FTW_F) // file
   {
      // we found a chunk
      thisStatic->numChunksDiscovered.increase();

      // we need to use ctime here, because mtime can be set manually (even to the future)
      time_t chunkChangeTime = statBuf->st_ctim.tv_sec;

      if(chunkChangeTime > lastBuddyCommTimeSecs)
      { // sync candidate
         std::string relPathStr = path + chunksPath.size() + 1;

         ChunkSyncCandidateFile candidate(relPathStr, thisStatic->target.getID());
         thisStatic->syncCandidates->add(candidate, thisStatic);

         thisStatic->numChunksMatched.increase();
      }
   }

   return FTW_CONTINUE;
}
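workLoop() above hands each queued directory to nftw(3) with the glibc FTW_ACTIONRETVAL extension, so the callback controls the walk via FTW_CONTINUE. A minimal self-contained sketch of that call pattern; the callback body is illustrative:

#define _GNU_SOURCE 1 // FTW_ACTIONRETVAL is a glibc extension
#include <ftw.h>
#include <stdio.h>

// Minimal nftw() callback: report every regular file, never prune the walk.
static int printEntry(const char* path, const struct stat* statBuf, int ftwEntryType,
   struct FTW* ftwBuf)
{
   if (ftwEntryType == FTW_F)
      printf("file: %s (size %lld)\n", path, (long long)statBuf->st_size);

   return FTW_CONTINUE; // only valid together with FTW_ACTIONRETVAL
}

int walkTree(const char* rootPath)
{
   const int maxOpenFDs = 20; // same idea as maxOpenFDsNum above

   return nftw(rootPath, printEntry, maxOpenFDs, FTW_ACTIONRETVAL);
}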
@@ -0,0 +1,182 @@
#pragma once

#include <common/app/log/LogContext.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/components/ComponentInitException.h>
#include <common/threading/PThread.h>

#include <ftw.h>

class StorageTarget;

#define GATHERSLAVEQUEUE_MAXSIZE 5000

class BuddyResyncerGatherSlaveWorkQueue
{
   /*
    * This is more or less just a small class for convenience that is tightly coupled to
    * BuddyResyncerGatherSlave and BuddyResyncerJob
    */
   public:
      BuddyResyncerGatherSlaveWorkQueue(): gatherSlavesWorkQueueLen(0) { }

   private:
      StringList paths;
      size_t gatherSlavesWorkQueueLen; // used to avoid constant calling of size() method of list
      Mutex mutex;
      Condition pathAddedCond;
      Condition pathFetchedCond;

   public:
      void add(std::string& path, PThread* caller)
      {
         unsigned waitTimeoutMS = 3000;

         const std::lock_guard<Mutex> lock(mutex);

         while (gatherSlavesWorkQueueLen > GATHERSLAVEQUEUE_MAXSIZE)
         {
            if((caller) && (unlikely(caller->getSelfTerminate())))
               break;
            pathFetchedCond.timedwait(&mutex, waitTimeoutMS);
         }

         paths.push_back(path);
         gatherSlavesWorkQueueLen++;
         pathAddedCond.signal();
      }

      std::string fetch(PThread* caller)
      {
         unsigned waitTimeoutMS = 3000;

         const std::lock_guard<Mutex> lock(mutex);

         while (paths.empty())
         {
            if((caller) && (unlikely(caller->getSelfTerminate())))
            {
               return "";
            }

            pathAddedCond.timedwait(&mutex, waitTimeoutMS);
         }

         std::string retVal = paths.front();
         paths.pop_front();
         gatherSlavesWorkQueueLen--;
         pathFetchedCond.signal();

         return retVal;
      }

      bool queueEmpty()
      {
         const std::lock_guard<Mutex> lock(mutex);

         return gatherSlavesWorkQueueLen == 0;
      }

      void clear()
      {
         const std::lock_guard<Mutex> lock(mutex);

         paths.clear();
         gatherSlavesWorkQueueLen = 0;
      }
};
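BuddyResyncerGatherSlaveWorkQueue above is a small bounded producer/consumer queue. A hedged usage sketch; the function names are illustrative, while in the real code the producer is the resync job and the consumers are the gather slaves declared below:

// Illustrative only: how the bounded queue above is intended to be used.
void produceDirectories(BuddyResyncerGatherSlaveWorkQueue& queue, StringList& dirPaths,
   PThread* caller)
{
   for (StringListIter iter = dirPaths.begin(); iter != dirPaths.end(); iter++)
      queue.add(*iter, caller); // blocks while more than GATHERSLAVEQUEUE_MAXSIZE paths are queued
}

void consumeDirectories(BuddyResyncerGatherSlaveWorkQueue& queue, PThread* caller)
{
   for ( ; ; )
   {
      std::string path = queue.fetch(caller); // waits for work; "" means the caller shall terminate

      if (path.empty())
         break;

      // ... scan the directory, e.g. via nftw() as in BuddyResyncerGatherSlave::workLoop() ...
   }
}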

class BuddyResyncerGatherSlave : public PThread
{
   friend class BuddyResyncer; // (to grant access to internal mutex)
   friend class BuddyResyncJob; // (to grant access to internal mutex)

   public:
      BuddyResyncerGatherSlave(const StorageTarget& target, ChunkSyncCandidateStore* syncCandidates,
         BuddyResyncerGatherSlaveWorkQueue* workQueue, uint8_t slaveID);
      virtual ~BuddyResyncerGatherSlave();

      void workLoop();

   private:
      AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool

      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond;

      const StorageTarget& target;

      AtomicUInt64 numChunksDiscovered;
      AtomicUInt64 numChunksMatched;

      AtomicUInt64 numDirsDiscovered;
      AtomicUInt64 numDirsMatched;

      bool isRunning; // true if an instance of this component is currently running

      ChunkSyncCandidateStore* syncCandidates;
      BuddyResyncerGatherSlaveWorkQueue* workQueue;

      // the nftw() callback needs access to the slave threads
      static Mutex staticGatherSlavesMutex;
      static std::map<std::string, BuddyResyncerGatherSlave*> staticGatherSlaves;

      virtual void run();

      static int handleDiscoveredEntry(const char* path, const struct stat* statBuf,
         int ftwEntryType, struct FTW* ftwBuf);

   public:
      // getters & setters
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         return this->isRunning;
      }

      void getCounters(uint64_t& outNumChunksDiscovered, uint64_t& outNumChunksMatched,
         uint64_t& outNumDirsDiscovered, uint64_t& outNumDirsMatched)
      {
         outNumChunksDiscovered = numChunksDiscovered.read();
         outNumChunksMatched = numChunksMatched.read();
         outNumDirsDiscovered = numDirsDiscovered.read();
         outNumDirsMatched = numDirsMatched.read();
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         if (value)
            onlyTerminateIfIdle.set(1);
         else
            onlyTerminateIfIdle.setZero();
      }

      bool getOnlyTerminateIfIdle()
      {
         if (onlyTerminateIfIdle.read() == 0)
            return false;
         else
            return true;
      }

   private:
      // getters & setters

      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      bool getSelfTerminateNotIdle()
      {
         return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
      }
};

typedef std::vector<BuddyResyncerGatherSlave*> BuddyResyncerGatherSlaveVec;
typedef BuddyResyncerGatherSlaveVec::iterator BuddyResyncerGatherSlaveVecIter;

44
storage/source/components/buddyresyncer/SyncCandidate.h
Normal file
@@ -0,0 +1,44 @@
#pragma once

#include <common/storage/mirroring/SyncCandidateStore.h>

#include <string>

/**
 * A storage sync candidate. Has a target ID and a path.
 */
class ChunkSyncCandidateDir
{
   public:
      ChunkSyncCandidateDir(const std::string& relativePath, const uint16_t targetID)
         : relativePath(relativePath), targetID(targetID)
      { }

      ChunkSyncCandidateDir()
         : targetID(0)
      { }

   private:
      std::string relativePath;
      uint16_t targetID;

   public:
      const std::string& getRelativePath() const { return relativePath; }
      uint16_t getTargetID() const { return targetID; }
};

/**
 * A storage sync candidate for a single chunk file.
 */
class ChunkSyncCandidateFile : public ChunkSyncCandidateDir
{
   public:
      ChunkSyncCandidateFile(const std::string& relativePath, uint16_t targetID)
         : ChunkSyncCandidateDir(relativePath, targetID)
      { }

      ChunkSyncCandidateFile() = default;
};

typedef SyncCandidateStore<ChunkSyncCandidateDir, ChunkSyncCandidateFile> ChunkSyncCandidateStore;
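A short sketch of how the candidate types above travel through the ChunkSyncCandidateStore typedef; the add()/fetch() calls mirror their use in the resyncer slaves above, while the concrete paths are hypothetical:

#include <cstdio>

// Illustrative only; in the real code the gather slaves add candidates and the
// dir/file sync slaves fetch them.
void enqueueCandidates(ChunkSyncCandidateStore& store, uint16_t targetID, PThread* caller)
{
   ChunkSyncCandidateDir dirCandidate("u1B2C/3D4E", targetID);             // hypothetical rel. path
   ChunkSyncCandidateFile fileCandidate("u1B2C/3D4E/5-ABC123-1", targetID); // hypothetical rel. path

   store.add(dirCandidate, caller);
   store.add(fileCandidate, caller);
}

void consumeOneFileCandidate(ChunkSyncCandidateStore& store, PThread* caller)
{
   ChunkSyncCandidateFile candidate;
   store.fetch(candidate, caller); // blocks until a file candidate is available

   if (candidate.getTargetID() != 0) // targetID 0 marks an empty/dummy candidate
      printf("resync candidate: %s\n", candidate.getRelativePath().c_str());
}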

88
storage/source/components/chunkfetcher/ChunkFetcher.cpp
Normal file
@@ -0,0 +1,88 @@
#include "ChunkFetcher.h"

#include <program/Program.h>

#include <common/Common.h>

ChunkFetcher::ChunkFetcher()
   : log("ChunkFetcher")
{
   // for each targetID, put one fetcher thread into list
   for (const auto& mapping : Program::getApp()->getStorageTargets()->getTargets())
      this->slaves.emplace_back(mapping.first);
}

ChunkFetcher::~ChunkFetcher()
{
}

/**
 * Start fetcher slaves if they are not running already.
 *
 * @return true if successfully started or already running, false if startup problem occurred.
 */
bool ChunkFetcher::startFetching()
{
   const char* logContext = "ChunkFetcher (start)";
   bool retVal = true; // false if error occurred

   {
      const std::lock_guard<Mutex> lock(chunksListMutex);
      isBad = false;
   }

   for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
   {
      const std::lock_guard<Mutex> lock(iter->statusMutex);

      if(!iter->isRunning)
      {
         // slave thread not running yet => start it
         iter->resetSelfTerminate();

         try
         {
            iter->start();

            iter->isRunning = true;
         }
         catch (PThreadCreateException& e)
         {
            LogContext(logContext).logErr(std::string("Unable to start thread: ") + e.what());
            retVal = false;
         }
      }
   }

   return retVal;
}

void ChunkFetcher::stopFetching()
{
   for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
   {
      const std::lock_guard<Mutex> lock(iter->statusMutex);

      if(iter->isRunning)
      {
         iter->selfTerminate();
      }
   }
}

void ChunkFetcher::waitForStopFetching()
{
   for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
   {
      const std::lock_guard<Mutex> lock(iter->statusMutex);

      chunksListFetchedCondition.broadcast();

      while (iter->isRunning)
      {
         iter->isRunningChangeCond.wait(&(iter->statusMutex));
      }

      chunksList.clear();
   }
}

101
storage/source/components/chunkfetcher/ChunkFetcher.h
Normal file
@@ -0,0 +1,101 @@
#pragma once

#include <components/chunkfetcher/ChunkFetcherSlave.h>
#include <common/toolkit/ListTk.h>

#include <mutex>

#define MAX_CHUNKLIST_SIZE 5000

// forward declaration
class ChunkFetcher;

typedef std::list<ChunkFetcherSlave> ChunkFetcherSlaveList;
typedef ChunkFetcherSlaveList::iterator ChunkFetcherSlaveListIter;

/**
 * This is not a component that represents a separate thread. Instead, it contains and controls
 * slave threads, which are started and stopped on request (i.e. they are not automatically started
 * when the app is started).
 * The slave threads will run over all chunks on all targets and read them in a format suitable for
 * fsck.
 */
class ChunkFetcher
{
   public:
      ChunkFetcher();
      virtual ~ChunkFetcher();

      bool startFetching();
      void stopFetching();
      void waitForStopFetching();

   private:
      LogContext log;
      ChunkFetcherSlaveList slaves;

      FsckChunkList chunksList;
      Mutex chunksListMutex;
      Condition chunksListFetchedCondition;
      bool isBad;

   public:
      bool getIsBad()
      {
         const std::lock_guard<Mutex> lock(chunksListMutex);

         return isBad;
      }

      void setBad()
      {
         const std::lock_guard<Mutex> lock(chunksListMutex);

         isBad = true;
      }

      void addChunk(FsckChunk& chunk)
      {
         const std::lock_guard<Mutex> lock(chunksListMutex);

         if (chunksList.size() > MAX_CHUNKLIST_SIZE)
            chunksListFetchedCondition.wait(&chunksListMutex);

         chunksList.push_back(chunk);
      }

      bool isQueueEmpty()
      {
         std::lock_guard<Mutex> lock(chunksListMutex);
         return chunksList.empty();
      }


      void getAndDeleteChunks(FsckChunkList& outList, unsigned numChunks)
      {
         const std::lock_guard<Mutex> lock(chunksListMutex);

         FsckChunkListIter iterEnd = this->chunksList.begin();
         ListTk::advance(this->chunksList, iterEnd, numChunks);

         outList.splice(outList.end(), this->chunksList, this->chunksList.begin(), iterEnd);

         chunksListFetchedCondition.signal();
      }

      unsigned getNumRunning()
      {
         unsigned retVal = 0;

         for (ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
         {
            const std::lock_guard<Mutex> lock(iter->statusMutex);

            if ( iter->isRunning )
               retVal++;
         }

         return retVal;
      }
};
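The class above is filled by the slaves via addChunk() and drained through getAndDeleteChunks(). A hedged sketch of such a drain loop, using only the public methods declared above; batch size and sleep interval are illustrative:

// Illustrative consumer loop for the ChunkFetcher declared above.
void drainChunkFetcher(ChunkFetcher& fetcher)
{
   if (!fetcher.startFetching())
      return; // slave startup failed

   while (fetcher.getNumRunning() > 0 || !fetcher.isQueueEmpty())
   {
      FsckChunkList batch;
      fetcher.getAndDeleteChunks(batch, 100); // take up to 100 chunks; wakes blocked slaves

      // ... hand the batch over to fsck here ...

      if (batch.empty())
         PThread::sleepMS(50); // nothing available yet; avoid busy-waiting
   }

   fetcher.waitForStopFetching();
}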

165
storage/source/components/chunkfetcher/ChunkFetcherSlave.cpp
Normal file
@@ -0,0 +1,165 @@
#include "ChunkFetcherSlave.h"

#include <program/Program.h>

#include <boost/static_assert.hpp>
#include <libgen.h>

ChunkFetcherSlave::ChunkFetcherSlave(uint16_t targetID):
   PThread("ChunkFetcherSlave-" + StringTk::uintToStr(targetID) ),
   log("ChunkFetcherSlave-" + StringTk::uintToStr(targetID) ),
   isRunning(false),
   targetID(targetID)
{
}

ChunkFetcherSlave::~ChunkFetcherSlave()
{
}

void ChunkFetcherSlave::run()
{
   setIsRunning(true);

   try
   {
      registerSignalHandler();

      walkAllChunks();

      log.log(4, "Component stopped.");
   }
   catch(std::exception& e)
   {
      PThread::getCurrentThreadApp()->handleComponentException(e);
   }

   setIsRunning(false);
}

/*
 * walk over all chunks in that target
 */
void ChunkFetcherSlave::walkAllChunks()
{
   App* app = Program::getApp();

   log.log(Log_DEBUG, "Starting chunks walk...");

   const auto& target = *app->getStorageTargets()->getTargets().at(targetID);

   const auto& targetPath = target.getPath().str();

   // walk over "normal" chunks (i.e. no mirrors)
   std::string walkPath = targetPath + "/" + CONFIG_CHUNK_SUBDIR_NAME;
   if(!walkChunkPath(walkPath, 0, walkPath.size() ) )
      return;

   // let's find out if this target is part of a buddy mirror group and if it is the primary
   // target; if it is, walk over buddy mirror directory
   bool isPrimaryTarget;
   uint16_t buddyGroupID = app->getMirrorBuddyGroupMapper()->getBuddyGroupID(this->targetID,
      &isPrimaryTarget);

   if (isPrimaryTarget)
   {
      walkPath = targetPath + "/" CONFIG_BUDDYMIRROR_SUBDIR_NAME;
      if(!walkChunkPath(walkPath, buddyGroupID, walkPath.size() ) )
         return;
   }

   log.log(Log_DEBUG, "End of chunks walk.");
}

bool ChunkFetcherSlave::walkChunkPath(const std::string& path, uint16_t buddyGroupID,
   unsigned basePathLen)
{
   DIR* dir = ::opendir(path.c_str() );
   if(!dir)
   {
      LOG(GENERAL, WARNING, "Could not open directory.", path, targetID, sysErr);
      Program::getApp()->getChunkFetcher()->setBad();
      return false;
   }

   int readRes;
   bool result = true;

   std::string pathBuf = path;
   pathBuf.push_back('/');

   while(!getSelfTerminate())
   {
      ::dirent* item;

      // we really want struct dirent to contain a reasonably sized array for the filename
      BOOST_STATIC_ASSERT(sizeof(item->d_name) >= NAME_MAX + 1);

#if USE_READDIR_R
      ::dirent entry;
      readRes = ::readdir_r(dir, &entry, &item);
#else
      errno = 0;
      item = readdir(dir);
      readRes = item ? 0 : errno;
#endif
      if(readRes != 0)
      {
         LOG(GENERAL, WARNING, "readdir failed.", path, targetID, sysErr(readRes));
         result = false;
         break;
      }

      if(!item)
         break;

      if(::strcmp(item->d_name, ".") == 0 || ::strcmp(item->d_name, "..") == 0)
         continue;

      pathBuf.resize(path.size() + 1);
      pathBuf += item->d_name;

      struct stat statBuf;

      int statRes = ::stat(pathBuf.c_str(), &statBuf);
      if(statRes)
      {
         LOG(GENERAL, WARNING, "Could not stat directory.", ("path", pathBuf), targetID, sysErr);
         result = false;
         break;
      }

      if(S_ISDIR(statBuf.st_mode) )
      {
         result = walkChunkPath(pathBuf, buddyGroupID, basePathLen);
         if(!result)
            break;
      }
      else
      {
         const char* relativeChunkPath = pathBuf.c_str() + basePathLen + 1;

         // get only the dirname part of the path
         char* tmpPathCopy = strdup(relativeChunkPath);
         Path savedPath(dirname(tmpPathCopy) );

         free(tmpPathCopy);

         FsckChunk fsckChunk(item->d_name, targetID, savedPath, statBuf.st_size, statBuf.st_blocks,
            statBuf.st_ctime, statBuf.st_mtime, statBuf.st_atime, statBuf.st_uid, statBuf.st_gid,
            buddyGroupID);

         Program::getApp()->getChunkFetcher()->addChunk(fsckChunk);
      }
   }

   ::closedir(dir);

   if (getSelfTerminate())
      result = false;

   if(!result)
      Program::getApp()->getChunkFetcher()->setBad();

   return result;
}

62
storage/source/components/chunkfetcher/ChunkFetcherSlave.h
Normal file
@@ -0,0 +1,62 @@
#pragma once

#include <common/app/log/LogContext.h>
#include <common/components/ComponentInitException.h>
#include <common/fsck/FsckChunk.h>
#include <common/threading/PThread.h>

#include <mutex>

class ChunkFetcher; // forward decl.

/**
 * This component runs over all chunks of one target and gathers information suitable for fsck.
 *
 * This component is not auto-started when the app starts. It is started and stopped by the
 * ChunkFetcher.
 */
class ChunkFetcherSlave : public PThread
{
   friend class ChunkFetcher; // (to grant access to internal mutex)

   public:
      ChunkFetcherSlave(uint16_t targetID);
      virtual ~ChunkFetcherSlave();

   private:
      LogContext log;

      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond;

      bool isRunning; // true if an instance of this component is currently running

      uint16_t targetID;

      virtual void run();

   public:
      // getters & setters
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         return this->isRunning;
      }

   private:
      void walkAllChunks();

      bool walkChunkPath(const std::string& path, uint16_t buddyGroupID, unsigned basePathLen);

      // getters & setters

      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }
};
@@ -0,0 +1,32 @@
#pragma once

#include <app/App.h>
#include <common/components/streamlistenerv2/StreamListenerV2.h>
#include <program/Program.h>


/**
 * Unlike the common StreamListenerV2, this class can handle multiple work queues through an
 * overridden getWorkQueue() method.
 */
class StorageStreamListenerV2 : public StreamListenerV2
{
   public:
      StorageStreamListenerV2(std::string listenerID, AbstractApp* app):
         StreamListenerV2(listenerID, app, NULL)
      {
         // nothing to be done here
      }

      virtual ~StorageStreamListenerV2() {}


   protected:
      // getters & setters

      virtual MultiWorkQueue* getWorkQueue(uint16_t targetID) const
      {
         return Program::getApp()->getWorkQueue(targetID);
      }
};

84
storage/source/components/worker/StorageBenchWork.cpp
Normal file
@@ -0,0 +1,84 @@
#include <common/app/log/LogContext.h>
#include <common/benchmark/StorageBench.h>
#include <common/toolkit/StringTk.h>
#include <program/Program.h>
#include "StorageBenchWork.h"

void StorageBenchWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
   unsigned bufOutLen)
{
   const char* logContext = "Storage Benchmark (run)";

   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   int workRes = 0; // return value for benchmark operator
   ssize_t ioRes = 0; // read/write result

   if (this->type == StorageBenchType_READ)
   {
      size_t readSize = cfg->getTuneFileReadSize();
      size_t toBeRead = this->bufLen;
      size_t bufOffset = 0;

      while(toBeRead)
      {
         size_t currentReadSize = BEEGFS_MIN(readSize, toBeRead);

         ioRes = read(this->fileDescriptor, &this->buf[bufOffset], currentReadSize);
         if (ioRes <= 0)
            break;

         toBeRead -= currentReadSize;
         bufOffset += currentReadSize;
      }

      app->getNodeOpStats()->updateNodeOp(0, StorageOpCounter_READOPS,
         this->bufLen, NETMSG_DEFAULT_USERID);
   }
   else
   if (this->type == StorageBenchType_WRITE)
   {
      size_t writeSize = cfg->getTuneFileWriteSize();
      size_t toBeWritten = this->bufLen;
      size_t bufOffset = 0;

      while(toBeWritten)
      {
         size_t currentWriteSize = BEEGFS_MIN(writeSize, toBeWritten);

         ioRes = write(this->fileDescriptor, &this->buf[bufOffset], currentWriteSize);
         if (ioRes <= 0)
            break;

         toBeWritten -= currentWriteSize;
         bufOffset += currentWriteSize;
      }

      app->getNodeOpStats()->updateNodeOp(0, StorageOpCounter_WRITEOPS,
         this->bufLen, NETMSG_DEFAULT_USERID);
   }
   else
   { // unknown benchmark type
      workRes = STORAGEBENCH_ERROR_WORKER_ERROR;
      LogContext(logContext).logErr("Error: unknown benchmark type");
   }

   if(unlikely(workRes < 0) || unlikely(ioRes == -1) )
   { // error occurred
      if (ioRes == -1)
      { // read or write operation failed
         LogContext(logContext).logErr(std::string("Error: I/O failure. SysErr: ") +
            System::getErrString() );
      }

      workRes = STORAGEBENCH_ERROR_WORKER_ERROR;

      this->operatorCommunication->getWriteFD()->write(&workRes, sizeof(int) );
   }
   else
   { // success
      this->operatorCommunication->getWriteFD()->write(&this->threadID, sizeof(int) );
   }
}
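The benchmark loops above advance by the requested size and stop on any short or failed transfer. For comparison, a small defensive helper that advances by the number of bytes the syscall actually transferred; plain POSIX, not taken from the sources:

#include <unistd.h>
#include <cerrno>
#include <cstddef>

// Write the whole buffer, advancing by the actual number of bytes written.
// Returns true on success, false on error (errno is left set by write()).
static bool writeAll(int fd, const char* buf, size_t len)
{
   size_t done = 0;

   while (done < len)
   {
      ssize_t writeRes = write(fd, buf + done, len - done);

      if (writeRes == -1)
      {
         if (errno == EINTR)
            continue; // interrupted by a signal => retry

         return false;
      }

      done += (size_t)writeRes;
   }

   return true;
}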

43
storage/source/components/worker/StorageBenchWork.h
Normal file
@@ -0,0 +1,43 @@
#pragma once

#include <common/benchmark/StorageBench.h>
#include <common/components/worker/Work.h>
#include <common/toolkit/Pipe.h>
#include <common/Common.h>


class StorageBenchWork: public Work
{
   public:
      StorageBenchWork(uint16_t targetID, int threadID, int fileDescriptor,
         StorageBenchType type, int64_t bufLen, Pipe* operatorCommunication, char* buf)
      {
         this->targetID = targetID;
         this->threadID = threadID;
         this->fileDescriptor = fileDescriptor;

         this->type = type;
         this->bufLen = bufLen;
         this->operatorCommunication = operatorCommunication;
         this->buf = buf;
      }

      virtual ~StorageBenchWork()
      {
      }

      void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   protected:

   private:
      uint16_t targetID;
      int threadID; // virtual threadID
      int fileDescriptor;
      StorageBenchType type;
      int64_t bufLen;
      char* buf;
      Pipe* operatorCommunication;
};
203
storage/source/net/message/NetMessageFactory.cpp
Normal file
@@ -0,0 +1,203 @@
|
||||
// control messages
|
||||
#include <common/net/message/control/AuthenticateChannelMsgEx.h>
|
||||
#include <common/net/message/control/GenericResponseMsg.h>
|
||||
#include <common/net/message/control/PeerInfoMsgEx.h>
|
||||
#include "control/AckMsgEx.h"
|
||||
#include "control/SetChannelDirectMsgEx.h"
|
||||
|
||||
// nodes messages
|
||||
#include <common/net/message/nodes/ChangeTargetConsistencyStatesRespMsg.h>
|
||||
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetNodesRespMsg.h>
|
||||
#include <common/net/message/nodes/GetStatesAndBuddyGroupsRespMsg.h>
|
||||
#include <common/net/message/nodes/storagepools/GetStoragePoolsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetTargetStatesRespMsg.h>
|
||||
#include <common/net/message/nodes/MapTargetsRespMsg.h>
|
||||
#include <common/net/message/nodes/RegisterNodeRespMsg.h>
|
||||
#include <common/net/message/nodes/RegisterTargetRespMsg.h>
|
||||
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
|
||||
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
|
||||
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
|
||||
#include <net/message/nodes/GenericDebugMsgEx.h>
|
||||
#include <net/message/nodes/GetClientStatsMsgEx.h>
|
||||
#include <net/message/nodes/HeartbeatMsgEx.h>
|
||||
#include <net/message/nodes/HeartbeatRequestMsgEx.h>
|
||||
#include <net/message/nodes/MapTargetsMsgEx.h>
|
||||
#include <net/message/nodes/PublishCapacitiesMsgEx.h>
|
||||
#include <net/message/nodes/RefreshTargetStatesMsgEx.h>
|
||||
#include <net/message/nodes/RemoveBuddyGroupMsgEx.h>
|
||||
#include <net/message/nodes/RemoveNodeMsgEx.h>
|
||||
#include <net/message/nodes/SetMirrorBuddyGroupMsgEx.h>
|
||||
#include <net/message/nodes/SetTargetConsistencyStatesMsgEx.h>
|
||||
#include <net/message/nodes/GetTargetConsistencyStatesMsgEx.h>
|
||||
|
||||
// storage messages
|
||||
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
|
||||
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
|
||||
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
|
||||
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
|
||||
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
|
||||
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
|
||||
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
|
||||
#include <common/net/message/storage/quota/GetQuotaInfoMsg.h>
|
||||
#include <common/net/message/storage/quota/RequestExceededQuotaRespMsg.h>
|
||||
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
|
||||
#include <common/net/message/storage/SetStorageTargetInfoRespMsg.h>
|
||||
#include <net/message/storage/attribs/GetChunkFileAttribsMsgEx.h>
|
||||
#include <net/message/storage/attribs/SetLocalAttrMsgEx.h>
|
||||
#include <net/message/storage/creating/RmChunkPathsMsgEx.h>
|
||||
#include <net/message/storage/creating/UnlinkLocalFileMsgEx.h>
|
||||
#include <net/message/storage/listing/ListChunkDirIncrementalMsgEx.h>
|
||||
#include <net/message/storage/mirroring/GetStorageResyncStatsMsgEx.h>
|
||||
#include <net/message/storage/mirroring/ResyncLocalFileMsgEx.h>
|
||||
#include <net/message/storage/mirroring/SetLastBuddyCommOverrideMsgEx.h>
|
||||
#include <net/message/storage/mirroring/StorageResyncStartedMsgEx.h>
|
||||
#include <net/message/storage/quota/GetQuotaInfoMsgEx.h>
|
||||
#include <net/message/storage/quota/SetExceededQuotaMsgEx.h>
|
||||
#include <net/message/storage/GetHighResStatsMsgEx.h>
|
||||
#include <net/message/storage/StatStoragePathMsgEx.h>
|
||||
#include <net/message/storage/TruncLocalFileMsgEx.h>
|
||||
|
||||
// session messages
|
||||
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
|
||||
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
|
||||
#include <net/message/session/opening/CloseChunkFileMsgEx.h>
|
||||
#include <net/message/session/rw/ReadLocalFileV2MsgEx.h>
|
||||
#include <net/message/session/rw/WriteLocalFileMsgEx.h>
|
||||
#include <net/message/session/FSyncLocalFileMsgEx.h>
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include <net/message/session/rw/ReadLocalFileRDMAMsgEx.h>
|
||||
#include <net/message/session/rw/WriteLocalFileRDMAMsgEx.h>
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
// mon messages
|
||||
#include <net/message/mon/RequestStorageDataMsgEx.h>
|
||||
|
||||
// fsck
|
||||
#include <net/message/fsck/DeleteChunksMsgEx.h>
|
||||
#include <net/message/fsck/FetchFsckChunkListMsgEx.h>
|
||||
#include <net/message/fsck/MoveChunkFileMsgEx.h>
|
||||
|
||||
// storage benchmark
|
||||
#include <common/net/message/nodes/StorageBenchControlMsg.h>
|
||||
#include <net/message/nodes/StorageBenchControlMsgEx.h>
|
||||
|
||||
// chunk balancing
|
||||
#include <common/net/message/storage/chunkbalancing/StripePatternUpdateRespMsg.h>
|
||||
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
|
||||
#include <net/message/storage/chunkbalancing/CpChunkPathsMsgEx.h>
|
||||
|
||||
#include <common/net/message/SimpleMsg.h>
|
||||
#include <net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.h>
|
||||
#include "NetMessageFactory.h"
|
||||
|
||||
/**
|
||||
* @return NetMessage owned by the caller (returned as std::unique_ptr);
|
||||
* (msg->msgType is NETMSGTYPE_Invalid on error)
|
||||
*/
|
||||
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
|
||||
{
|
||||
NetMessage* msg;
|
||||
|
||||
switch(msgType)
|
||||
{
|
||||
// The following lines are grouped by "type of the message" and ordered alphabetically inside
|
||||
// the groups. There should always be one message per line to keep a clear layout (although
|
||||
// this might lead to lines that are longer than usual)
|
||||
|
||||
// control messages
|
||||
case NETMSGTYPE_Ack: { msg = new AckMsgEx(); } break;
|
||||
case NETMSGTYPE_AuthenticateChannel: { msg = new AuthenticateChannelMsgEx(); } break;
|
||||
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
|
||||
case NETMSGTYPE_SetChannelDirect: { msg = new SetChannelDirectMsgEx(); } break;
|
||||
case NETMSGTYPE_PeerInfo: { msg = new PeerInfoMsgEx(); } break;
|
||||
|
||||
// nodes messages
|
||||
case NETMSGTYPE_ChangeTargetConsistencyStatesResp: { msg = new ChangeTargetConsistencyStatesRespMsg(); } break;
|
||||
case NETMSGTYPE_GenericDebug: { msg = new GenericDebugMsgEx(); } break;
|
||||
case NETMSGTYPE_GetClientStats: { msg = new GetClientStatsMsgEx(); } break;
|
||||
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
|
||||
case NETMSGTYPE_GetStatesAndBuddyGroupsResp: { msg = new GetStatesAndBuddyGroupsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetStoragePoolsResp: { msg = new GetStoragePoolsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetTargetStatesResp: { msg = new GetTargetStatesRespMsg(); } break;
|
||||
case NETMSGTYPE_HeartbeatRequest: { msg = new HeartbeatRequestMsgEx(); } break;
|
||||
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
|
||||
case NETMSGTYPE_MapTargets: { msg = new MapTargetsMsgEx(); } break;
|
||||
case NETMSGTYPE_PublishCapacities: { msg = new PublishCapacitiesMsgEx(); } break;
|
||||
case NETMSGTYPE_MapTargetsResp: { msg = new MapTargetsRespMsg(); } break;
|
||||
case NETMSGTYPE_StorageBenchControlMsg: {msg = new StorageBenchControlMsgEx(); } break;
|
||||
case NETMSGTYPE_RefreshStoragePools: { msg = new RefreshStoragePoolsMsgEx(); } break;
|
||||
case NETMSGTYPE_RefreshTargetStates: { msg = new RefreshTargetStatesMsgEx(); } break;
|
||||
case NETMSGTYPE_RegisterNodeResp: { msg = new RegisterNodeRespMsg(); } break;
|
||||
case NETMSGTYPE_RegisterTargetResp: { msg = new RegisterTargetRespMsg(); } break;
|
||||
case NETMSGTYPE_RemoveBuddyGroup: { msg = new RemoveBuddyGroupMsgEx(); } break;
|
||||
case NETMSGTYPE_RemoveNode: { msg = new RemoveNodeMsgEx(); } break;
|
||||
case NETMSGTYPE_RemoveNodeResp: { msg = new RemoveNodeRespMsg(); } break;
|
||||
case NETMSGTYPE_SetMirrorBuddyGroup: { msg = new SetMirrorBuddyGroupMsgEx(); } break;
|
||||
case NETMSGTYPE_SetTargetConsistencyStates: { msg = new SetTargetConsistencyStatesMsgEx(); } break;
|
||||
case NETMSGTYPE_SetTargetConsistencyStatesResp: { msg = new SetTargetConsistencyStatesRespMsg(); } break;
|
||||
case NETMSGTYPE_GetTargetConsistencyStates: { msg = new GetTargetConsistencyStatesMsgEx(); } break;
|
||||
case NETMSGTYPE_GetTargetConsistencyStatesResp: { msg = new GetTargetConsistencyStatesRespMsg(); } break;
|
||||
|
||||
// storage messages
|
||||
case NETMSGTYPE_CpChunkPaths: { msg = new CpChunkPathsMsgEx(); } break;
|
||||
case NETMSGTYPE_CpChunkPathsResp: { msg = new CpChunkPathsRespMsg(); } break;
|
||||
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
|
||||
case NETMSGTYPE_GetChunkFileAttribs: { msg = new GetChunkFileAttribsMsgEx(); } break;
|
||||
case NETMSGTYPE_GetHighResStats: { msg = new GetHighResStatsMsgEx(); } break;
|
||||
case NETMSGTYPE_GetQuotaInfo: {msg = new GetQuotaInfoMsgEx(); } break;
|
||||
case NETMSGTYPE_GetStorageResyncStats: { msg = new GetStorageResyncStatsMsgEx(); } break;
|
||||
case NETMSGTYPE_ListChunkDirIncremental: { msg = new ListChunkDirIncrementalMsgEx(); } break;
|
||||
case NETMSGTYPE_ListChunkDirIncrementalResp: { msg = new ListChunkDirIncrementalRespMsg(); } break;
|
||||
case NETMSGTYPE_RequestExceededQuotaResp: {msg = new RequestExceededQuotaRespMsg(); } break;
|
||||
case NETMSGTYPE_ResyncLocalFile: { msg = new ResyncLocalFileMsgEx(); } break;
|
||||
case NETMSGTYPE_ResyncLocalFileResp: { msg = new ResyncLocalFileRespMsg(); } break;
|
||||
case NETMSGTYPE_RmChunkPaths: { msg = new RmChunkPathsMsgEx(); } break;
|
||||
case NETMSGTYPE_RmChunkPathsResp: { msg = new RmChunkPathsRespMsg(); } break;
|
||||
case NETMSGTYPE_SetExceededQuota: {msg = new SetExceededQuotaMsgEx(); } break;
|
||||
case NETMSGTYPE_SetLastBuddyCommOverride: { msg = new SetLastBuddyCommOverrideMsgEx(); } break;
|
||||
case NETMSGTYPE_SetLocalAttr: { msg = new SetLocalAttrMsgEx(); } break;
|
||||
case NETMSGTYPE_SetLocalAttrResp: { msg = new SetLocalAttrRespMsg(); } break;
|
||||
case NETMSGTYPE_SetStorageTargetInfoResp: { msg = new SetStorageTargetInfoRespMsg(); } break;
|
||||
case NETMSGTYPE_StatStoragePath: { msg = new StatStoragePathMsgEx(); } break;
|
||||
case NETMSGTYPE_StorageResyncStarted: { msg = new StorageResyncStartedMsgEx(); } break;
|
||||
case NETMSGTYPE_StorageResyncStartedResp: { msg = new StorageResyncStartedRespMsg(); } break;
|
||||
case NETMSGTYPE_StripePatternUpdateResp: { msg = new StripePatternUpdateRespMsg(); } break;
|
||||
case NETMSGTYPE_TruncLocalFile: { msg = new TruncLocalFileMsgEx(); } break;
|
||||
case NETMSGTYPE_TruncLocalFileResp: { msg = new TruncLocalFileRespMsg(); } break;
|
||||
case NETMSGTYPE_UnlinkLocalFile: { msg = new UnlinkLocalFileMsgEx(); } break;
|
||||
case NETMSGTYPE_UnlinkLocalFileResp: { msg = new UnlinkLocalFileRespMsg(); } break;
|
||||
|
||||
// session messages
|
||||
case NETMSGTYPE_CloseChunkFile: { msg = new CloseChunkFileMsgEx(); } break;
|
||||
case NETMSGTYPE_CloseChunkFileResp: { msg = new CloseChunkFileRespMsg(); } break;
|
||||
case NETMSGTYPE_FSyncLocalFile: { msg = new FSyncLocalFileMsgEx(); } break;
|
||||
case NETMSGTYPE_ReadLocalFileV2: { msg = new ReadLocalFileV2MsgEx(); } break;
|
||||
case NETMSGTYPE_WriteLocalFile: { msg = new WriteLocalFileMsgEx(); } break;
|
||||
case NETMSGTYPE_WriteLocalFileResp: { msg = new WriteLocalFileRespMsg(); } break;
|
||||
#ifdef BEEGFS_NVFS
|
||||
case NETMSGTYPE_ReadLocalFileRDMA: { msg = new ReadLocalFileRDMAMsgEx(); } break;
|
||||
case NETMSGTYPE_WriteLocalFileRDMA: { msg = new WriteLocalFileRDMAMsgEx(); } break;
|
||||
#endif // BEEGFS_NVFS
|
||||
|
||||
// mon message
|
||||
case NETMSGTYPE_RequestStorageData: { msg = new RequestStorageDataMsgEx(); } break;
|
||||
|
||||
// fsck
|
||||
case NETMSGTYPE_DeleteChunks: { msg = new DeleteChunksMsgEx(); } break;
|
||||
case NETMSGTYPE_FetchFsckChunkList: { msg = new FetchFsckChunkListMsgEx(); } break;
|
||||
case NETMSGTYPE_MoveChunkFile: { msg = new MoveChunkFileMsgEx(); } break;
|
||||
|
||||
default:
|
||||
{
|
||||
msg = new SimpleMsg(NETMSGTYPE_Invalid);
|
||||
} break;
|
||||
}
|
||||
|
||||
return std::unique_ptr<NetMessage>(msg);
|
||||
}
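// Usage sketch (illustrative, assumed flow based on the AbstractNetMessageFactory
// base class referenced in the header): once a message header has been parsed off
// the wire, the factory produces a matching message object, and NETMSGTYPE_Invalid
// marks an unknown type:
//
//    std::unique_ptr<NetMessage> msg = createFromMsgType(header.msgType);
//    if (msg->getMsgType() == NETMSGTYPE_Invalid)
//       ; // unknown/unsupported message type: drop it or answer with an error
//    else
//       ; // deserialize the payload, then dispatch msg->processIncoming(ctx)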
|
||||
|
||||
14
storage/source/net/message/NetMessageFactory.h
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/net/message/AbstractNetMessageFactory.h>
|
||||
|
||||
class NetMessageFactory : public AbstractNetMessageFactory
|
||||
{
|
||||
public:
|
||||
NetMessageFactory() {}
|
||||
|
||||
protected:
|
||||
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
|
||||
};
|
||||
|
||||
22
storage/source/net/message/control/AckMsgEx.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
#include <program/Program.h>
|
||||
#include "AckMsgEx.h"
|
||||
|
||||
bool AckMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
LogContext log("Ack incoming");
|
||||
|
||||
LOG_DEBUG_CONTEXT(log, 5, std::string("Value: ") + getValue() );
|
||||
|
||||
AcknowledgmentStore* ackStore = Program::getApp()->getAckStore();
|
||||
ackStore->receivedAck(getValue() );
|
||||
|
||||
// note: this message does not require a response
|
||||
|
||||
App* app = Program::getApp();
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_ACK,
|
||||
getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
13
storage/source/net/message/control/AckMsgEx.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/control/AckMsg.h>
|
||||
|
||||
// see class AcknowledgeableMsg (fhgfs_common) for a short description
|
||||
|
||||
class AckMsgEx : public AckMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
|
||||
19
storage/source/net/message/control/SetChannelDirectMsgEx.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <program/Program.h>
|
||||
#include "SetChannelDirectMsgEx.h"
|
||||
|
||||
bool SetChannelDirectMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
LogContext log("SetChannelDirect incoming");
|
||||
|
||||
LOG_DEBUG_CONTEXT(log, 5, std::string("Value: ") + StringTk::intToStr(getValue() ) );
|
||||
|
||||
ctx.getSocket()->setIsDirect(getValue() );
|
||||
|
||||
App* app = Program::getApp();
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
|
||||
StorageOpCounter_SETCHANNELDIRECT, getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
13
storage/source/net/message/control/SetChannelDirectMsgEx.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/control/SetChannelDirectMsg.h>
|
||||
|
||||
// direct means the message is definitely processed on this server and not forwarded to another
|
||||
|
||||
class SetChannelDirectMsgEx : public SetChannelDirectMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
|
||||
60
storage/source/net/message/fsck/DeleteChunksMsgEx.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
#include "DeleteChunksMsgEx.h"
|
||||
|
||||
#include <program/Program.h>
|
||||
#include <toolkit/StorageTkEx.h>
|
||||
|
||||
bool DeleteChunksMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
const char* logContext = "DeleteChunksMsg incoming";
|
||||
|
||||
App* app = Program::getApp();
|
||||
ChunkStore* chunkDirStore = app->getChunkDirStore();
|
||||
|
||||
FsckChunkList& chunks = getChunks();
|
||||
FsckChunkList failedDeletes;
|
||||
|
||||
for ( FsckChunkListIter iter = chunks.begin(); iter != chunks.end(); iter++ )
|
||||
{
|
||||
std::string chunkDirRelative;
|
||||
std::string delPathStrRelative;
|
||||
bool isMirrorFD = iter->getBuddyGroupID();
|
||||
|
||||
chunkDirRelative = iter->getSavedPath()->str();
|
||||
|
||||
delPathStrRelative = chunkDirRelative + "/" + iter->getID();
|
||||
|
||||
auto* const target = app->getStorageTargets()->getTarget(iter->getTargetID());
|
||||
|
||||
if (!target)
|
||||
{ // unknown targetID
|
||||
LogContext(logContext).logErr(std::string("Unknown targetID: ") +
|
||||
StringTk::uintToStr(iter->getTargetID()));
|
||||
failedDeletes.push_back(*iter);
|
||||
}
|
||||
else
|
||||
{ // valid targetID
|
||||
int targetFD = isMirrorFD ? *target->getMirrorFD() : *target->getChunkFD();
|
||||
int unlinkRes = unlinkat(targetFD, delPathStrRelative.c_str(), 0);
|
||||
if ( (unlinkRes == -1) && (errno != ENOENT) )
|
||||
{ // error
|
||||
LogContext(logContext).logErr(
|
||||
"Unable to unlink file: " + delPathStrRelative + ". " + "SysErr: "
|
||||
+ System::getErrString());
|
||||
|
||||
failedDeletes.push_back(*iter);
|
||||
}
|
||||
|
||||
// Now try to rmdir chunkDirPath (checks if it is empty)
|
||||
if (unlinkRes == 0)
|
||||
{
|
||||
Path chunkDirRelativeVec(chunkDirRelative);
|
||||
chunkDirStore->rmdirChunkDirPath(targetFD, &chunkDirRelativeVec);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
ctx.sendResponse(DeleteChunksRespMsg(&failedDeletes) );
|
||||
|
||||
return true;
|
||||
}
|
||||
12
storage/source/net/message/fsck/DeleteChunksMsgEx.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/NetMessage.h>
|
||||
#include <common/net/message/fsck/DeleteChunksMsg.h>
|
||||
#include <common/net/message/fsck/DeleteChunksRespMsg.h>
|
||||
|
||||
class DeleteChunksMsgEx : public DeleteChunksMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
53
storage/source/net/message/fsck/FetchFsckChunkListMsgEx.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
#include "FetchFsckChunkListMsgEx.h"
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
bool FetchFsckChunkListMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
ChunkFetcher* chunkFetcher = app->getChunkFetcher();
|
||||
|
||||
FetchFsckChunkListStatus status;
|
||||
FsckChunkList chunkList;
|
||||
|
||||
if (getLastStatus() == FetchFsckChunkListStatus_NOTSTARTED)
|
||||
{
|
||||
// This is the first message of a new Fsck run
|
||||
if (chunkFetcher->getNumRunning() != 0 || !chunkFetcher->isQueueEmpty())
|
||||
{
|
||||
// another fsck is already in progress
|
||||
if (!getForceRestart())
|
||||
{
|
||||
LOG(GENERAL, NOTICE, "Received request to start fsck although previous run is not finished. "
|
||||
"Not starting.", ("From", ctx.peerName()));
|
||||
|
||||
ctx.sendResponse(FetchFsckChunkListRespMsg(&chunkList,
|
||||
FetchFsckChunkListStatus_NOTSTARTED));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(GENERAL, NOTICE, "Aborting previous fsck chunk fetcher run by user request.",
|
||||
("From", ctx.peerName()));
|
||||
|
||||
chunkFetcher->stopFetching();
|
||||
chunkFetcher->waitForStopFetching();
|
||||
}
|
||||
}
|
||||
|
||||
chunkFetcher->startFetching();
|
||||
}
|
||||
|
||||
|
||||
if(chunkFetcher->getIsBad())
|
||||
status = FetchFsckChunkListStatus_READERROR;
|
||||
else if (chunkFetcher->getNumRunning() == 0)
|
||||
status = FetchFsckChunkListStatus_FINISHED;
|
||||
else
|
||||
status = FetchFsckChunkListStatus_RUNNING;
|
||||
|
||||
chunkFetcher->getAndDeleteChunks(chunkList, getMaxNumChunks());
|
||||
|
||||
ctx.sendResponse(FetchFsckChunkListRespMsg(&chunkList, status));
|
||||
return true;
|
||||
}
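// Status summary (derived from the handler above): the response carries
// FetchFsckChunkListStatus_NOTSTARTED when a previous run blocks a new one,
// _READERROR when the fetcher reported a read error, _FINISHED once no fetcher
// threads are running anymore, and _RUNNING otherwise. A caller is therefore
// expected to poll until the fetcher leaves the running state (sketch with
// hypothetical helper names, illustrative only):
//
//    do {
//       resp = sendFetchFsckChunkListMsg(lastStatus, maxNumChunks, forceRestart);
//       consumeChunks(resp.chunkList);
//       lastStatus = resp.status;
//    } while (lastStatus == FetchFsckChunkListStatus_RUNNING);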
|
||||
11
storage/source/net/message/fsck/FetchFsckChunkListMsgEx.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/fsck/FetchFsckChunkListMsg.h>
|
||||
#include <common/net/message/fsck/FetchFsckChunkListRespMsg.h>
|
||||
|
||||
class FetchFsckChunkListMsgEx : public FetchFsckChunkListMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
88
storage/source/net/message/fsck/MoveChunkFileMsgEx.cpp
Normal file
@@ -0,0 +1,88 @@
|
||||
#include "MoveChunkFileMsgEx.h"
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
bool MoveChunkFileMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
ctx.sendResponse(MoveChunkFileRespMsg(moveChunk()));
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned MoveChunkFileMsgEx::moveChunk()
|
||||
{
|
||||
const char* logContext = "MoveChunkFileMsg incoming";
|
||||
|
||||
App* app = Program::getApp();
|
||||
|
||||
std::string chunkName = this->getChunkName();
|
||||
std::string oldPath = this->getOldPath(); // relative path to chunks dir
|
||||
std::string newPath = this->getNewPath(); // relative path to chunks dir
|
||||
uint16_t targetID = this->getTargetID();
|
||||
bool overwriteExisting = this->getOverwriteExisting();
|
||||
|
||||
int renameRes;
|
||||
|
||||
std::string moveFrom = oldPath + "/" + chunkName;
|
||||
std::string moveTo = newPath + "/" + chunkName;
|
||||
|
||||
auto* const target = app->getStorageTargets()->getTarget(targetID);
|
||||
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL, "Could not open path for target ID; targetID: "
|
||||
+ StringTk::uintToStr(targetID));
|
||||
return 1;
|
||||
}
|
||||
|
||||
const auto targetPath = getIsMirrored()
|
||||
? target->getPath() / CONFIG_BUDDYMIRROR_SUBDIR_NAME
|
||||
: target->getPath() / CONFIG_CHUNK_SUBDIR_NAME;
|
||||
|
||||
const int targetFD = getIsMirrored() ? *target->getMirrorFD() : *target->getChunkFD();
|
||||
|
||||
// if overwriteExisting is set to false, make sure that the output file does not exist
|
||||
if (!overwriteExisting)
|
||||
{
|
||||
bool pathExists = StorageTk::pathExists(targetFD, moveTo);
|
||||
if (pathExists)
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL,
|
||||
"Could not move chunk file. Destination file does already exist; chunkID: " + chunkName
|
||||
+ "; targetID: " + StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath
|
||||
+ "; newChunkPath: " + newPath);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// create the parent directory (perhaps it didn't exist)
|
||||
// can be more efficient if we write a createPathOnDisk that uses mkdirat
|
||||
const Path moveToPath = targetPath / moveTo;
|
||||
mode_t dirMode = S_IRWXU | S_IRWXG | S_IRWXO;
|
||||
bool mkdirRes = StorageTk::createPathOnDisk(moveToPath, true, &dirMode);
|
||||
|
||||
if(!mkdirRes)
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL,
|
||||
"Could not create parent directory for chunk; chunkID: " + chunkName + "; targetID: "
|
||||
+ StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath + "; newChunkPath: "
|
||||
+ newPath);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// perform the actual move
|
||||
renameRes = renameat(targetFD, moveFrom.c_str(), targetFD, moveTo.c_str() );
|
||||
if ( renameRes != 0 )
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL,
|
||||
"Could not perform move; chunkID: " + chunkName + "; targetID: "
|
||||
+ StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath + "; newChunkPath: "
|
||||
+ newPath + "; SysErr: " + System::getErrString());
|
||||
return 1;
|
||||
}
|
||||
else if (getIsMirrored())
|
||||
target->setBuddyNeedsResync(true);
|
||||
|
||||
return 0;
|
||||
}
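// Path handling note (added for clarity): moveFrom/moveTo stay relative to targetFD,
// i.e. relative to the target's CONFIG_CHUNK_SUBDIR_NAME or
// CONFIG_BUDDYMIRROR_SUBDIR_NAME subdirectory, while the parent directories are
// created through the absolute path targetPath / moveTo. For a hypothetical chunk
// "ABC" moved from "u1/l1" to "u2/l2" this corresponds roughly to:
//
//    renameat(targetFD, "u1/l1/ABC", targetFD, "u2/l2/ABC");
//    // with the parent directories created beforehand via
//    // StorageTk::createPathOnDisk(<targetPath>/u2/l2/ABC, true, &dirMode)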
|
||||
15
storage/source/net/message/fsck/MoveChunkFileMsgEx.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/NetMessage.h>
|
||||
#include <common/net/message/fsck/MoveChunkFileMsg.h>
|
||||
#include <common/net/message/fsck/MoveChunkFileRespMsg.h>
|
||||
|
||||
class MoveChunkFileMsgEx : public MoveChunkFileMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
|
||||
private:
|
||||
unsigned moveChunk();
|
||||
};
|
||||
|
||||
68
storage/source/net/message/mon/RequestStorageDataMsgEx.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
#include "RequestStorageDataMsgEx.h"
|
||||
|
||||
bool RequestStorageDataMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
Node& node = app->getLocalNode();
|
||||
MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();
|
||||
StorageTargets* storageTargets = app->getStorageTargets();
|
||||
|
||||
// get disk space of each target
|
||||
|
||||
StorageTargetInfoList storageTargetInfoList;
|
||||
|
||||
storageTargets->generateTargetInfoList(storageTargetInfoList);
|
||||
|
||||
// compute total disk space and total free space
|
||||
|
||||
int64_t diskSpaceTotal = 0; // sum of all targets
|
||||
int64_t diskSpaceFree = 0; // sum of all targets
|
||||
|
||||
for(StorageTargetInfoListIter iter = storageTargetInfoList.begin();
|
||||
iter != storageTargetInfoList.end();
|
||||
iter++)
|
||||
{
|
||||
if(iter->getDiskSpaceTotal() == -1)
|
||||
continue; // statfs() failed on this target
|
||||
|
||||
diskSpaceTotal += iter->getDiskSpaceTotal();
|
||||
diskSpaceFree += iter->getDiskSpaceFree();
|
||||
}
|
||||
|
||||
|
||||
unsigned sessionCount = app->getSessions()->getSize();
|
||||
|
||||
NicAddressList nicList(node.getNicList());
|
||||
std::string hostnameid = System::getHostname();
|
||||
|
||||
// highresStats
|
||||
HighResStatsList statsHistory;
|
||||
uint64_t lastStatsMS = getValue();
|
||||
|
||||
// get stats history
|
||||
StatsCollector* statsCollector = app->getStatsCollector();
|
||||
statsCollector->getStatsSince(lastStatsMS, statsHistory);
|
||||
|
||||
// get work queue stats
|
||||
unsigned indirectWorkListSize = 0;
|
||||
unsigned directWorkListSize = 0;
|
||||
|
||||
for(MultiWorkQueueMapCIter iter = workQueueMap->begin(); iter != workQueueMap->end(); iter++)
|
||||
{
|
||||
indirectWorkListSize += iter->second->getIndirectWorkListSize();
|
||||
directWorkListSize += iter->second->getDirectWorkListSize();
|
||||
}
|
||||
|
||||
RequestStorageDataRespMsg requestStorageDataRespMsg(node.getAlias(), hostnameid, node.getNumID(),
|
||||
&nicList, indirectWorkListSize, directWorkListSize, diskSpaceTotal, diskSpaceFree,
|
||||
sessionCount, &statsHistory, &storageTargetInfoList);
|
||||
ctx.sendResponse(requestStorageDataRespMsg);
|
||||
|
||||
LOG_DEBUG(__func__, Log_SPAM, std::string("Sent a message with type: " ) +
|
||||
StringTk::uintToStr(requestStorageDataRespMsg.getMsgType() ) + std::string(" to mon") );
|
||||
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
|
||||
StorageOpCounter_REQUESTSTORAGEDATA, getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
20
storage/source/net/message/mon/RequestStorageDataMsgEx.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <app/App.h>
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/worker/queue/MultiWorkQueue.h>
|
||||
#include <common/net/message/mon/RequestStorageDataMsg.h>
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/storage/StorageDefinitions.h>
|
||||
#include <common/storage/StorageTargetInfo.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
|
||||
class RequestStorageDataMsgEx : public RequestStorageDataMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
428
storage/source/net/message/nodes/GenericDebugMsgEx.cpp
Normal file
@@ -0,0 +1,428 @@
|
||||
#include <common/net/message/nodes/GenericDebugRespMsg.h>
|
||||
#include <common/net/msghelpers/MsgHelperGenericDebug.h>
|
||||
#include <common/storage/quota/Quota.h>
|
||||
#include <common/storage/StoragePoolId.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <program/Program.h>
|
||||
#include <session/ZfsSession.h>
|
||||
#include <toolkit/QuotaTk.h>
|
||||
#include "GenericDebugMsgEx.h"
|
||||
|
||||
|
||||
|
||||
#define GENDBGMSG_OP_LISTOPENFILES "listopenfiles"
|
||||
#define GENDBGMSG_OP_VERSION "version"
|
||||
#define GENDBGMSG_OP_MSGQUEUESTATS "msgqueuestats"
|
||||
#define GENDBGMSG_OP_RESYNCQUEUELEN "resyncqueuelen"
|
||||
#define GENDBGMSG_OP_CHUNKLOCKSTORESIZE "chunklockstoresize"
|
||||
#define GENDBGMSG_OP_CHUNKLOCKSTORECONTENTS "chunklockstore"
|
||||
#define GENDBGMSG_OP_SETREJECTIONRATE "setrejectionrate"
|
||||
|
||||
|
||||
bool GenericDebugMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
LogContext log("GenericDebugMsg incoming");
|
||||
|
||||
LOG_DEBUG_CONTEXT(log, 5, std::string("Command string: ") + getCommandStr() );
|
||||
|
||||
std::string cmdRespStr = processCommand();
|
||||
|
||||
ctx.sendResponse(GenericDebugRespMsg(cmdRespStr.c_str() ) );
|
||||
|
||||
App* app = Program::getApp();
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_GENERICDEBUG,
|
||||
getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return command response string
|
||||
*/
|
||||
std::string GenericDebugMsgEx::processCommand()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
Config* cfg = app->getConfig();
|
||||
|
||||
std::string responseStr;
|
||||
std::string operation;
|
||||
|
||||
// load command string into a stream to allow us to use getline
|
||||
std::istringstream commandStream(getCommandStr() );
|
||||
|
||||
// get operation type from command string
|
||||
std::getline(commandStream, operation, ' ');
|
||||
|
||||
if(operation == GENDBGMSG_OP_LISTOPENFILES)
|
||||
responseStr = processOpListOpenFiles(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_VERSION)
|
||||
responseStr = processOpVersion(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_MSGQUEUESTATS)
|
||||
responseStr = processOpMsgQueueStats(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_VARLOGMESSAGES)
|
||||
responseStr = MsgHelperGenericDebug::processOpVarLogMessages(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_VARLOGKERNLOG)
|
||||
responseStr = MsgHelperGenericDebug::processOpVarLogKernLog(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_FHGFSLOG)
|
||||
responseStr = MsgHelperGenericDebug::processOpFhgfsLog(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_LOADAVG)
|
||||
responseStr = MsgHelperGenericDebug::processOpLoadAvg(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_DROPCACHES)
|
||||
responseStr = MsgHelperGenericDebug::processOpDropCaches(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_GETCFG)
|
||||
responseStr = MsgHelperGenericDebug::processOpCfgFile(commandStream, cfg->getCfgFile() );
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_GETLOGLEVEL)
|
||||
responseStr = MsgHelperGenericDebug::processOpGetLogLevel(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_SETLOGLEVEL)
|
||||
responseStr = MsgHelperGenericDebug::processOpSetLogLevel(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_NETOUT)
|
||||
responseStr = MsgHelperGenericDebug::processOpNetOut(commandStream,
|
||||
app->getMgmtNodes(), app->getMetaNodes(), app->getStorageNodes() );
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_QUOTAEXCEEDED)
|
||||
responseStr = processOpQuotaExceeded(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_USEDQUOTA)
|
||||
responseStr = processOpUsedQuota(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_RESYNCQUEUELEN)
|
||||
responseStr = processOpResyncQueueLen(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_CHUNKLOCKSTORESIZE)
|
||||
responseStr = processOpChunkLockStoreSize(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_CHUNKLOCKSTORECONTENTS)
|
||||
responseStr = processOpChunkLockStoreContents(commandStream);
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_LISTSTORAGESTATES)
|
||||
responseStr = MsgHelperGenericDebug::processOpListTargetStates(commandStream,
|
||||
app->getTargetStateStore() );
|
||||
else
|
||||
if(operation == GENDBGMSG_OP_SETREJECTIONRATE)
|
||||
responseStr = processOpSetRejectionRate(commandStream);
|
||||
else
|
||||
responseStr = "Unknown/invalid operation";
|
||||
|
||||
return responseStr;
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpListOpenFiles(std::istringstream& commandStream)
|
||||
{
|
||||
// protocol: no arguments
|
||||
|
||||
App* app = Program::getApp();
|
||||
SessionStore* sessions = app->getSessions();
|
||||
|
||||
std::ostringstream responseStream;
|
||||
NumNodeIDList sessionIDs;
|
||||
size_t numFilesTotal = 0;
|
||||
size_t numCheckedSessions = 0; // may differ from the number of initially queried sessions
|
||||
|
||||
size_t numSessions = sessions->getAllSessionIDs(&sessionIDs);
|
||||
|
||||
responseStream << "Found " << numSessions << " sessions." << std::endl;
|
||||
|
||||
responseStream << std::endl;
|
||||
|
||||
// walk over all sessions
|
||||
for(NumNodeIDListCIter iter = sessionIDs.begin(); iter != sessionIDs.end(); iter++)
|
||||
{
|
||||
// note: sessionID might have become removed since we queried it, e.g. because client is gone
|
||||
|
||||
auto session = sessions->referenceSession(*iter);
|
||||
if(!session)
|
||||
continue;
|
||||
|
||||
numCheckedSessions++;
|
||||
|
||||
SessionLocalFileStore* sessionFiles = session->getLocalFiles();
|
||||
|
||||
size_t numFiles = sessionFiles->getSize();
|
||||
|
||||
if(!numFiles)
|
||||
continue; // only print sessions with open files
|
||||
|
||||
numFilesTotal += numFiles;
|
||||
|
||||
responseStream << *iter << ": " << numFiles << std::endl;
|
||||
}
|
||||
|
||||
responseStream << std::endl;
|
||||
|
||||
responseStream << "Final results: " << numFilesTotal << " open files in " <<
|
||||
numCheckedSessions << " checked sessions";
|
||||
|
||||
return responseStream.str();
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpVersion(std::istringstream& commandStream)
|
||||
{
|
||||
return BEEGFS_VERSION;
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpMsgQueueStats(std::istringstream& commandStream)
|
||||
{
|
||||
// protocol: no arguments
|
||||
|
||||
App* app = Program::getApp();
|
||||
MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();
|
||||
|
||||
std::ostringstream responseStream;
|
||||
std::string indirectQueueStats;
|
||||
std::string directQueueStats;
|
||||
std::string busyStats;
|
||||
|
||||
for(MultiWorkQueueMapCIter iter = workQueueMap->begin(); iter != workQueueMap->end(); iter++)
|
||||
{
|
||||
MultiWorkQueue* workQ = iter->second;
|
||||
|
||||
workQ->getStatsAsStr(indirectQueueStats, directQueueStats, busyStats);
|
||||
|
||||
responseStream << "* [queue id " << iter->first << "] "
|
||||
"general queue stats: " << std::endl <<
|
||||
indirectQueueStats << std::endl;
|
||||
|
||||
responseStream << "* [queue id " << iter->first << "] "
|
||||
"direct queue stats: " << std::endl <<
|
||||
directQueueStats << std::endl;
|
||||
|
||||
responseStream << "* [queue id " << iter->first << "] "
|
||||
"busy worker stats: " << std::endl <<
|
||||
busyStats << std::endl;
|
||||
}
|
||||
|
||||
return responseStream.str();
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpQuotaExceeded(std::istringstream& commandStream)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
|
||||
std::string targetIdStr;
|
||||
std::getline(commandStream, targetIdStr, ' ');
|
||||
uint16_t targetId = StringTk::strToUInt(targetIdStr);
|
||||
|
||||
if(!app->getConfig()->getQuotaEnableEnforcement() )
|
||||
return "No quota exceeded IDs on this storage daemon because quota enforcement is "
|
||||
"disabled.";
|
||||
|
||||
ExceededQuotaStorePtr exQuotaStore = app->getExceededQuotaStores()->get(targetId);
|
||||
// exQuotaStore may be null; needs to be checked in MsgHelperGenericDebug::processOpQuotaExceeded
|
||||
return MsgHelperGenericDebug::processOpQuotaExceeded(commandStream, exQuotaStore.get());
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpUsedQuota(std::istringstream& commandStream)
|
||||
{
|
||||
App *app = Program::getApp();
|
||||
|
||||
std::ostringstream responseStream;
|
||||
|
||||
ZfsSession session;
|
||||
QuotaDataType quotaDataType = QuotaDataType_NONE;
|
||||
std::string quotaDataTypeStr;
|
||||
bool forEachTarget = false;
|
||||
unsigned rangeStart = 0;
|
||||
unsigned rangeEnd = 0;
|
||||
|
||||
// get parameter from command string
|
||||
std::string inputString;
|
||||
while(!commandStream.eof() )
|
||||
{
|
||||
std::getline(commandStream, inputString, ' ');
|
||||
|
||||
if(inputString == "uid")
|
||||
{
|
||||
quotaDataType = QuotaDataType_USER;
|
||||
quotaDataTypeStr = "user";
|
||||
}
|
||||
else
|
||||
if(inputString == "gid")
|
||||
{
|
||||
quotaDataType = QuotaDataType_GROUP;
|
||||
quotaDataTypeStr = "group";
|
||||
}
|
||||
else
|
||||
if(inputString == "forEachTarget")
|
||||
forEachTarget = true;
|
||||
else
|
||||
if(inputString == "range")
|
||||
{
|
||||
std::string rangeValue;
|
||||
std::getline(commandStream, rangeValue, ' ');
|
||||
rangeStart = StringTk::strToUInt(rangeValue);
|
||||
std::getline(commandStream, rangeValue, ' ');
|
||||
rangeEnd = StringTk::strToUInt(rangeValue);
|
||||
}
|
||||
}
|
||||
|
||||
// verify given parameters
|
||||
if(quotaDataType == QuotaDataType_NONE)
|
||||
return "Invalid or missing quota data type argument.";
|
||||
if(rangeStart == 0 && rangeEnd == 0)
|
||||
return "Invalid or missing range argument.";
|
||||
|
||||
|
||||
if(forEachTarget)
|
||||
{
|
||||
const auto& targets = app->getStorageTargets()->getTargets();
|
||||
|
||||
responseStream << "Quota data of " << targets.size() << " targets." << std::endl;
|
||||
|
||||
for (const auto& mapping : targets)
|
||||
{
|
||||
const auto& target = *mapping.second;
|
||||
|
||||
QuotaDataList outQuotaDataList;
|
||||
|
||||
QuotaBlockDeviceMap quotaBlockDevices = {
|
||||
{mapping.first, target.getQuotaBlockDevice()}
|
||||
};
|
||||
|
||||
QuotaTk::requestQuotaForRange(&quotaBlockDevices, rangeStart, rangeEnd, quotaDataType,
|
||||
&outQuotaDataList, &session);
|
||||
|
||||
responseStream << outQuotaDataList.size() << " used quota for " << quotaDataTypeStr
|
||||
<< " IDs on target: " << mapping.first << std::endl;
|
||||
|
||||
QuotaData::quotaDataListToString(outQuotaDataList, &responseStream);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto& targets = app->getStorageTargets()->getTargets();
|
||||
|
||||
QuotaBlockDeviceMap quotaBlockDevices;
|
||||
|
||||
std::transform(
|
||||
targets.begin(), targets.end(),
|
||||
std::inserter(quotaBlockDevices, quotaBlockDevices.end()),
|
||||
[] (const auto& target) {
|
||||
return std::make_pair(target.first, target.second->getQuotaBlockDevice());
|
||||
});
|
||||
|
||||
QuotaDataList outQuotaDataList;
|
||||
|
||||
QuotaTk::requestQuotaForRange(&quotaBlockDevices, rangeStart, rangeEnd, quotaDataType,
|
||||
&outQuotaDataList, &session);
|
||||
|
||||
QuotaData::quotaDataListToString(outQuotaDataList, &responseStream);
|
||||
}
|
||||
|
||||
return responseStream.str();
|
||||
}
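// Parsing example (derived from the loop above): a command string of the form
// "<usedquota-op> uid forEachTarget range 1000 1010" (where <usedquota-op> is the
// GENDBGMSG_OP_USEDQUOTA keyword already stripped off by processCommand() ) yields
// quotaDataType = QuotaDataType_USER, forEachTarget = true, rangeStart = 1000 and
// rangeEnd = 1010 before the per-target or combined quota query runs.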
|
||||
|
||||
|
||||
std::string GenericDebugMsgEx::processOpResyncQueueLen(std::istringstream& commandStream)
|
||||
{
|
||||
// protocol: targetID files/dirs as argument (e.g. "resyncqueuelen 1234 files")
|
||||
|
||||
// get parameter from command string
|
||||
std::string targetIDStr;
|
||||
uint16_t targetID;
|
||||
std::string typeStr;
|
||||
std::getline(commandStream, targetIDStr, ' ');
|
||||
std::getline(commandStream, typeStr, ' ');
|
||||
targetID = StringTk::strToUInt(targetIDStr);
|
||||
|
||||
if (targetID == 0)
|
||||
return "Invalid or missing targetID";
|
||||
|
||||
BuddyResyncJob* resyncJob = Program::getApp()->getBuddyResyncer()->getResyncJob(targetID);
|
||||
|
||||
if (!resyncJob)
|
||||
return "0";
|
||||
|
||||
if (typeStr == "files")
|
||||
{
|
||||
size_t count = resyncJob->syncCandidates.getNumFiles();
|
||||
return StringTk::uintToStr(count);
|
||||
}
|
||||
else
|
||||
if (typeStr == "dirs")
|
||||
{
|
||||
size_t count = resyncJob->syncCandidates.getNumDirs();
|
||||
return StringTk::uintToStr(count);
|
||||
}
|
||||
else
|
||||
return "Invalid or missing queue type";
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpChunkLockStoreSize(std::istringstream& commandStream)
|
||||
{
|
||||
// protocol: targetID as argument (e.g. "chunklockstoresize 1234")
|
||||
|
||||
// get parameter from command string
|
||||
std::string targetIDStr;
|
||||
uint16_t targetID;
|
||||
std::getline(commandStream, targetIDStr, ' ');
|
||||
targetID = StringTk::strToUInt(targetIDStr);
|
||||
|
||||
if (targetID == 0)
|
||||
return "Invalid or missing targetID";
|
||||
|
||||
size_t lockStoreSize = Program::getApp()->getChunkLockStore()->getSize(targetID);
|
||||
|
||||
return StringTk::uintToStr(lockStoreSize);
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpChunkLockStoreContents(std::istringstream& commandStream)
|
||||
{
|
||||
// protocol: targetID and size limit (optional) as argument (e.g. "chunklockstore 1234 50")
|
||||
std::stringstream outStream;
|
||||
|
||||
// get parameter from command string
|
||||
std::string targetIDStr;
|
||||
uint16_t targetID;
|
||||
std::string maxEntriesStr;
|
||||
unsigned maxEntries;
|
||||
std::getline(commandStream, targetIDStr, ' ');
|
||||
targetID = StringTk::strToUInt(targetIDStr);
|
||||
std::getline(commandStream, maxEntriesStr, ' ');
|
||||
maxEntries = StringTk::strToUInt(maxEntriesStr);
|
||||
|
||||
if (targetID == 0)
|
||||
return "Invalid or missing targetID";
|
||||
|
||||
StringSet lockStoreContents = Program::getApp()->getChunkLockStore()->getLockStoreCopy(targetID);
|
||||
unsigned lockStoreSize = lockStoreContents.size();
|
||||
StringSetIter lockStoreIter = lockStoreContents.begin();
|
||||
|
||||
if ( (maxEntries == 0) || (maxEntries > lockStoreSize) )
|
||||
maxEntries = lockStoreSize;
|
||||
|
||||
for (unsigned i = 0; i < maxEntries; i++)
|
||||
{
|
||||
outStream << *lockStoreIter << std::endl;
|
||||
lockStoreIter++;
|
||||
}
|
||||
|
||||
return outStream.str();
|
||||
}
|
||||
|
||||
std::string GenericDebugMsgEx::processOpSetRejectionRate(std::istringstream& commandStream)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
Config* cfg = app->getConfig();
|
||||
std::string rejectionRateStr;
|
||||
std::ostringstream responseStream;
|
||||
|
||||
std::getline(commandStream, rejectionRateStr, ' ');
|
||||
unsigned rejectionRate = StringTk::strToUInt(rejectionRateStr);
|
||||
|
||||
cfg->setConnectionRejectionRate(rejectionRate);
|
||||
|
||||
responseStream << "Setting connection reject rate to " << rejectionRate << std::endl;
|
||||
return responseStream.str();
|
||||
}
|
||||
|
||||
24
storage/source/net/message/nodes/GenericDebugMsgEx.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/nodes/GenericDebugMsg.h>
|
||||
|
||||
|
||||
class GenericDebugMsgEx : public GenericDebugMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
|
||||
private:
|
||||
std::string processCommand();
|
||||
|
||||
std::string processOpListOpenFiles(std::istringstream& commandStream);
|
||||
std::string processOpVersion(std::istringstream& commandStream);
|
||||
std::string processOpMsgQueueStats(std::istringstream& commandStream);
|
||||
std::string processOpQuotaExceeded(std::istringstream& commandStream);
|
||||
std::string processOpUsedQuota(std::istringstream& commandStream);
|
||||
std::string processOpResyncQueueLen(std::istringstream& commandStream);
|
||||
std::string processOpChunkLockStoreSize(std::istringstream& commandStream);
|
||||
std::string processOpChunkLockStoreContents(std::istringstream& commandStream);
|
||||
std::string processOpSetRejectionRate(std::istringstream& commandStream);
|
||||
};
|
||||
|
||||
30
storage/source/net/message/nodes/GetClientStatsMsgEx.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
#include <program/Program.h>
|
||||
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/nodes/OpCounter.h>
|
||||
#include "GetClientStatsMsgEx.h"
|
||||
#include <nodes/StorageNodeOpStats.h>
|
||||
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
|
||||
|
||||
|
||||
/**
|
||||
* Server side, called when the server gets a GetClientStatsMsgEx request
|
||||
*/
|
||||
bool GetClientStatsMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
uint64_t cookieIP = getCookieIP(); // requested is cookie+1
|
||||
|
||||
// get stats
|
||||
StorageNodeOpStats* clientOpStats = Program::getApp()->getNodeOpStats();
|
||||
|
||||
bool wantPerUserStats = isMsgHeaderFeatureFlagSet(GETCLIENTSTATSMSG_FLAG_PERUSERSTATS);
|
||||
UInt64Vector opStatsVec;
|
||||
|
||||
clientOpStats->mapToUInt64Vec(
|
||||
cookieIP, GETCLIENTSTATSRESP_MAX_PAYLOAD_LEN, wantPerUserStats, &opStatsVec);
|
||||
|
||||
ctx.sendResponse(GetClientStatsRespMsg(&opStatsVec) );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
15
storage/source/net/message/nodes/GetClientStatsMsgEx.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/net/message/nodes/GetClientStatsMsg.h>
|
||||
|
||||
|
||||
// NOTE: The message factory requires this object to have 'deserialize' and
|
||||
// 'processIncoming' methods. 'deserialize' is derived from other classes.
|
||||
|
||||
class GetClientStatsMsgEx : public GetClientStatsMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
#include <program/Program.h>
|
||||
#include <storage/StorageTargets.h>
|
||||
|
||||
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
|
||||
|
||||
#include "GetTargetConsistencyStatesMsgEx.h"
|
||||
|
||||
bool GetTargetConsistencyStatesMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
StorageTargets* storageTargets = Program::getApp()->getStorageTargets();
|
||||
|
||||
TargetConsistencyStateVec states;
|
||||
std::transform(
|
||||
targetIDs.begin(), targetIDs.end(),
|
||||
std::back_inserter(states),
|
||||
[storageTargets] (uint16_t targetID) {
|
||||
auto* const target = storageTargets->getTarget(targetID);
|
||||
return target ? target->getConsistencyState() : TargetConsistencyState_BAD;
|
||||
});
|
||||
|
||||
ctx.sendResponse(GetTargetConsistencyStatesRespMsg(states));
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/nodes/GetTargetConsistencyStatesMsg.h>
|
||||
|
||||
class GetTargetConsistencyStatesMsgEx : public GetTargetConsistencyStatesMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
76
storage/source/net/message/nodes/HeartbeatMsgEx.cpp
Normal file
@@ -0,0 +1,76 @@
|
||||
#include <common/net/sock/NetworkInterfaceCard.h>
|
||||
#include <program/Program.h>
|
||||
#include "HeartbeatMsgEx.h"
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
bool HeartbeatMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
LogContext log("Heartbeat incoming");
|
||||
|
||||
App* app = Program::getApp();
|
||||
bool isNodeNew;
|
||||
|
||||
// construct node
|
||||
|
||||
NicAddressList& nicList = getNicList();
|
||||
|
||||
auto node = std::make_shared<Node>(getNodeType(), getNodeID(), getNodeNumID(), getPortUDP(),
|
||||
getPortTCP(), nicList);
|
||||
|
||||
// set local nic capabilities
|
||||
|
||||
NicAddressList localNicList(app->getLocalNicList() );
|
||||
NicListCapabilities localNicCaps;
|
||||
|
||||
NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);
|
||||
node->getConnPool()->setLocalNicList(localNicList, localNicCaps);
|
||||
|
||||
std::string nodeIDWithTypeStr = node->getNodeIDWithTypeStr();
|
||||
|
||||
log.log(Log_DEBUG, std::string("Heartbeat node: ") + nodeIDWithTypeStr);
|
||||
|
||||
// add/update node in store
|
||||
|
||||
AbstractNodeStore* nodes;
|
||||
|
||||
switch(getNodeType() )
|
||||
{
|
||||
case NODETYPE_Meta:
|
||||
nodes = app->getMetaNodes(); break;
|
||||
|
||||
case NODETYPE_Mgmt:
|
||||
nodes = app->getMgmtNodes(); break;
|
||||
|
||||
case NODETYPE_Storage:
|
||||
nodes = app->getStorageNodes(); break;
|
||||
|
||||
default:
|
||||
{
|
||||
log.logErr("Invalid/unexpected node type: "
|
||||
+ boost::lexical_cast<std::string>(getNodeType()));
|
||||
|
||||
goto ack_resp;
|
||||
} break;
|
||||
}
|
||||
|
||||
isNodeNew = (nodes->addOrUpdateNode(std::move(node)) == NodeStoreResult::Added);
|
||||
if(isNodeNew)
|
||||
{ // log info about new server
|
||||
bool supportsRDMA = NetworkInterfaceCard::supportsRDMA(&nicList);
|
||||
|
||||
log.log(Log_WARNING, std::string("New node: ") +
|
||||
nodeIDWithTypeStr + "; " +
|
||||
std::string(supportsRDMA ? "RDMA; " : "") );
|
||||
}
|
||||
|
||||
|
||||
ack_resp:
|
||||
acknowledge(ctx);
|
||||
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_HEARTBEAT,
|
||||
getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
10
storage/source/net/message/nodes/HeartbeatMsgEx.h
Normal file
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/nodes/HeartbeatMsg.h>
|
||||
|
||||
class HeartbeatMsgEx : public HeartbeatMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
28
storage/source/net/message/nodes/HeartbeatRequestMsgEx.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <common/net/message/nodes/HeartbeatMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <program/Program.h>
|
||||
#include "HeartbeatRequestMsgEx.h"
|
||||
|
||||
bool HeartbeatRequestMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
LogContext log("Heartbeat request incoming");
|
||||
App* app = Program::getApp();
|
||||
Config* cfg = app->getConfig();
|
||||
|
||||
Node& localNode = app->getLocalNode();
|
||||
NumNodeID localNodeNumID = localNode.getNumID();
|
||||
NicAddressList nicList(localNode.getNicList() );
|
||||
|
||||
HeartbeatMsg hbMsg(localNode.getAlias(), localNodeNumID, NODETYPE_Storage, &nicList);
|
||||
hbMsg.setPorts(cfg->getConnStoragePort(), cfg->getConnStoragePort() );
|
||||
|
||||
ctx.sendResponse(hbMsg);
|
||||
|
||||
log.log(Log_DEBUG, std::string("Heartbeat req ip:") + StringTk::uintToHexStr(ctx.getSocket()->getPeerIP()));
|
||||
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_HEARTBEAT,
|
||||
getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
10
storage/source/net/message/nodes/HeartbeatRequestMsgEx.h
Normal file
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
|
||||
|
||||
class HeartbeatRequestMsgEx : public HeartbeatRequestMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
44
storage/source/net/message/nodes/MapTargetsMsgEx.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
#include <common/net/message/nodes/MapTargetsRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/ZipIterator.h>
|
||||
#include <program/Program.h>
|
||||
#include "MapTargetsMsgEx.h"
|
||||
|
||||
|
||||
bool MapTargetsMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
LogContext log("MapTargetsMsg incoming");
|
||||
|
||||
const App* app = Program::getApp();
|
||||
const NodeStoreServers* storageNodes = app->getStorageNodes();
|
||||
TargetMapper* targetMapper = app->getTargetMapper();
|
||||
|
||||
const NumNodeID nodeID = getNodeID();
|
||||
std::map<uint16_t, FhgfsOpsErr> results;
|
||||
|
||||
for (const auto mapping : getTargets())
|
||||
{
|
||||
const auto targetId = mapping.first;
|
||||
const auto poolId = mapping.second;
|
||||
|
||||
const auto mapRes = targetMapper->mapTarget(targetId, nodeID, poolId);
|
||||
|
||||
results[targetId] = mapRes.first;
|
||||
|
||||
if ( (mapRes.first == FhgfsOpsErr_SUCCESS) && (mapRes.second) )
|
||||
{ // target could be mapped and is new
|
||||
LOG_DEBUG_CONTEXT(log, Log_WARNING, "Mapping "
|
||||
"target " + StringTk::uintToStr(targetId) +
|
||||
" => " +
|
||||
storageNodes->getNodeIDWithTypeStr(nodeID) );
|
||||
|
||||
IGNORE_UNUSED_VARIABLE(storageNodes);
|
||||
}
|
||||
}
|
||||
|
||||
// if the sender did not request an ack, send a direct response with the mapping results
if(!acknowledge(ctx) )
|
||||
ctx.sendResponse(MapTargetsRespMsg(results));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
11
storage/source/net/message/nodes/MapTargetsMsgEx.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/nodes/MapTargetsMsg.h>
|
||||
|
||||
class MapTargetsMsgEx : public MapTargetsMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
|
||||
21
storage/source/net/message/nodes/PublishCapacitiesMsgEx.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
#include "PublishCapacitiesMsgEx.h"
|
||||
|
||||
|
||||
bool PublishCapacitiesMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
InternodeSyncer* syncer = app->getInternodeSyncer();
|
||||
|
||||
|
||||
// force upload of capacity information
|
||||
syncer->setForcePublishCapacities();
|
||||
|
||||
// send response
|
||||
acknowledge(ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
12
storage/source/net/message/nodes/PublishCapacitiesMsgEx.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/nodes/PublishCapacitiesMsg.h>
|
||||
|
||||
|
||||
class PublishCapacitiesMsgEx : public PublishCapacitiesMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,21 @@
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>

#include "RefreshTargetStatesMsgEx.h"


bool RefreshTargetStatesMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();
   InternodeSyncer* syncer = app->getInternodeSyncer();

   // force update of capacity pools
   syncer->setForceTargetStatesUpdate();

   // send response
   acknowledge(ctx);

   return true;
}

11
storage/source/net/message/nodes/RefreshTargetStatesMsgEx.h
Normal file
@@ -0,0 +1,11 @@
#pragma once

#include <common/net/message/nodes/RefreshTargetStatesMsg.h>


class RefreshTargetStatesMsgEx : public RefreshTargetStatesMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

124
storage/source/net/message/nodes/RemoveBuddyGroupMsgEx.cpp
Normal file
@@ -0,0 +1,124 @@
#include "RemoveBuddyGroupMsgEx.h"

#include <common/net/message/nodes/RemoveBuddyGroupRespMsg.h>
#include <net/message/storage/listing/ListChunkDirIncrementalMsgEx.h>
#include <program/Program.h>

static FhgfsOpsErr checkChunkDirRemovable(const int dirFD)
{
   DIR* dir = fdopendir(dirFD);
   std::unique_ptr<DIR, StorageTk::CloseDirDeleter> _dir(dir);

   while (true)
   {
      struct dirent* result;

#if USE_READDIR_R
      struct dirent buffer;
      if (readdir_r(dir, &buffer, &result) != 0)
         break;
#else
      errno = 0;
      result = readdir(dir);
      if (!result && errno)
         break;
#endif

      if (!result)
         return FhgfsOpsErr_SUCCESS;

      if (strcmp(result->d_name, ".") == 0 || strcmp(result->d_name, "..") == 0)
         continue;

      struct stat statData;

      const int statRes = ::fstatat(dirfd(dir), result->d_name, &statData, AT_SYMLINK_NOFOLLOW);
      if (statRes != 0)
      {
         LOG(MIRRORING, ERR, "Could not stat something in chunk directory.");
         return FhgfsOpsErr_INTERNAL;
      }

      if (!S_ISDIR(statData.st_mode))
         return FhgfsOpsErr_NOTEMPTY;

      const int subdir = ::openat(dirfd(dir), result->d_name, O_RDONLY);
      if (subdir < 0)
      {
         LOG(MIRRORING, ERR, "Could not open directory in chunk path.");
         return FhgfsOpsErr_INTERNAL;
      }

      const FhgfsOpsErr checkRes = checkChunkDirRemovable(subdir);
      if (checkRes != FhgfsOpsErr_SUCCESS)
         return checkRes;
   }

   return FhgfsOpsErr_INTERNAL;
}

bool RemoveBuddyGroupMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   if (type != NODETYPE_Storage)
   {
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   uint16_t targetID = app->getMirrorBuddyGroupMapper()->getPrimaryTargetID(groupID);
   if (app->getTargetMapper()->getNodeID(targetID) != app->getLocalNode().getNumID())
      targetID = app->getMirrorBuddyGroupMapper()->getSecondaryTargetID(groupID);
   if (app->getTargetMapper()->getNodeID(targetID) != app->getLocalNode().getNumID())
   {
      LOG(MIRRORING, ERR, "Group is not mapped on this target.", groupID);
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   auto* const target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   {
      LOG(MIRRORING, ERR, "Could not open directory file descriptor.", groupID);
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   const int dirFD = openat(*target->getMirrorFD(), ".", O_RDONLY);
   if (dirFD < 0)
   {
      LOG(MIRRORING, ERR, "Could not open directory file descriptor.", groupID);
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   const FhgfsOpsErr checkRes = checkChunkDirRemovable(dirFD);

   const bool forceAndNotEmpty = checkRes == FhgfsOpsErr_NOTEMPTY && force;

   if (checkRes == FhgfsOpsErr_SUCCESS || forceAndNotEmpty)
   {
      if (!checkOnly)
      {
         auto* const bgm = Program::getApp()->getMirrorBuddyGroupMapper();
         const NumNodeID localID = Program::getApp()->getLocalNode().getNumID();

         if (!bgm->unmapMirrorBuddyGroup(groupID, localID))
         {
            ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
            return true;
         }
      }

      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_SUCCESS));
      return true;
   }
   else
   {
      ctx.sendResponse(RemoveBuddyGroupRespMsg(checkRes));
      return true;
   }
}

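Editor's note (not part of the upstream diff): checkChunkDirRemovable() above treats a buddy group's chunk directory as removable only if the whole tree below it contains nothing but directories; the first non-directory entry turns the result into FhgfsOpsErr_NOTEMPTY. Below is a simplified, self-contained sketch of the same recursive check, using plain POSIX calls and a bool result instead of the BeeGFS error codes and logging; the helper name is hypothetical.

#include <dirent.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstring>

// Returns true if the directory tree below dirFD contains only (possibly nested) directories,
// i.e. no chunk files are left. Takes ownership of dirFD. Illustration only.
static bool chunkDirTreeIsEmptyOfFiles(int dirFD)
{
   DIR* dir = fdopendir(dirFD);
   if (!dir)
   {
      close(dirFD);
      return false;
   }

   bool result = true;

   while (struct dirent* entry = readdir(dir))
   {
      if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
         continue;

      struct stat statData;
      if (fstatat(dirfd(dir), entry->d_name, &statData, AT_SYMLINK_NOFOLLOW) != 0
            || !S_ISDIR(statData.st_mode))
      {
         result = false; // stat failure or a non-directory entry => not removable
         break;
      }

      const int subdirFD = openat(dirfd(dir), entry->d_name, O_RDONLY);
      if (subdirFD < 0 || !chunkDirTreeIsEmptyOfFiles(subdirFD)) // recurse into the subdirectory
      {
         result = false;
         break;
      }
   }

   closedir(dir); // also closes dirFD
   return result;
}
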
10
storage/source/net/message/nodes/RemoveBuddyGroupMsgEx.h
Normal file
@@ -0,0 +1,10 @@
#pragma once

#include <common/net/message/nodes/RemoveBuddyGroupMsg.h>

class RemoveBuddyGroupMsgEx : public RemoveBuddyGroupMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

37
storage/source/net/message/nodes/RemoveNodeMsgEx.cpp
Normal file
@@ -0,0 +1,37 @@
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "RemoveNodeMsgEx.h"


bool RemoveNodeMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   LOG_DBG(GENERAL, SPAM, "Removing node.", getNodeNumID());

   if (getNodeType() == NODETYPE_Storage)
   {
      NodeStoreServers* nodes = app->getStorageNodes();
      auto node = nodes->referenceNode(getNodeNumID());
      bool delRes = nodes->deleteNode(getNodeNumID());

      // log
      if (delRes)
      {
         LOG(GENERAL, WARNING, "Node removed.", ("node", node->getNodeIDWithTypeStr()));
         LOG(GENERAL, WARNING, "Number of nodes in the system:",
            ("meta", app->getMetaNodes()->getSize()),
            ("storage", app->getStorageNodes()->getSize()));
      }
   }

   if (!acknowledge(ctx))
      ctx.sendResponse(RemoveNodeRespMsg(0));

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_REMOVENODE,
      getMsgHeaderUserID() );

   return true;
}

11
storage/source/net/message/nodes/RemoveNodeMsgEx.h
Normal file
@@ -0,0 +1,11 @@
#pragma once

#include <common/net/message/nodes/RemoveNodeMsg.h>

class RemoveNodeMsgEx : public RemoveNodeMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

@@ -0,0 +1,37 @@
#include <common/net/message/nodes/SetMirrorBuddyGroupRespMsg.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/toolkit/MessagingTk.h>

#include <program/Program.h>

#include "SetMirrorBuddyGroupMsgEx.h"

bool SetMirrorBuddyGroupMsgEx::processIncoming(ResponseContext& ctx)
{
   uint16_t buddyGroupID = this->getBuddyGroupID();

   if (getNodeType() != NODETYPE_Storage)
   {
      // The storage server has no mapper for meta buddy groups - nothing to do, just acknowledge
      if (!acknowledge(ctx))
         ctx.sendResponse(SetMirrorBuddyGroupRespMsg(FhgfsOpsErr_SUCCESS, buddyGroupID));
      return true;
   }

   App* app = Program::getApp();
   MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();

   uint16_t primaryTargetID = this->getPrimaryTargetID();
   uint16_t secondaryTargetID = this->getSecondaryTargetID();
   bool allowUpdate = this->getAllowUpdate();
   uint16_t newBuddyGroupID = 0;

   FhgfsOpsErr mapResult = buddyGroupMapper->mapMirrorBuddyGroup(buddyGroupID, primaryTargetID,
      secondaryTargetID, app->getLocalNode().getNumID(), allowUpdate, &newBuddyGroupID);

   if(!acknowledge(ctx) )
      ctx.sendResponse(SetMirrorBuddyGroupRespMsg(mapResult, newBuddyGroupID) );

   return true;
}

11
storage/source/net/message/nodes/SetMirrorBuddyGroupMsgEx.h
Normal file
@@ -0,0 +1,11 @@
#pragma once

#include <common/net/message/nodes/SetMirrorBuddyGroupMsg.h>

class SetMirrorBuddyGroupMsgEx : public SetMirrorBuddyGroupMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

@@ -0,0 +1,40 @@
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/nodes/TargetStateStore.h>
#include <common/toolkit/ZipIterator.h>
#include <program/Program.h>

#include "SetTargetConsistencyStatesMsgEx.h"

bool SetTargetConsistencyStatesMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();
   StorageTargets* storageTargets = app->getStorageTargets();
   FhgfsOpsErr result = FhgfsOpsErr_SUCCESS;

   if (getTargetIDs().size() != getStates().size())
   {
      LogContext(__func__).logErr("Different list size of targetIDs and states");
      result = FhgfsOpsErr_INTERNAL;
      goto send_response;
   }

   for (ZipIterRange<UInt16List, UInt8List> idStateIter(getTargetIDs(), getStates());
        !idStateIter.empty(); ++idStateIter)
   {
      auto* const target = storageTargets->getTarget(*idStateIter()->first);
      if (!target)
      {
         LogContext(__func__).logErr("Unknown targetID: " +
            StringTk::uintToStr(*(idStateIter()->first) ) );
         result = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }

      target->setState(TargetConsistencyState(*idStateIter()->second));
   }

send_response:
   ctx.sendResponse(SetTargetConsistencyStatesRespMsg(result) );

   return true;
}

@@ -0,0 +1,10 @@
#pragma once

#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>

class SetTargetConsistencyStatesMsgEx : public SetTargetConsistencyStatesMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

@@ -0,0 +1,66 @@
#include <app/App.h>
#include <common/net/message/nodes/StorageBenchControlMsgResp.h>
#include <components/benchmarker/StorageBenchOperator.h>
#include <program/Program.h>
#include "StorageBenchControlMsgEx.h"

bool StorageBenchControlMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "StorageBenchControlMsg incoming";

   StorageBenchResultsMap results;
   int cmdErrorCode = STORAGEBENCH_ERROR_NO_ERROR;

   App* app = Program::getApp();
   StorageBenchOperator* storageBench = app->getStorageBenchOperator();

   switch(getAction())
   {
      case StorageBenchAction_START:
      {
         cmdErrorCode = storageBench->initAndStartStorageBench(&getTargetIDs(), getBlocksize(),
            getSize(), getThreads(), getODirect(), getType() );
      } break;

      case StorageBenchAction_STOP:
      {
         cmdErrorCode = storageBench->stopBenchmark();
      } break;

      case StorageBenchAction_STATUS:
      {
         storageBench->getStatusWithResults(&getTargetIDs(), &results);
         cmdErrorCode = STORAGEBENCH_ERROR_NO_ERROR;
      } break;

      case StorageBenchAction_CLEANUP:
      {
         cmdErrorCode = storageBench->cleanup(&getTargetIDs());
      } break;

      default:
      {
         LogContext(logContext).logErr("unknown action!");
      } break;
   }

   int errorCode;

   // check whether the last command from the fhgfs_cmd was successful;
   // if not, send the error code of that command back to the fhgfs_cmd.
   // if it was successful, send the error code of the last or currently running benchmark
   if (cmdErrorCode != STORAGEBENCH_ERROR_NO_ERROR)
   {
      errorCode = cmdErrorCode;
   }
   else
   {
      errorCode = storageBench->getLastRunErrorCode();
   }

   ctx.sendResponse(
      StorageBenchControlMsgResp(storageBench->getStatus(), getAction(),
         storageBench->getType(), errorCode, results) );

   return true;
}

11
storage/source/net/message/nodes/StorageBenchControlMsgEx.h
Normal file
@@ -0,0 +1,11 @@
#pragma once

#include <common/net/message/nodes/StorageBenchControlMsg.h>
#include <common/Common.h>

class StorageBenchControlMsgEx: public StorageBenchControlMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

@@ -0,0 +1,14 @@
#include "RefreshStoragePoolsMsgEx.h"

#include <program/Program.h>

bool RefreshStoragePoolsMsgEx::processIncoming(ResponseContext& ctx)
{
   Program::getApp()->getInternodeSyncer()->setForceStoragePoolsUpdate();

   // can only come as an AcknowledgableMsg from mgmtd
   acknowledge(ctx);

   return true;
}

@@ -0,0 +1,10 @@
#pragma once

#include <common/net/message/nodes/storagepools/RefreshStoragePoolsMsg.h>

class RefreshStoragePoolsMsgEx : public RefreshStoragePoolsMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};

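Editor's note (not part of the upstream diff): PublishCapacitiesMsgEx, RefreshTargetStatesMsgEx and RefreshStoragePoolsMsgEx all follow the same pattern: the handler does no work itself, it only sets a "force" flag on the InternodeSyncer and acknowledges, so the next pass of the sync loop picks the request up. A minimal sketch of that handshake, assuming a single atomic flag (the real InternodeSyncer keeps one flag per kind of update):

#include <atomic>

// Hypothetical reduction of the "force next sync" handshake used by the handlers above.
class ForceSyncFlag
{
   std::atomic<bool> forced{false};

public:
   // called from the message handler thread (e.g. setForcePublishCapacities())
   void set() { forced.store(true, std::memory_order_relaxed); }

   // called periodically by the sync loop; returns true exactly once per request
   bool consume() { return forced.exchange(false, std::memory_order_relaxed); }
};
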
94
storage/source/net/message/session/FSyncLocalFileMsgEx.cpp
Normal file
@@ -0,0 +1,94 @@
#include <program/Program.h>
#include <common/net/message/session/FSyncLocalFileRespMsg.h>
#include <common/storage/StorageErrors.h>
#include <net/msghelpers/MsgHelperIO.h>
#include "FSyncLocalFileMsgEx.h"


bool FSyncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
   ctx.sendResponse(FSyncLocalFileRespMsg(fsync()));

   return true;
}

FhgfsOpsErr FSyncLocalFileMsgEx::fsync()
{
   const char* logContext = "FSyncLocalFileMsg incoming";

   FhgfsOpsErr clientRes = FhgfsOpsErr_SUCCESS;
   bool isMirrorSession = isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_BUDDYMIRROR);

   // do session check only when it is not a mirror session
   bool useSessionCheck = isMirrorSession ? false :
      isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_SESSION_CHECK);

   App* app = Program::getApp();
   SessionStore* sessions = app->getSessions();
   auto session = sessions->referenceOrAddSession(getSessionID());
   SessionLocalFileStore* sessionLocalFiles = session->getLocalFiles();

   // select the right targetID

   uint16_t targetID = getTargetID();

   if(isMirrorSession)
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      // note: only log message here, error handling will happen below through invalid targetFD
      if(unlikely(!targetID) )
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
   }

   auto sessionLocalFile =
      sessionLocalFiles->referenceSession(getFileHandleID(), targetID, isMirrorSession);

   if(sessionLocalFile)
   { // sessionLocalFile exists => check if open and perform fsync
      if (!isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_NO_SYNC) )
      {
         auto& fd = sessionLocalFile->getFD();
         if (fd.valid())
         { // file open => sync
            int fsyncRes = MsgHelperIO::fsync(*fd);

            if(fsyncRes)
            {
               LogContext log(logContext);
               log.log(Log_WARNING, std::string("fsync of chunk file failed. ") +
                  std::string("SessionID: ") + getSessionID().str() +
                  std::string(". SysErr: ") + System::getErrString() );

               clientRes = FhgfsOpsErr_INTERNAL;
            }
         }
      }

      if(useSessionCheck && sessionLocalFile->isServerCrashed() )
      { // server crashed during the write, maybe lost some data; send error to client
         LogContext log(logContext);
         log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.) "
            "The session is marked as dirty.");
         clientRes = FhgfsOpsErr_STORAGE_SRV_CRASHED;
      }
   }
   else
   if (useSessionCheck)
   { // the server crashed during a write or before the close was successful
      LogContext log(logContext);
      log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
         "No session for file available. "
         "FileHandleID: " + std::string(getFileHandleID()) );

      clientRes = FhgfsOpsErr_STORAGE_SRV_CRASHED;
   }

   return clientRes;
}

13
storage/source/net/message/session/FSyncLocalFileMsgEx.h
Normal file
@@ -0,0 +1,13 @@
#pragma once

#include <common/net/message/session/FSyncLocalFileMsg.h>

class FSyncLocalFileMsgEx : public FSyncLocalFileMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      FhgfsOpsErr fsync();
};

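Editor's note (not part of the upstream diff): the fsync handler above, as well as the close and read handlers that follow, resolve their target the same way: when the BUDDYMIRROR feature flag is set, the message's targetID field actually carries a buddy mirror group ID, and the BUDDYMIRROR_SECOND flag decides whether the primary or the secondary member is meant. A standalone sketch of that resolution step; BuddyGroupMapperLike and resolveTargetID are hypothetical stand-ins for MirrorBuddyGroupMapper and the inline logic in the handlers:

#include <cstdint>
#include <map>
#include <utility>

// Hypothetical stand-in for MirrorBuddyGroupMapper: group ID -> (primary, secondary) target IDs.
struct BuddyGroupMapperLike
{
   std::map<uint16_t, std::pair<uint16_t, uint16_t>> groups;

   uint16_t getPrimaryTargetID(uint16_t groupID) const
   {
      auto it = groups.find(groupID);
      return (it != groups.end()) ? it->second.first : 0; // 0 == unknown group
   }

   uint16_t getSecondaryTargetID(uint16_t groupID) const
   {
      auto it = groups.find(groupID);
      return (it != groups.end()) ? it->second.second : 0;
   }
};

// With the BUDDYMIRROR flag, msgTargetID is really a group ID; BUDDYMIRROR_SECOND selects the
// secondary member. A result of 0 means "unknown group": the handlers above only log this and
// let the invalid target file descriptor produce the actual error later.
uint16_t resolveTargetID(const BuddyGroupMapperLike& mapper, uint16_t msgTargetID,
   bool isBuddyMirror, bool wantSecondary)
{
   if (!isBuddyMirror)
      return msgTargetID;

   return wantSecondary ? mapper.getSecondaryTargetID(msgTargetID)
                        : mapper.getPrimaryTargetID(msgTargetID);
}
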
@@ -0,0 +1,252 @@
|
||||
#include <common/net/message/control/GenericResponseMsg.h>
|
||||
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
|
||||
#include <common/toolkit/SessionTk.h>
|
||||
#include <net/msghelpers/MsgHelperIO.h>
|
||||
#include <program/Program.h>
|
||||
#include <toolkit/StorageTkEx.h>
|
||||
#include "CloseChunkFileMsgEx.h"
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
bool CloseChunkFileMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
|
||||
FhgfsOpsErr closeMsgRes;
|
||||
DynamicAttribs dynAttribs;
|
||||
|
||||
std::tie(closeMsgRes, dynAttribs) = close(ctx);
|
||||
// if closeMsgRes == FhgfsOpsErr_COMMUNICATION, a GenericResponseMsg has been sent already
|
||||
if (closeMsgRes != FhgfsOpsErr_COMMUNICATION)
|
||||
ctx.sendResponse(
|
||||
CloseChunkFileRespMsg(closeMsgRes, dynAttribs.filesize, dynAttribs.allocedBlocks,
|
||||
dynAttribs.modificationTimeSecs, dynAttribs.lastAccessTimeSecs,
|
||||
dynAttribs.storageVersion) );
|
||||
|
||||
// update op counters
|
||||
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_CLOSELOCAL,
|
||||
getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::pair<FhgfsOpsErr, CloseChunkFileMsgEx::DynamicAttribs> CloseChunkFileMsgEx::close(
|
||||
ResponseContext& ctx)
|
||||
{
|
||||
const char* logContext = "CloseChunkFileMsg incoming";
|
||||
|
||||
App* app = Program::getApp();
|
||||
Config* config = app->getConfig();
|
||||
SessionStore* sessions = app->getSessions();
|
||||
|
||||
uint16_t targetID;
|
||||
|
||||
FhgfsOpsErr closeMsgRes = FhgfsOpsErr_SUCCESS; // the result that will be sent to requestor
|
||||
DynamicAttribs dynAttribs = {0, 0, 0, 0, 0};
|
||||
|
||||
std::string fileHandleID(getFileHandleID() );
|
||||
bool isMirrorSession = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR);
|
||||
|
||||
SessionLocalFileStore* sessionLocalFiles;
|
||||
|
||||
// select the right targetID
|
||||
|
||||
targetID = getTargetID();
|
||||
|
||||
if(isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR) )
|
||||
{ // given targetID refers to a buddy mirror group
|
||||
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
|
||||
|
||||
targetID = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
|
||||
mirrorBuddies->getSecondaryTargetID(targetID) :
|
||||
mirrorBuddies->getPrimaryTargetID(targetID);
|
||||
|
||||
if(unlikely(!targetID) )
|
||||
{ // unknown target
|
||||
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
|
||||
StringTk::uintToStr(getTargetID() ) );
|
||||
return {FhgfsOpsErr_UNKNOWNTARGET, {}};
|
||||
}
|
||||
}
|
||||
|
||||
// forward to secondary (if appropriate)
|
||||
|
||||
closeMsgRes = forwardToSecondary(ctx);
|
||||
if (unlikely(closeMsgRes != FhgfsOpsErr_SUCCESS))
|
||||
return {closeMsgRes, dynAttribs};
|
||||
|
||||
auto session = sessions->referenceOrAddSession(getSessionID());
|
||||
sessionLocalFiles = session->getLocalFiles();
|
||||
|
||||
auto fsState = sessionLocalFiles->removeSession(fileHandleID, targetID, isMirrorSession);
|
||||
|
||||
// get current dynamic file attribs
|
||||
|
||||
if (fsState)
|
||||
{ // file no longer in use => refresh filesize and close file fd
|
||||
auto& fd = fsState->getFD();
|
||||
|
||||
/* get dynamic attribs, here before closing the file.
|
||||
* Note: Depending on the underlying file system the returned st_blocks might be too large
|
||||
* (pre-allocated blocks, which are only released on close() ). Advantage here is
|
||||
* that we already have the file descriptor. */
|
||||
if( (config->getTuneEarlyStat() ) &&
|
||||
(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS) ) )
|
||||
getDynamicAttribsByFD(*fd, fileHandleID, targetID, dynAttribs);
|
||||
|
||||
// close fd
|
||||
|
||||
if (!fsState->close())
|
||||
closeMsgRes = FhgfsOpsErr_INTERNAL;
|
||||
|
||||
// only get the attributes here, in order to make xfs release pre-allocated blocks
|
||||
if( (!config->getTuneEarlyStat() ) &&
|
||||
(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS) ) )
|
||||
getDynamicAttribsByPath(fileHandleID, targetID, dynAttribs);
|
||||
|
||||
}
|
||||
else
|
||||
if(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS) )
|
||||
{ // file still in use by other threads => get dynamic attribs by path
|
||||
|
||||
bool getRes = getDynamicAttribsByPath(fileHandleID, targetID, dynAttribs);
|
||||
if (getRes)
|
||||
{
|
||||
// LogContext(logContext).log(Log_DEBUG, "Chunk file virtually closed. "
|
||||
// "HandleID: " + fileHandleID);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// note: "file not exists" is not an error. we just have nothing to do in that case.
|
||||
|
||||
return {closeMsgRes, dynAttribs};
|
||||
}
|
||||
|
||||
/**
|
||||
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
|
||||
*
|
||||
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
|
||||
* case *outChunkLocked==false is guaranteed).
|
||||
* @throw SocketException if sending of GenericResponseMsg fails.
|
||||
*/
|
||||
FhgfsOpsErr CloseChunkFileMsgEx::forwardToSecondary(ResponseContext& ctx)
|
||||
{
|
||||
const char* logContext = "CloseChunkFileMsg incoming (forward to secondary)";
|
||||
|
||||
App* app = Program::getApp();
|
||||
|
||||
if(!isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR) ||
|
||||
isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
|
||||
return FhgfsOpsErr_SUCCESS; // nothing to do
|
||||
|
||||
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
|
||||
addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);
|
||||
|
||||
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_CloseChunkFileResp);
|
||||
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
|
||||
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
|
||||
|
||||
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
|
||||
|
||||
// remove the flag that we just added for secondary
|
||||
unsetMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);
|
||||
|
||||
if(unlikely(
|
||||
(commRes == FhgfsOpsErr_COMMUNICATION) &&
|
||||
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
|
||||
{
|
||||
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
   "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
|
||||
}
|
||||
|
||||
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
|
||||
"error: " + boost::lexical_cast<std::string>(commRes));
|
||||
|
||||
std::string genericRespStr = "Communication with secondary failed. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
|
||||
|
||||
ctx.sendResponse(
|
||||
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
CloseChunkFileRespMsg* respMsg = (CloseChunkFileRespMsg*)rrArgs.outRespMsg.get();
|
||||
FhgfsOpsErr secondaryRes = respMsg->getResult();
|
||||
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
|
||||
boost::lexical_cast<std::string>(secondaryRes) + "; "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
return secondaryRes;
|
||||
}
|
||||
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
|
||||
bool CloseChunkFileMsgEx::getDynamicAttribsByFD(const int fd, std::string fileHandleID,
|
||||
uint16_t targetID, DynamicAttribs& outDynAttribs)
|
||||
{
|
||||
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
|
||||
|
||||
std::string fileID(SessionTk::fileIDFromHandleID(fileHandleID) );
|
||||
|
||||
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // LOCK
|
||||
|
||||
// note: this is locked because we need to get the filesize together with the storageVersion
|
||||
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(fd, &outDynAttribs.filesize,
|
||||
&outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
|
||||
&outDynAttribs.lastAccessTimeSecs);
|
||||
|
||||
if(getDynAttribsRes)
|
||||
outDynAttribs.storageVersion = storageVersion;
|
||||
|
||||
syncedPaths->unlockPath(fileID, targetID); // UNLOCK
|
||||
|
||||
return getDynAttribsRes;
|
||||
}
|
||||
|
||||
bool CloseChunkFileMsgEx::getDynamicAttribsByPath(std::string fileHandleID, uint16_t targetID,
|
||||
DynamicAttribs& outDynAttribs)
|
||||
{
|
||||
const char* logContext = "CloseChunkFileMsg (attribs by path)";
|
||||
|
||||
App* app = Program::getApp();
|
||||
SyncedStoragePaths* syncedPaths = app->getSyncedStoragePaths();
|
||||
|
||||
auto* const target = app->getStorageTargets()->getTarget(targetID);
|
||||
if (!target)
|
||||
{ // unknown targetID
|
||||
LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(targetID) );
|
||||
return false;
|
||||
}
|
||||
|
||||
const int targetFD = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR)
|
||||
? *target->getMirrorFD()
|
||||
: *target->getChunkFD();
|
||||
|
||||
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
|
||||
std::string pathStr = StorageTk::getFileChunkPath(getPathInfo(), fileID);
|
||||
|
||||
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
|
||||
|
||||
// note: this is locked because we need to get the filesize together with the storageVersion
|
||||
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(targetFD, pathStr.c_str(),
|
||||
&outDynAttribs.filesize, &outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
|
||||
&outDynAttribs.lastAccessTimeSecs);
|
||||
|
||||
if(getDynAttribsRes)
|
||||
outDynAttribs.storageVersion = storageVersion;
|
||||
|
||||
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
|
||||
|
||||
return getDynAttribsRes;
|
||||
}
|
||||
@@ -0,0 +1,29 @@
#pragma once

#include <common/net/message/session/opening/CloseChunkFileMsg.h>

class CloseChunkFileMsgEx : public CloseChunkFileMsg
{
   private:
      struct DynamicAttribs
      {
         int64_t filesize;
         int64_t allocedBlocks; // allocated 512byte blocks (relevant for sparse files)
         int64_t modificationTimeSecs;
         int64_t lastAccessTimeSecs;
         uint64_t storageVersion;
      };

   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      FhgfsOpsErr forwardToSecondary(ResponseContext& ctx);
      bool getDynamicAttribsByFD(int fd, std::string fileHandleID, uint16_t targetID,
         DynamicAttribs& outDynAttribs);
      bool getDynamicAttribsByPath(std::string fileHandleID, uint16_t targetID,
         DynamicAttribs& outDynAttribs);

      std::pair<FhgfsOpsErr, DynamicAttribs> close(ResponseContext& ctx);
};

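Editor's note (not part of the upstream diff): CloseChunkFileMsgEx::forwardToSecondary() above implements the primary-side mirroring step by re-using the incoming message object: it sets the BUDDYMIRROR_SECOND feature flag, forwards the message to the buddy via MessagingTk::requestResponseTarget(), clears the flag again, and deliberately treats an offline secondary as success because resync will repair it later. A compressed sketch of that decision logic only; the callbacks and FwdResult values are hypothetical stand-ins for the BeeGFS messaging and target-state APIs:

#include <functional>

enum class FwdResult { Success, CommError, SecondaryError };

// Hypothetical stand-ins for "send the flagged message to the buddy" and "is the buddy offline".
struct BuddyComm
{
   std::function<FwdResult()> sendToBuddy;
   std::function<bool()> buddyIsOffline;
};

// Mirrors the control flow of forwardToSecondary(): only the primary of a mirrored message
// forwards; an unreachable but offline buddy is ignored (it will be resynced); any other
// communication failure is reported back so the client retries via a GenericResponseMsg.
FwdResult forwardToSecondary(bool isBuddyMirror, bool isSecondFlagSet, const BuddyComm& comm)
{
   if (!isBuddyMirror || isSecondFlagSet)
      return FwdResult::Success; // nothing to do: not mirrored, or we already are the secondary

   FwdResult res = comm.sendToBuddy();

   if (res == FwdResult::CommError && comm.buddyIsOffline())
      return FwdResult::Success; // offline secondary => proceed with local processing

   return res;
}
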
114
storage/source/net/message/session/rw/ReadLocalFileRDMAMsgEx.h
Normal file
114
storage/source/net/message/session/rw/ReadLocalFileRDMAMsgEx.h
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <common/net/message/session/rw/ReadLocalFileRDMAMsg.h>
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/components/worker/Worker.h>
|
||||
#include <session/SessionLocalFileStore.h>
|
||||
#include "ReadLocalFileV2MsgEx.h"
|
||||
|
||||
/**
|
||||
* Implements RDMA write protocol.
|
||||
*/
|
||||
class ReadLocalFileRDMAMsgSender : public ReadLocalFileRDMAMsg
|
||||
{
|
||||
public:
|
||||
struct ReadState : public ReadStateBase
|
||||
{
|
||||
RdmaInfo* rdma;
|
||||
uint64_t rBuf;
|
||||
size_t rLen;
|
||||
uint64_t rOff;
|
||||
|
||||
ReadState(const char* logContext, uint64_t toBeRead,
|
||||
SessionLocalFile* sessionLocalFile) :
|
||||
ReadStateBase(logContext, toBeRead, sessionLocalFile) {}
|
||||
|
||||
};
|
||||
|
||||
private:
|
||||
friend class ReadLocalFileMsgExBase<ReadLocalFileRDMAMsgSender, ReadState>;
|
||||
|
||||
static std::string logContextPref;
|
||||
|
||||
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
|
||||
{
|
||||
lengthInfo = HOST_TO_LE_64(lengthInfo);
|
||||
sock->send(&lengthInfo, sizeof(int64_t), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* RDMA write data to the remote buffer.
|
||||
*/
|
||||
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
|
||||
{
|
||||
ssize_t writeRes = sock->write(buf, rs.readRes, 0, rs.rBuf + rs.rOff, rs.rdma->key);
|
||||
LOG_DEBUG(rs.logContext, Log_DEBUG,
|
||||
"buf: " + StringTk::uint64ToHexStr((uint64_t)buf) + "; "
|
||||
"bufLen: " + StringTk::int64ToStr(rs.readRes) + "; "
|
||||
"rbuf: " + StringTk::uint64ToHexStr(rs.rBuf) + "; "
|
||||
"rkey: " + StringTk::uintToHexStr(rs.rdma->key) + "; "
|
||||
"writeRes: " + StringTk::int64ToStr(writeRes));
|
||||
|
||||
if (unlikely(writeRes != rs.readRes))
|
||||
{
|
||||
LogContext(rs.logContext).logErr("Unable to write file data to client. "
|
||||
"FileID: " + rs.sessionLocalFile->getFileID() + "; "
|
||||
"SysErr: " + System::getErrString());
|
||||
writeRes = -1;
|
||||
}
|
||||
|
||||
if (isFinal && likely(writeRes >= 0))
|
||||
sendLengthInfo(sock, getCount() - rs.toBeRead);
|
||||
|
||||
return writeRes;
|
||||
}
|
||||
|
||||
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
|
||||
{
|
||||
// Cannot RDMA anything larger than WORKER_BUFOUT_SIZE in a single operation
|
||||
// because that is the size of the buffer passed in by the Worker.
|
||||
// TODO: pass around a Buffer with a length instead of unqualified char*.
|
||||
return BEEGFS_MIN(BEEGFS_MIN(len, ssize_t(rs.rLen - rs.rOff)), WORKER_BUFOUT_SIZE);
|
||||
}
|
||||
|
||||
inline bool readStateInit(ReadState& rs)
|
||||
{
|
||||
rs.rdma = getRdmaInfo();
|
||||
if (unlikely(!rs.rdma->next(rs.rBuf, rs.rLen, rs.rOff)))
|
||||
{
|
||||
LogContext(rs.logContext).logErr("No entities in RDMA buffers.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool readStateNext(ReadState& rs)
|
||||
{
|
||||
rs.rOff += rs.readRes;
|
||||
if (rs.rOff == rs.rLen)
|
||||
{
|
||||
if (unlikely(!rs.rdma->next(rs.rBuf, rs.rLen, rs.rOff)))
|
||||
{
|
||||
LogContext(rs.logContext).logErr("RDMA buffers exhausted");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline size_t getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf)
|
||||
{
|
||||
*dataBuf = ctx.getBuffer();
|
||||
*sendBuf = *dataBuf;
|
||||
return ctx.getBufferLength();
|
||||
}
|
||||
};
|
||||
|
||||
typedef ReadLocalFileMsgExBase<ReadLocalFileRDMAMsgSender,
|
||||
ReadLocalFileRDMAMsgSender::ReadState> ReadLocalFileRDMAMsgEx;
|
||||
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
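Editor's note (not part of the upstream diff): ReadLocalFileRDMAMsgSender::getReadLength() above caps every RDMA write at two limits: the remainder of the current remote buffer segment (rLen - rOff) and the worker's output buffer size. A one-function sketch of that clamp; workerBufSize stands in for the real WORKER_BUFOUT_SIZE constant:

#include <algorithm>
#include <cstddef>

// A single RDMA write may cover at most the requested length, the rest of the current remote
// segment, and the worker's output buffer. Illustration of the clamp in getReadLength().
ptrdiff_t clampRdmaReadLength(ptrdiff_t requested, size_t rLen, size_t rOff, size_t workerBufSize)
{
   return std::min({ requested,
      static_cast<ptrdiff_t>(rLen - rOff),
      static_cast<ptrdiff_t>(workerBufSize) });
}
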
466
storage/source/net/message/session/rw/ReadLocalFileV2MsgEx.cpp
Normal file
466
storage/source/net/message/session/rw/ReadLocalFileV2MsgEx.cpp
Normal file
@@ -0,0 +1,466 @@
|
||||
#include <program/Program.h>
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/toolkit/SessionTk.h>
|
||||
#include <net/msghelpers/MsgHelperIO.h>
|
||||
#include <toolkit/StorageTkEx.h>
|
||||
#include "ReadLocalFileV2MsgEx.h"
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include "ReadLocalFileRDMAMsgEx.h"
|
||||
#endif
|
||||
#include <sys/sendfile.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#define READ_USE_TUNEFILEREAD_TRIGGER (4*1024*1024) /* seq IO trigger for tuneFileReadSize */
|
||||
|
||||
#define READ_BUF_OFFSET_PROTO_MIN (sizeof(int64_t) ) /* for prepended length info */
|
||||
#define READ_BUF_END_PROTO_MIN (sizeof(int64_t) ) /* for appended length info */
|
||||
|
||||
|
||||
/* reserve more than necessary at buf start to achieve page cache alignment */
|
||||
const size_t READ_BUF_OFFSET =
|
||||
BEEGFS_MAX( (long)READ_BUF_OFFSET_PROTO_MIN, sysconf(_SC_PAGESIZE) );
|
||||
/* reserve more than necessary at buf end to achieve page cache alignment */
|
||||
const size_t READ_BUF_END_RESERVE =
|
||||
BEEGFS_MAX( (long)READ_BUF_END_PROTO_MIN, sysconf(_SC_PAGESIZE) );
|
||||
/* read buffer size cutoff for protocol data */
|
||||
const size_t READ_BUF_LEN_PROTOCOL_CUTOFF =
|
||||
READ_BUF_OFFSET + READ_BUF_END_RESERVE;
|
||||
|
||||
|
||||
// Without this forced instantiation, processIncoming causes a linker error.
|
||||
static ReadLocalFileV2MsgEx forcedLinkageV2;
|
||||
#ifdef BEEGFS_NVFS
|
||||
static ReadLocalFileRDMAMsgEx forcedLinkageRDMA;
|
||||
#endif
|
||||
|
||||
std::string ReadLocalFileV2MsgSender::logContextPref = "ReadChunkFileV2Msg";
|
||||
#ifdef BEEGFS_NVFS
|
||||
std::string ReadLocalFileRDMAMsgSender::logContextPref = "ReadChunkFileRDMAMsg";
|
||||
#endif
|
||||
|
||||
template <class Msg, typename ReadState>
|
||||
bool ReadLocalFileMsgExBase<Msg, ReadState>::processIncoming(NetMessage::ResponseContext& ctx)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " incoming";
|
||||
|
||||
bool retVal = true; // return value
|
||||
|
||||
int64_t readRes = 0;
|
||||
|
||||
std::string fileHandleID(getFileHandleID() );
|
||||
bool isMirrorSession = isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_BUDDYMIRROR);
|
||||
|
||||
// do session check only when it is not a mirror session
|
||||
bool useSessionCheck = isMirrorSession ? false :
|
||||
isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_SESSION_CHECK);
|
||||
|
||||
App* app = Program::getApp();
|
||||
SessionStore* sessions = app->getSessions();
|
||||
auto session = sessions->referenceOrAddSession(getClientNumID());
|
||||
this->sessionLocalFiles = session->getLocalFiles();
|
||||
|
||||
// select the right targetID
|
||||
|
||||
uint16_t targetID = getTargetID();
|
||||
|
||||
if(isMirrorSession )
|
||||
{ // given targetID refers to a buddy mirror group
|
||||
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
|
||||
|
||||
targetID = isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
|
||||
mirrorBuddies->getSecondaryTargetID(targetID) :
|
||||
mirrorBuddies->getPrimaryTargetID(targetID);
|
||||
|
||||
// note: only log message here, error handling will happen below through invalid targetFD
|
||||
if(unlikely(!targetID) )
|
||||
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
|
||||
StringTk::uintToStr(getTargetID() ) );
|
||||
}
|
||||
|
||||
auto* const target = app->getStorageTargets()->getTarget(targetID);
|
||||
if (!target)
|
||||
{
|
||||
if (isMirrorSession)
|
||||
{ /* buddy mirrored file => fail with Err_COMMUNICATION to make the requestor retry.
|
||||
mgmt will mark this target as (p)offline in a few moments. */
|
||||
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
|
||||
return true;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_UNKNOWNTARGET);
|
||||
return true;
|
||||
}
|
||||
|
||||
// check if we already have a session for this file...
|
||||
|
||||
auto sessionLocalFile = sessionLocalFiles->referenceSession(
|
||||
fileHandleID, targetID, isMirrorSession);
|
||||
if(!sessionLocalFile)
|
||||
{ // sessionLocalFile not exists yet => create, insert, re-get it
|
||||
if(useSessionCheck)
|
||||
{ // server crashed during the write, maybe lost some data; send error to client
|
||||
LogContext log(logContext);
|
||||
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
|
||||
"No session for file available. "
|
||||
"FileHandleID: " + fileHandleID);
|
||||
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_STORAGE_SRV_CRASHED);
|
||||
goto release_session;
|
||||
}
|
||||
|
||||
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
|
||||
int openFlags = SessionTk::sysOpenFlagsFromFhgfsAccessFlags(getAccessFlags() );
|
||||
|
||||
auto newFile = boost::make_unique<SessionLocalFile>(fileHandleID, targetID, fileID, openFlags,
|
||||
false);
|
||||
|
||||
if(isMirrorSession)
|
||||
newFile->setIsMirrorSession(true);
|
||||
|
||||
sessionLocalFile = sessionLocalFiles->addAndReferenceSession(std::move(newFile));
|
||||
}
|
||||
else
|
||||
{ // session file exists
|
||||
if(useSessionCheck && sessionLocalFile->isServerCrashed() )
|
||||
{ // server crashed during the write, maybe lost some data; send error to client
|
||||
LogContext log(logContext);
|
||||
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.) "
|
||||
"The session is marked as dirty. "
|
||||
"FileHandleID: " + fileHandleID);
|
||||
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_STORAGE_SRV_CRASHED);
|
||||
goto release_session;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note: the session file must be unlocked/released before we send the finalizing info,
|
||||
because otherwise we have a race when the client assumes the read is complete and tries
|
||||
to close the file (while the handle is actually still referenced on the server). */
|
||||
/* Note: we also must be careful to update the current offset before sending the final length
|
||||
info because otherwise the session file might have been released already and we have no
|
||||
longer access to the offset. */
|
||||
|
||||
readRes = -1;
|
||||
try
|
||||
{
|
||||
// prepare file descriptor (if file not open yet then open it if it exists already)
|
||||
FhgfsOpsErr openRes = openFile(*target, sessionLocalFile.get());
|
||||
if(openRes != FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
sendLengthInfo(ctx.getSocket(), -openRes);
|
||||
goto release_session;
|
||||
}
|
||||
|
||||
// check if file exists
|
||||
if(!sessionLocalFile->getFD().valid())
|
||||
{ // file didn't exist (not an error) => send EOF
|
||||
sendLengthInfo(ctx.getSocket(), 0);
|
||||
goto release_session;
|
||||
}
|
||||
|
||||
// the actual read workhorse...
|
||||
|
||||
readRes = incrementalReadStatefulAndSendV2(ctx, sessionLocalFile.get());
|
||||
|
||||
LOG_DEBUG(logContext, Log_SPAM, "sending completed. "
|
||||
"readRes: " + StringTk::int64ToStr(readRes) );
|
||||
IGNORE_UNUSED_VARIABLE(readRes);
|
||||
|
||||
}
|
||||
catch(SocketException& e)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("SocketException occurred: ") + e.what() );
|
||||
LogContext(logContext).log(Log_WARNING, "Details: "
|
||||
"sessionID: " + getClientNumID().str() + "; "
|
||||
"fileHandle: " + fileHandleID + "; "
|
||||
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
|
||||
"count: " + StringTk::int64ToStr(getCount() ) );
|
||||
|
||||
sessionLocalFile->setOffset(-1); /* invalidate offset (we can only do this if still locked,
|
||||
but that's not a prob if we update offset correctly before send - see notes above) */
|
||||
|
||||
retVal = false;
|
||||
goto release_session;
|
||||
}
|
||||
|
||||
release_session:
|
||||
|
||||
// update operation counters
|
||||
|
||||
if(likely(readRes > 0) )
|
||||
app->getNodeOpStats()->updateNodeOp(
|
||||
ctx.getSocket()->getPeerIP(), StorageOpCounter_READOPS, readRes, getMsgHeaderUserID() );
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
inline size_t ReadLocalFileV2MsgSender::getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf)
|
||||
{
|
||||
*dataBuf = ctx.getBuffer() + READ_BUF_OFFSET; // offset for prepended data length info
|
||||
*sendBuf = *dataBuf - READ_BUF_OFFSET_PROTO_MIN;
|
||||
return ctx.getBufferLength() - READ_BUF_LEN_PROTOCOL_CUTOFF; /* cutoff for
|
||||
prepended and finalizing length info */
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: This is similar to incrementalReadAndSend, but uses the offset from sessionLocalFile
|
||||
* to avoid calling seek every time.
|
||||
*
|
||||
* Warning: Do not use the returned value to set the new offset, as there might be other threads
|
||||
* that also did something with the file (i.e. the io-lock is released somewhere within this
|
||||
* method).
|
||||
*
|
||||
* @return number of bytes read or some arbitrary negative value otherwise
|
||||
*/
|
||||
template <class Msg, typename ReadState>
|
||||
int64_t ReadLocalFileMsgExBase<Msg, ReadState>::incrementalReadStatefulAndSendV2(NetMessage::ResponseContext& ctx,
|
||||
SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
/* note on session offset: the session offset must always be set before sending the data to the
|
||||
client (otherwise the client could send the next request before we updated the offset, which
|
||||
would lead to a race condition) */
|
||||
|
||||
std::string logContext = Msg::logContextPref + " (read incremental)";
|
||||
Config* cfg = Program::getApp()->getConfig();
|
||||
|
||||
char* dataBuf;
|
||||
char* sendBuf;
|
||||
|
||||
if (READ_BUF_LEN_PROTOCOL_CUTOFF >= ctx.getBufferLength())
|
||||
{ // buffer too small. That shouldn't happen and is an error
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INTERNAL);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const ssize_t dataBufLen = getBuffers(ctx, &dataBuf, &sendBuf);
|
||||
|
||||
auto& fd = sessionLocalFile->getFD();
|
||||
int64_t oldOffset = sessionLocalFile->getOffset();
|
||||
int64_t newOffset = getOffset();
|
||||
|
||||
bool skipReadAhead =
|
||||
unlikely(isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_DISABLE_IO) ||
|
||||
sessionLocalFile->getIsDirectIO());
|
||||
|
||||
ssize_t readAheadSize = skipReadAhead ? 0 : cfg->getTuneFileReadAheadSize();
|
||||
ssize_t readAheadTriggerSize = cfg->getTuneFileReadAheadTriggerSize();
|
||||
|
||||
if( (oldOffset < 0) || (oldOffset != newOffset) )
|
||||
{
|
||||
sessionLocalFile->resetReadCounter(); // reset sequential read counter
|
||||
sessionLocalFile->resetLastReadAheadTrigger();
|
||||
}
|
||||
else
|
||||
{ // read continues at previous offset
|
||||
LOG_DEBUG(logContext, Log_SPAM,
|
||||
"fileID: " + sessionLocalFile->getFileID() + "; "
|
||||
"offset: " + StringTk::int64ToStr(getOffset() ) );
|
||||
}
|
||||
|
||||
size_t maxReadAtOnceLen = dataBufLen;
|
||||
|
||||
// reduce maxReadAtOnceLen to achieve better read/send async overlap
|
||||
/* (note: reducing makes only sense if we can rely on the kernel to do some read-ahead, so don't
|
||||
reduce for direct IO and for random IO) */
|
||||
if( (sessionLocalFile->getReadCounter() >= READ_USE_TUNEFILEREAD_TRIGGER) &&
|
||||
!sessionLocalFile->getIsDirectIO() )
|
||||
maxReadAtOnceLen = BEEGFS_MIN(dataBufLen, cfg->getTuneFileReadSize() );
|
||||
|
||||
off_t readOffset = getOffset();
|
||||
ReadState readState(logContext.c_str(), getCount(), sessionLocalFile);
|
||||
|
||||
if (!isMsgValid() || !readStateInit(readState))
|
||||
{
|
||||
LogContext(logContext).logErr("Invalid read message.");
|
||||
sessionLocalFile->setOffset(-1);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INVAL);
|
||||
return -1;
|
||||
}
|
||||
|
||||
for( ; ; )
|
||||
{
|
||||
ssize_t readLength = getReadLength(readState, BEEGFS_MIN(maxReadAtOnceLen, readState.toBeRead));
|
||||
|
||||
readState.readRes = unlikely(isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_DISABLE_IO) ) ?
|
||||
readLength : MsgHelperIO::pread(*fd, dataBuf, readLength, readOffset);
|
||||
|
||||
LOG_DEBUG(logContext, Log_SPAM,
|
||||
"toBeRead: " + StringTk::int64ToStr(readState.toBeRead) + "; "
|
||||
"readLength: " + StringTk::int64ToStr(readLength) + "; "
|
||||
"readRes: " + StringTk::int64ToStr(readState.readRes) );
|
||||
|
||||
if(readState.readRes == readLength)
|
||||
{ // simple success case
|
||||
readState.toBeRead -= readState.readRes;
|
||||
|
||||
readOffset += readState.readRes;
|
||||
|
||||
int64_t newOffset = getOffset() + getCount() - readState.toBeRead;
|
||||
sessionLocalFile->setOffset(newOffset); // update offset
|
||||
|
||||
sessionLocalFile->incReadCounter(readState.readRes); // update sequential read length
|
||||
|
||||
ctx.getStats()->incVals.diskReadBytes += readState.readRes; // update stats
|
||||
|
||||
bool isFinal = !readState.toBeRead;
|
||||
|
||||
if (readStateSendData(ctx.getSocket(), readState, sendBuf, isFinal) < 0)
|
||||
{
|
||||
LogContext(logContext).logErr("readStateSendData failed.");
|
||||
sessionLocalFile->setOffset(-1);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
|
||||
return -1;
|
||||
}
|
||||
|
||||
checkAndStartReadAhead(sessionLocalFile, readAheadTriggerSize, newOffset, readAheadSize);
|
||||
|
||||
if(isFinal)
|
||||
{ // we reached the end of the requested data
|
||||
return getCount();
|
||||
}
|
||||
|
||||
if (!readStateNext(readState))
|
||||
{
|
||||
LogContext(logContext).logErr("readStateNext failed.");
|
||||
sessionLocalFile->setOffset(-1);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // readRes not as it should be => might be an error or just an end-of-file
|
||||
|
||||
if(readState.readRes == -1)
|
||||
{ // read error occurred
|
||||
LogContext(logContext).log(Log_WARNING, "Unable to read file data. "
|
||||
"FileID: " + sessionLocalFile->getFileID() + "; "
|
||||
"SysErr: " + System::getErrString() );
|
||||
|
||||
sessionLocalFile->setOffset(-1);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INTERNAL);
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{ // just an end of file
|
||||
LOG_DEBUG(logContext, Log_DEBUG,
|
||||
"Unable to read all of the requested data (=> end of file)");
|
||||
LOG_DEBUG(logContext, Log_DEBUG,
|
||||
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
|
||||
"count: " + StringTk::int64ToStr(getCount() ) + "; "
|
||||
"readLength: " + StringTk::int64ToStr(readLength) + "; " +
|
||||
"readRes: " + StringTk::int64ToStr(readState.readRes) + "; " +
|
||||
"toBeRead: " + StringTk::int64ToStr(readState.toBeRead) );
|
||||
|
||||
readOffset += readState.readRes;
|
||||
readState.toBeRead -= readState.readRes;
|
||||
|
||||
sessionLocalFile->setOffset(getOffset() + getCount() - readState.toBeRead); // update offset
|
||||
|
||||
sessionLocalFile->incReadCounter(readState.readRes); // update sequential read length
|
||||
|
||||
ctx.getStats()->incVals.diskReadBytes += readState.readRes; // update stats
|
||||
|
||||
if(readState.readRes > 0)
|
||||
{
|
||||
if (readStateSendData(ctx.getSocket(), readState, sendBuf, true) < 0)
|
||||
{
|
||||
LogContext(logContext).logErr("readStateSendData failed.");
|
||||
sessionLocalFile->setOffset(-1);
|
||||
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
sendLengthInfo(ctx.getSocket(), 0);
|
||||
|
||||
return(getCount() - readState.toBeRead);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // end of for-loop
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts read-ahead if enough sequential data has been read.
|
||||
*
|
||||
* Note: if getDisableIO() is true, we assume the caller sets readAheadSize==0, so getDisableIO()
|
||||
* is not checked explicitly within this function.
|
||||
*
|
||||
* @param sessionLocalFile lastReadAheadOffset will be updated if read-ahead was triggered
|
||||
* @param readAheadTriggerSize the length of sequential IO that triggers read-ahead
|
||||
* @param currentOffset current file offset (where read-ahead would start)
|
||||
*/
|
||||
template <class Msg, typename ReadState>
|
||||
void ReadLocalFileMsgExBase<Msg, ReadState>::checkAndStartReadAhead(SessionLocalFile* sessionLocalFile,
|
||||
ssize_t readAheadTriggerSize, off_t currentOffset, off_t readAheadSize)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (read-ahead)";
|
||||
|
||||
if(!readAheadSize)
|
||||
return;
|
||||
|
||||
int64_t readCounter = sessionLocalFile->getReadCounter();
|
||||
int64_t nextReadAheadTrigger = sessionLocalFile->getLastReadAheadTrigger() ?
|
||||
sessionLocalFile->getLastReadAheadTrigger() + readAheadSize : readAheadTriggerSize;
|
||||
|
||||
if(readCounter < nextReadAheadTrigger)
|
||||
return; // we're not at the trigger point yet
|
||||
|
||||
/* start read-ahead...
|
||||
(read-ahead is supposed to be non-blocking if there are free slots in the device IO queue) */
|
||||
|
||||
LOG_DEBUG(logContext, Log_SPAM,
|
||||
std::string("Starting read-ahead... ") +
|
||||
"offset: " + StringTk::int64ToStr(currentOffset) + "; "
|
||||
"size: " + StringTk::int64ToStr(readAheadSize) );
|
||||
|
||||
MsgHelperIO::readAhead(*sessionLocalFile->getFD(), currentOffset, readAheadSize);
|
||||
|
||||
// update trigger
|
||||
|
||||
sessionLocalFile->setLastReadAheadTrigger(readCounter);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Open the file if a filedescriptor is not already set in sessionLocalFile.
|
||||
* If the file needs to be opened, this method will check the target consistency state before
|
||||
* opening.
|
||||
*
|
||||
* @return we return the special value FhgfsOpsErr_COMMUNICATION here in some cases to indirectly
|
||||
* ask the client for a retry (e.g. if target consistency is not good for buddymirrored chunks).
|
||||
*/
|
||||
template <class Msg, typename ReadState>
|
||||
FhgfsOpsErr ReadLocalFileMsgExBase<Msg, ReadState>::openFile(const StorageTarget& target,
|
||||
SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (open)";
|
||||
|
||||
bool isBuddyMirrorChunk = sessionLocalFile->getIsMirrorSession();
|
||||
|
||||
|
||||
if (sessionLocalFile->getFD().valid())
|
||||
return FhgfsOpsErr_SUCCESS; // file already open => nothing to be done here
|
||||
|
||||
|
||||
// file not open yet => get targetFD and check consistency state
|
||||
|
||||
const auto consistencyState = target.getConsistencyState();
|
||||
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
|
||||
|
||||
if(unlikely(consistencyState != TargetConsistencyState_GOOD) && isBuddyMirrorChunk)
|
||||
{ // this is a request for a buddymirrored chunk on a non-good target
|
||||
LogContext(logContext).log(Log_NOTICE, "Refusing request. Target consistency is not good. "
|
||||
"targetID: " + StringTk::uintToStr(target.getID()));
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
FhgfsOpsErr openChunkRes = sessionLocalFile->openFile(targetFD, getPathInfo(), false, NULL);
|
||||
|
||||
return openChunkRes;
|
||||
}
|
||||
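Editor's note (not part of the upstream diff): checkAndStartReadAhead() above arms kernel read-ahead only after enough strictly sequential data has been read: the first trigger fires once the sequential read counter reaches tuneFileReadAheadTriggerSize, and every later trigger fires one readAheadSize further along. A small worked example of that arithmetic; the numbers are illustrative, not BeeGFS defaults:

#include <cstdint>

// Same trigger arithmetic as in checkAndStartReadAhead(), isolated for illustration.
bool shouldTriggerReadAhead(int64_t readCounter, int64_t lastReadAheadTrigger,
   int64_t readAheadTriggerSize, int64_t readAheadSize)
{
   int64_t nextTrigger = lastReadAheadTrigger
      ? lastReadAheadTrigger + readAheadSize  // later triggers: one per read-ahead window
      : readAheadTriggerSize;                 // first trigger: after the sequential-IO threshold

   return readCounter >= nextTrigger;
}

// Example with readAheadTriggerSize = 4 MiB and readAheadSize = 8 MiB:
//   readCounter =  3 MiB, lastTrigger = 0     -> false (threshold not reached yet)
//   readCounter =  5 MiB, lastTrigger = 0     -> true  (read-ahead issued, lastTrigger := 5 MiB)
//   readCounter =  9 MiB, lastTrigger = 5 MiB -> false (next trigger is at 13 MiB)
//   readCounter = 13 MiB, lastTrigger = 5 MiB -> true
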
216
storage/source/net/message/session/rw/ReadLocalFileV2MsgEx.h
Normal file
216
storage/source/net/message/session/rw/ReadLocalFileV2MsgEx.h
Normal file
@@ -0,0 +1,216 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/session/rw/ReadLocalFileV2Msg.h>
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <session/SessionLocalFileStore.h>
|
||||
|
||||
class StorageTarget;
|
||||
|
||||
/**
|
||||
* Contains common data needed by implementations of the network protocol
|
||||
* that send data to the client.
|
||||
*/
|
||||
struct ReadStateBase
|
||||
{
|
||||
const char* logContext;
|
||||
uint64_t toBeRead;
|
||||
SessionLocalFile* sessionLocalFile;
|
||||
ssize_t readRes;
|
||||
|
||||
ReadStateBase(const char* logContext, uint64_t toBeRead,
|
||||
SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
this->logContext = logContext;
|
||||
this->toBeRead = toBeRead;
|
||||
this->sessionLocalFile = sessionLocalFile;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Msg, typename ReadState>
|
||||
class ReadLocalFileMsgExBase : public Msg
|
||||
{
|
||||
public:
|
||||
bool processIncoming(NetMessage::ResponseContext& ctx);
|
||||
|
||||
private:
|
||||
SessionLocalFileStore* sessionLocalFiles;
|
||||
|
||||
FhgfsOpsErr openFile(const StorageTarget& target, SessionLocalFile* sessionLocalFile);
|
||||
|
||||
void checkAndStartReadAhead(SessionLocalFile* sessionLocalFile, ssize_t readAheadTriggerSize,
|
||||
off_t currentOffset, off_t readAheadSize);
|
||||
|
||||
int64_t incrementalReadStatefulAndSendV2(NetMessage::ResponseContext& ctx,
|
||||
SessionLocalFile* sessionLocalFile);
|
||||
|
||||
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
|
||||
{
|
||||
static_cast<Msg&>(*this).sendLengthInfo(sock, lengthInfo);
|
||||
}
|
||||
|
||||
inline bool readStateInit(ReadState& rs)
|
||||
{
|
||||
return static_cast<Msg&>(*this).readStateInit(rs);
|
||||
}
|
||||
|
||||
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
|
||||
{
|
||||
return static_cast<Msg&>(*this).readStateSendData(sock, rs, buf, isFinal);
|
||||
}
|
||||
|
||||
inline bool readStateNext(ReadState& rs)
|
||||
{
|
||||
return static_cast<Msg&>(*this).readStateNext(rs);
|
||||
}
|
||||
|
||||
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
|
||||
{
|
||||
return static_cast<Msg&>(*this).getReadLength(rs, len);
|
||||
}
|
||||
|
||||
inline size_t getBuffers(NetMessage::ResponseContext& ctx, char** dataBuf, char** sendBuf)
|
||||
{
|
||||
return static_cast<Msg&>(*this).getBuffers(ctx, dataBuf, sendBuf);
|
||||
}
|
||||
|
||||
public:
|
||||
inline unsigned getMsgHeaderUserID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getMsgHeaderUserID();
|
||||
}
|
||||
|
||||
inline bool isMsgHeaderFeatureFlagSet(unsigned flag) const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).isMsgHeaderFeatureFlagSet(flag);
|
||||
}
|
||||
|
||||
inline uint16_t getTargetID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getTargetID();
|
||||
}
|
||||
|
||||
inline int64_t getOffset() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getOffset();
|
||||
}
|
||||
|
||||
inline int64_t getCount() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getCount();
|
||||
}
|
||||
|
||||
inline const char* getFileHandleID()
|
||||
{
|
||||
return static_cast<Msg&>(*this).getFileHandleID();
|
||||
}
|
||||
|
||||
inline NumNodeID getClientNumID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getClientNumID();
|
||||
}
|
||||
|
||||
inline unsigned getAccessFlags() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getAccessFlags();
|
||||
}
|
||||
|
||||
inline PathInfo* getPathInfo ()
|
||||
{
|
||||
return static_cast<Msg&>(*this).getPathInfo();
|
||||
}
|
||||
|
||||
inline bool isMsgValid() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).isMsgValid();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Implements the Version 2 send protocol. It uses a preceding length info for each chunk.
|
||||
*/
|
||||
class ReadLocalFileV2MsgSender : public ReadLocalFileV2Msg
|
||||
{
|
||||
/* note on protocol: this works by sending an int64 before each data chunk, which contains the
|
||||
length of the next data chunk; or a zero if no more data can be read; or a negative fhgfs
|
||||
error code in case of an error */
|
||||
public:
|
||||
struct ReadState : public ReadStateBase
|
||||
{
|
||||
ReadState(const char* logContext, uint64_t toBeRead,
|
||||
SessionLocalFile* sessionLocalFile) :
|
||||
ReadStateBase(logContext, toBeRead, sessionLocalFile) {}
|
||||
};
|
||||
|
||||
private:
|
||||
friend class ReadLocalFileMsgExBase<ReadLocalFileV2MsgSender, ReadState>;
|
||||
|
||||
static std::string logContextPref;
|
||||
|
||||
/**
|
||||
* Send only length information without a data packet. Typically used for the final length
|
||||
* info at the end of the requested data.
|
||||
*/
|
||||
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
|
||||
{
|
||||
lengthInfo = HOST_TO_LE_64(lengthInfo);
|
||||
sock->send(&lengthInfo, sizeof(int64_t), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* No-op for this implementation.
|
||||
*/
|
||||
inline bool readStateInit(ReadState& rs)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send length information and the corresponding data packet buffer.
|
||||
*
|
||||
* Note: rs.readRes is used to compute buf length for send()
|
||||
*
|
||||
* @param rs.readRes must not be negative
|
||||
* @param buf the buffer with a preceding gap for the length info
|
||||
* @param isFinal true if this is the last send, i.e. we have read all data
|
||||
*/
|
||||
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
|
||||
{
|
||||
ssize_t sendRes;
|
||||
{
|
||||
Serializer ser(buf, sizeof(int64_t));
|
||||
ser % rs.readRes;
|
||||
}
|
||||
|
||||
if (isFinal)
|
||||
{
|
||||
Serializer ser(buf + sizeof(int64_t) + rs.readRes, sizeof(int64_t));
|
||||
ser % int64_t(0);
|
||||
sendRes = sock->send(buf, (2*sizeof(int64_t) ) + rs.readRes, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
sendRes = sock->send(buf, sizeof(int64_t) + rs.readRes, 0);
|
||||
}
|
||||
return sendRes;
|
||||
}
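/* Editorial note (not part of the original source): wire layout produced per call above --
 *    non-final:  [int64 readRes][readRes bytes of chunk data]
 *    final:      [int64 readRes][readRes bytes of chunk data][int64 0]
 * A negative int64 sent via sendLengthInfo() carries an FhgfsOpsErr code instead of a length. */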
|
||||
|
||||
/**
|
||||
* No-op for this implementation.
|
||||
*/
|
||||
inline bool readStateNext(ReadState& rs)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
|
||||
{
|
||||
return len;
|
||||
}
|
||||
|
||||
size_t getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf);
|
||||
};
|
||||
|
||||
typedef ReadLocalFileMsgExBase<ReadLocalFileV2MsgSender,
|
||||
ReadLocalFileV2MsgSender::ReadState> ReadLocalFileV2MsgEx;
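/* Editorial note (not part of the original source): a minimal sketch of how a receiver would
 * consume the V2 stream described above. recvExact() is a hypothetical helper that loops until
 * exactly 'len' bytes were read; byte order handling is omitted on the assumption of a
 * little-endian host (the sender applies HOST_TO_LE_64). Kept inert via #if 0. */
#if 0
#include <cstdint>
#include <vector>

bool recvExact(int sock, void* buf, size_t len); // hypothetical: loop over recv() until done

bool consumeReadV2Stream(int sock, std::vector<char>& outData)
{
   for( ; ; )
   {
      int64_t lengthInfo;
      if(!recvExact(sock, &lengthInfo, sizeof(lengthInfo) ) )
         return false; // connection error

      if(lengthInfo == 0)
         return true;  // end of stream, all requested data arrived
      if(lengthInfo < 0)
         return false; // negative value is a fhgfs error code

      size_t oldSize = outData.size();
      outData.resize(oldSize + lengthInfo);
      if(!recvExact(sock, &outData[oldSize], lengthInfo) )
         return false;
   }
}
#endif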
|
||||
|
||||
926
storage/source/net/message/session/rw/WriteLocalFileMsgEx.cpp
Normal file
@@ -0,0 +1,926 @@
|
||||
#include <program/Program.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/SessionTk.h>
|
||||
#include <common/toolkit/StorageTk.h>
|
||||
#include <net/msghelpers/MsgHelperIO.h>
|
||||
#include <storage/StorageTargets.h>
|
||||
#include <toolkit/StorageTkEx.h>
|
||||
#include "WriteLocalFileMsgEx.h"
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include "WriteLocalFileRDMAMsgEx.h"
|
||||
#endif
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
static WriteLocalFileMsgEx forcedLinkage;
|
||||
#ifdef BEEGFS_NVFS
|
||||
static WriteLocalFileRDMAMsgEx forcedLinkageRDMA;
|
||||
#endif
|
||||
|
||||
const std::string WriteLocalFileMsgSender::logContextPref = "WriteChunkFileMsg";
|
||||
#ifdef BEEGFS_NVFS
|
||||
const std::string WriteLocalFileRDMAMsgSender::logContextPref = "WriteChunkFileRDMAMsg";
|
||||
#endif
|
||||
|
||||
template <class Msg, typename WriteState>
|
||||
bool WriteLocalFileMsgExBase<Msg, WriteState>::processIncoming(NetMessage::ResponseContext& ctx)
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
|
||||
bool success;
|
||||
int64_t writeClientRes;
|
||||
|
||||
if (!isMsgValid())
|
||||
{
|
||||
sendResponse(ctx, FhgfsOpsErr_INVAL);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::tie(success, writeClientRes) = write(ctx);
|
||||
|
||||
if (success)
|
||||
{
|
||||
sendResponse(ctx, writeClientRes);
|
||||
|
||||
// update operation counters
|
||||
|
||||
if (likely(writeClientRes > 0))
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
|
||||
StorageOpCounter_WRITEOPS, writeClientRes, getMsgHeaderUserID());
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
template <class Msg, typename WriteState>
|
||||
std::pair<bool, int64_t> WriteLocalFileMsgExBase<Msg, WriteState>::write(NetMessage::ResponseContext& ctx)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " incoming";
|
||||
|
||||
App* app = Program::getApp();
|
||||
|
||||
int64_t writeClientRes = -(int64_t)FhgfsOpsErr_INTERNAL; // bytes written or negative fhgfs err
|
||||
FhgfsOpsErr finishMirroringRes = FhgfsOpsErr_INTERNAL;
|
||||
std::string fileHandleID(getFileHandleID() );
|
||||
bool isMirrorSession = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
|
||||
|
||||
bool serverCrashed = false;
|
||||
QuotaExceededErrorType quotaExceeded = QuotaExceededErrorType_NOT_EXCEEDED;
|
||||
|
||||
SessionStore* sessions = Program::getApp()->getSessions();
|
||||
auto session = sessions->referenceOrAddSession(getClientNumID());
|
||||
SessionLocalFileStore* sessionLocalFiles = session->getLocalFiles();
|
||||
|
||||
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
|
||||
bool chunkLocked = false;
|
||||
|
||||
// select the right targetID
|
||||
|
||||
uint16_t targetID = getTargetID();
|
||||
|
||||
if(isMirrorSession)
|
||||
{ // given targetID refers to a buddy mirror group
|
||||
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
|
||||
|
||||
targetID = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
|
||||
mirrorBuddies->getSecondaryTargetID(targetID) :
|
||||
mirrorBuddies->getPrimaryTargetID(targetID);
|
||||
|
||||
// note: only log message here, error handling will happen below through invalid targetFD
|
||||
if(unlikely(!targetID) )
|
||||
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
|
||||
StringTk::uintToStr(getTargetID() ) );
|
||||
}
|
||||
|
||||
auto* const target = app->getStorageTargets()->getTarget(targetID);
|
||||
if (!target)
|
||||
{
|
||||
if (isMirrorSession)
|
||||
{ /* buddy mirrored file => fail with Err_COMMUNICATION to make the requestor retry.
|
||||
mgmt will mark this target as (p)offline in a few moments. */
|
||||
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
|
||||
return {false, FhgfsOpsErr_COMMUNICATION};
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
|
||||
return {false, FhgfsOpsErr_UNKNOWNTARGET};
|
||||
}
|
||||
|
||||
// check if we already have session for this file...
|
||||
|
||||
auto sessionLocalFile = sessionLocalFiles->referenceSession(
|
||||
fileHandleID, targetID, isMirrorSession);
|
||||
|
||||
if(!sessionLocalFile)
|
||||
{ // sessionLocalFile not exists yet => create, insert, re-get it
|
||||
|
||||
if(doSessionCheck() )
|
||||
{ // server crashed during the write and may have lost cached data => send error to client
|
||||
LogContext log(logContext);
|
||||
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
|
||||
"No session for file available. "
|
||||
"FileHandleID: " + fileHandleID);
|
||||
|
||||
serverCrashed = true;
|
||||
}
|
||||
|
||||
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
|
||||
int openFlags = SessionTk::sysOpenFlagsFromFhgfsAccessFlags(getAccessFlags() );
|
||||
|
||||
auto newFile = boost::make_unique<SessionLocalFile>(fileHandleID, targetID, fileID, openFlags,
|
||||
serverCrashed);
|
||||
|
||||
if(isMirrorSession)
|
||||
newFile->setIsMirrorSession(true);
|
||||
|
||||
sessionLocalFile = sessionLocalFiles->addAndReferenceSession(std::move(newFile));
|
||||
}
|
||||
else
|
||||
{ // session file exists
|
||||
|
||||
if(doSessionCheck() && sessionLocalFile->isServerCrashed() )
|
||||
{ // server crashed during the write and may have lost cached data => send error to client
|
||||
LogContext log(logContext);
|
||||
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.)"
|
||||
"The session is marked as dirty. "
|
||||
"FileHandleID: " + fileHandleID);
|
||||
|
||||
serverCrashed = true;
|
||||
}
|
||||
}
|
||||
|
||||
// check if the size quota is exceeded for the user or group
|
||||
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) &&
|
||||
app->getConfig()->getQuotaEnableEnforcement() )
|
||||
{
|
||||
quotaExceeded = app->getExceededQuotaStores()->get(targetID)->isQuotaExceeded(getUserID(),
|
||||
getGroupID(), QuotaLimitType_SIZE);
|
||||
|
||||
if(quotaExceeded != QuotaExceededErrorType_NOT_EXCEEDED)
|
||||
{
|
||||
LogContext(logContext).log(Log_NOTICE,
|
||||
QuotaData::QuotaExceededErrorTypeToString(quotaExceeded) + " "
|
||||
"UID: " + StringTk::uintToStr(this->getUserID()) + "; "
|
||||
"GID: " + StringTk::uintToStr(this->getGroupID() ) );
|
||||
|
||||
// receive the message content before return with error
|
||||
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
|
||||
writeClientRes = -(int64_t) FhgfsOpsErr_DQUOT;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if(isMirrorSession && target->getBuddyResyncInProgress())
|
||||
{
|
||||
// mirrored chunk should be modified, check if resync is in progress and lock chunk
|
||||
std::string chunkID = sessionLocalFile->getFileID();
|
||||
chunkLockStore->lockChunk(targetID, chunkID);
|
||||
chunkLocked = true;
|
||||
}
|
||||
|
||||
// prepare file descriptor (if file not open yet then create/open it)
|
||||
FhgfsOpsErr openRes = openFile(*target, sessionLocalFile.get());
|
||||
if(unlikely(openRes != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
|
||||
writeClientRes = -(int64_t)openRes;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// store mirror node reference in session and init mirrorToSock member
|
||||
FhgfsOpsErr prepMirrorRes = prepareMirroring(ctx.getBuffer(), ctx.getBufferLength(),
|
||||
sessionLocalFile.get(), *target);
|
||||
if(unlikely(prepMirrorRes != FhgfsOpsErr_SUCCESS) )
|
||||
{ // mirroring failed
|
||||
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
|
||||
writeClientRes = -(int64_t)prepMirrorRes;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
||||
// the actual write workhorse
|
||||
|
||||
int64_t writeLocalRes = incrementalRecvAndWriteStateful(ctx, sessionLocalFile.get());
|
||||
|
||||
// update client result, offset etc.
|
||||
|
||||
int64_t newOffset;
|
||||
|
||||
if(unlikely(writeLocalRes < 0) )
|
||||
newOffset = -1; // writing failed
|
||||
else
|
||||
{ // writing succeeded
|
||||
newOffset = getOffset() + writeLocalRes;
|
||||
ctx.getStats()->incVals.diskWriteBytes += writeLocalRes; // update stats
|
||||
}
|
||||
|
||||
sessionLocalFile->setOffset(newOffset);
|
||||
|
||||
writeClientRes = writeLocalRes;
|
||||
|
||||
}
|
||||
catch(SocketException& e)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("SocketException occurred: ") + e.what() );
|
||||
LogContext(logContext).log(Log_WARNING, std::string("Details: ") +
|
||||
"sessionID: " + getClientNumID().str() + "; "
|
||||
"fileHandle: " + std::string(sessionLocalFile->getFileHandleID() ) + "; "
|
||||
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
|
||||
"count: " + StringTk::int64ToStr(getCount() ) );
|
||||
|
||||
sessionLocalFile->setOffset(-1); // invalidate offset
|
||||
|
||||
finishMirroring(sessionLocalFile.get(), *target);
|
||||
|
||||
if (chunkLocked)
|
||||
{
|
||||
std::string chunkID = sessionLocalFile->getFileID();
|
||||
chunkLockStore->unlockChunk(targetID, chunkID);
|
||||
}
|
||||
|
||||
return {false, -1};
|
||||
}
|
||||
|
||||
|
||||
cleanup:
|
||||
finishMirroringRes = finishMirroring(sessionLocalFile.get(), *target);
|
||||
|
||||
// check mirroring result (don't overwrite local error code, if any)
|
||||
if(likely(writeClientRes > 0) )
|
||||
{ // no local error => check mirroring result
|
||||
if(unlikely(finishMirroringRes != FhgfsOpsErr_SUCCESS) )
|
||||
writeClientRes = -finishMirroringRes; // mirroring failed => use err code as client result
|
||||
}
|
||||
|
||||
if (chunkLocked)
|
||||
{
|
||||
std::string chunkID = sessionLocalFile->getFileID();
|
||||
chunkLockStore->unlockChunk(targetID, chunkID);
|
||||
}
|
||||
|
||||
if (serverCrashed)
|
||||
writeClientRes = -(int64_t) FhgfsOpsErr_STORAGE_SRV_CRASHED;
|
||||
|
||||
return {true, writeClientRes};
|
||||
}
|
||||
|
||||
ssize_t WriteLocalFileMsgSender::recvPadding(ResponseContext& ctx, int64_t toBeReceived)
|
||||
{
|
||||
Config* cfg = Program::getApp()->getConfig();
|
||||
return ctx.getSocket()->recvT(ctx.getBuffer(),
|
||||
BEEGFS_MIN(toBeReceived, ctx.getBufferLength()), 0, cfg->getConnMsgMediumTimeout());
|
||||
}
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
|
||||
ssize_t WriteLocalFileRDMAMsgSender::recvPadding(ResponseContext& ctx, int64_t toBeReceived)
|
||||
{
|
||||
RdmaInfo* rdma = getRdmaInfo();
|
||||
uint64_t rBuf;
|
||||
size_t rLen;
|
||||
uint64_t rOff;
|
||||
|
||||
if (!rdma->next(rBuf, rLen, rOff))
|
||||
return -1;
|
||||
|
||||
ssize_t recvLength = BEEGFS_MIN(ctx.getBufferLength(), toBeReceived);
|
||||
recvLength = BEEGFS_MIN(recvLength, (ssize_t)(rLen - rOff));
|
||||
return ctx.getSocket()->read(ctx.getBuffer(), recvLength, 0, rBuf+rOff, rdma->key);
|
||||
}
|
||||
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
/**
|
||||
* Note: New offset is saved in the session by the caller afterwards (to make life easier).
|
||||
* @return number of written bytes or negative fhgfs error code
|
||||
*/
|
||||
template <class Msg, typename WriteState>
|
||||
int64_t WriteLocalFileMsgExBase<Msg, WriteState>::incrementalRecvAndWriteStateful(NetMessage::ResponseContext& ctx,
|
||||
SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (write incremental)";
|
||||
Config* cfg = Program::getApp()->getConfig();
|
||||
|
||||
// we can safely cast getTuneFileWriteSize to size_t below to make a comparison possible, as
|
||||
// it can technically never be negative and will therefore always fit into size_t
|
||||
const ssize_t exactStaticRecvSize = sessionLocalFile->getIsDirectIO()
|
||||
? ctx.getBufferLength()
|
||||
: BEEGFS_MIN(ctx.getBufferLength(), (size_t)cfg->getTuneFileWriteSize() );
|
||||
|
||||
auto& fd = sessionLocalFile->getFD();
|
||||
|
||||
int64_t oldOffset = sessionLocalFile->getOffset();
|
||||
int64_t newOffset = getOffset();
|
||||
bool useSyncRange = false; // true if sync_file_range should be called
|
||||
|
||||
if( (oldOffset < 0) || (oldOffset != newOffset) )
|
||||
sessionLocalFile->resetWriteCounter(); // reset sequential write counter
|
||||
else
|
||||
{ // continue at previous offset => increase sequential write counter
|
||||
LOG_DEBUG(logContext, Log_SPAM, "Offset: " + StringTk::int64ToStr(getOffset() ) );
|
||||
|
||||
sessionLocalFile->incWriteCounter(getCount() );
|
||||
|
||||
ssize_t syncSize = unlikely(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) ) ?
|
||||
0 : cfg->getTuneFileWriteSyncSize();
|
||||
if (syncSize && (sessionLocalFile->getWriteCounter() >= syncSize) )
|
||||
useSyncRange = true;
|
||||
}
|
||||
|
||||
// incrementally receive file contents...
|
||||
|
||||
WriteState writeState(logContext.c_str(), exactStaticRecvSize,
|
||||
getCount(), getOffset(), sessionLocalFile);
|
||||
if (!writeStateInit(writeState))
|
||||
return -FhgfsOpsErr_COMMUNICATION;
|
||||
|
||||
do
|
||||
{
|
||||
// receive some bytes...
|
||||
|
||||
LOG_DEBUG(logContext, Log_SPAM,
|
||||
"receiving... (remaining: " + StringTk::intToStr(writeState.toBeReceived) + ")");
|
||||
|
||||
ssize_t recvRes = writeStateRecvData(ctx, writeState);
|
||||
if (recvRes < 0)
|
||||
{
|
||||
LogContext(logContext).log(Log_WARNING, "Socket data transfer error occurred. ");
|
||||
return -FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
// forward to mirror...
|
||||
|
||||
FhgfsOpsErr mirrorRes = sendToMirror(ctx.getBuffer(), recvRes,
|
||||
writeState.writeOffset, writeState.toBeReceived, sessionLocalFile);
|
||||
if(unlikely(mirrorRes != FhgfsOpsErr_SUCCESS) )
|
||||
{ // mirroring failed
|
||||
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
|
||||
|
||||
return -FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
// write to underlying file system...
|
||||
|
||||
int errCode = 0;
|
||||
ssize_t writeRes = unlikely(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
|
||||
? recvRes
|
||||
: doWrite(*fd, ctx.getBuffer(), recvRes, writeState.writeOffset, errCode);
|
||||
|
||||
writeState.toBeReceived -= recvRes;
|
||||
|
||||
// handle write errors...
|
||||
|
||||
if(unlikely(writeRes != recvRes) )
|
||||
{ // didn't write all of the received data
|
||||
|
||||
if(writeRes == -1)
|
||||
{ // write error occurred
|
||||
LogContext(logContext).log(Log_WARNING, "Write error occurred. "
|
||||
"FileHandleID: " + sessionLocalFile->getFileHandleID() + "."
|
||||
"Target: " + StringTk::uintToStr(sessionLocalFile->getTargetID() ) + ". "
|
||||
"File: " + sessionLocalFile->getFileID() + ". "
|
||||
"SysErr: " + System::getErrString(errCode) );
|
||||
LogContext(logContext).log(Log_NOTICE, std::string("Additional info: "
|
||||
"FD: ") + StringTk::intToStr(*fd) + " " +
|
||||
"OpenFlags: " + StringTk::intToStr(sessionLocalFile->getOpenFlags() ) + " " +
|
||||
"received: " + StringTk::intToStr(recvRes) + ".");
|
||||
|
||||
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
|
||||
|
||||
return -FhgfsOpsErrTk::fromSysErr(errCode);
|
||||
}
|
||||
else
|
||||
{ // wrote only a part of the data, not all of it
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Unable to write all of the received data. "
|
||||
"target: " + StringTk::uintToStr(sessionLocalFile->getTargetID() ) + "; "
|
||||
"file: " + sessionLocalFile->getFileID() + "; "
|
||||
"sysErr: " + System::getErrString(errCode) );
|
||||
|
||||
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
|
||||
|
||||
// return bytes received so far minus num bytes that were not written with last write
|
||||
return (getCount() - writeState.toBeReceived) - (recvRes - writeRes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
writeState.writeOffset += writeRes;
|
||||
recvRes = writeStateNext(writeState, writeRes);
|
||||
if (recvRes != 0)
|
||||
return recvRes;
|
||||
} while(writeState.toBeReceived);
|
||||
|
||||
LOG_DEBUG(logContext, Log_SPAM,
|
||||
std::string("Received and wrote all the data") );
|
||||
|
||||
// commit to storage device queue...
|
||||
|
||||
if (useSyncRange)
|
||||
{
|
||||
// advise kernel to commit written data to storage device in max_sectors_kb chunks.
|
||||
|
||||
/* note: this is async if there are free slots in the request queue
|
||||
/sys/block/<...>/nr_requests. (optimal_io_size is not honoured as of linux-3.4) */
|
||||
|
||||
off64_t syncSize = sessionLocalFile->getWriteCounter();
|
||||
off64_t syncOffset = getOffset() + getCount() - syncSize;
|
||||
|
||||
MsgHelperIO::syncFileRange(*fd, syncOffset, syncSize);
|
||||
sessionLocalFile->resetWriteCounter();
|
||||
}
|
||||
|
||||
return getCount();
|
||||
}
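/* Editorial note (not part of the original source): MsgHelperIO::syncFileRange() is not shown in
 * this file; on Linux it presumably maps to sync_file_range(2). A standalone illustration of the
 * "kick off asynchronous writeback for the range just written" behaviour relied on above: */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>

static void startAsyncWriteback(int fd, off64_t offset, off64_t nbytes)
{
   /* queues writeback for [offset, offset+nbytes) without waiting for completion; this stays
      asynchronous as long as /sys/block/<dev>/queue/nr_requests has free slots */
   sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WRITE);
}
#endif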
|
||||
|
||||
/**
|
||||
* Write until everything was written (handle short-writes) or an error occurred
|
||||
*/
|
||||
template <class Msg, typename WriteState>
|
||||
ssize_t WriteLocalFileMsgExBase<Msg, WriteState>::doWrite(int fd, char* buf, size_t count, off_t offset, int& outErrno)
|
||||
{
|
||||
size_t sumWriteRes = 0;
|
||||
|
||||
do
|
||||
{
|
||||
ssize_t writeRes =
|
||||
MsgHelperIO::pwrite(fd, buf + sumWriteRes, count - sumWriteRes, offset + sumWriteRes);
|
||||
|
||||
if (unlikely(writeRes == -1) )
|
||||
{
|
||||
sumWriteRes = (sumWriteRes > 0) ? sumWriteRes : writeRes;
|
||||
outErrno = errno;
|
||||
break;
|
||||
}
|
||||
|
||||
sumWriteRes += writeRes;
|
||||
|
||||
} while (sumWriteRes != count);
|
||||
|
||||
return sumWriteRes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Receive and discard data.
|
||||
*/
|
||||
template <class Msg, typename WriteState>
|
||||
void WriteLocalFileMsgExBase<Msg, WriteState>::incrementalRecvPadding(NetMessage::ResponseContext& ctx,
|
||||
int64_t padLen, SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
uint64_t toBeReceived = padLen;
|
||||
|
||||
while(toBeReceived)
|
||||
{
|
||||
ssize_t recvRes = recvPadding(ctx, toBeReceived);
|
||||
if (recvRes == -1)
|
||||
break;
|
||||
// forward to mirror...
|
||||
|
||||
FhgfsOpsErr mirrorRes = sendToMirror(ctx.getBuffer(), recvRes,
|
||||
getOffset() + padLen - toBeReceived, toBeReceived, sessionLocalFile);
|
||||
if(unlikely(mirrorRes != FhgfsOpsErr_SUCCESS) )
|
||||
{ // mirroring failed
|
||||
/* ... but if we are in this method, then something went wrong anyway, so don't set
|
||||
needs-resync here or report any error to caller. */
|
||||
}
|
||||
|
||||
toBeReceived -= recvRes;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Msg, typename WriteState>
|
||||
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::openFile(const StorageTarget& target,
|
||||
SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (write incremental)";
|
||||
|
||||
bool useQuota = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA);
|
||||
bool enforceQuota = Program::getApp()->getConfig()->getQuotaEnableEnforcement();
|
||||
|
||||
bool isBuddyMirrorChunk = sessionLocalFile->getIsMirrorSession();
|
||||
|
||||
|
||||
if (sessionLocalFile->getFD().valid())
|
||||
return FhgfsOpsErr_SUCCESS; // file already open => nothing to be done here
|
||||
|
||||
|
||||
// file not open yet => get targetFD and check consistency state
|
||||
|
||||
const auto consistencyState = target.getConsistencyState();
|
||||
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
|
||||
|
||||
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
|
||||
isBuddyMirrorChunk &&
|
||||
!isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
|
||||
{ // this is a request for a buddymirrored chunk on a non-good primary
|
||||
LogContext(logContext).log(Log_NOTICE, "Refusing request. Target consistency is not good. "
|
||||
"targetID: " + StringTk::uintToStr(target.getID()));
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
SessionQuotaInfo quotaInfo(useQuota, enforceQuota, getUserID(), getGroupID() );
|
||||
|
||||
FhgfsOpsErr openChunkRes = sessionLocalFile->openFile(targetFD, getPathInfo(), true, &quotaInfo);
|
||||
|
||||
return openChunkRes;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Prepares mirroring by storing mirrorNode reference in file session and setting the mirrorToSock
|
||||
* member variable.
|
||||
*
|
||||
* Note: Mirror node reference needs to be released on file session close.
|
||||
*
|
||||
* @param buf used to send initial write msg header to mirror.
|
||||
|
||||
* @return FhgfsOpsErr_COMMUNICATION if communication with mirror failed.
|
||||
*/
|
||||
template <class Msg, typename WriteState>
|
||||
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::prepareMirroring(char* buf, size_t bufLen,
|
||||
SessionLocalFile* sessionLocalFile, StorageTarget& target)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (prepare mirroring)";
|
||||
|
||||
// check if mirroring is enabled
|
||||
|
||||
if(!isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_FORWARD) )
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
|
||||
App* app = Program::getApp();
|
||||
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
|
||||
TargetStateStore* targetStates = app->getTargetStateStore();
|
||||
|
||||
// check if secondary is offline or in unclear state
|
||||
|
||||
uint16_t secondaryTargetID = mirrorBuddies->getSecondaryTargetID(getTargetID() );
|
||||
if(unlikely(!secondaryTargetID) )
|
||||
{
|
||||
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
|
||||
StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
return FhgfsOpsErr_UNKNOWNTARGET;
|
||||
}
|
||||
|
||||
CombinedTargetState secondaryState;
|
||||
|
||||
bool getSecondaryStateRes = targetStates->getState(secondaryTargetID, secondaryState);
|
||||
if(unlikely(!getSecondaryStateRes) )
|
||||
{
|
||||
LOG_DEBUG(logContext, Log_DEBUG,
|
||||
"Refusing request. Secondary target has invalid state. "
|
||||
"targetID: " + StringTk::uintToStr(secondaryTargetID) );
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
if( (secondaryState.reachabilityState != TargetReachabilityState_ONLINE) ||
|
||||
(secondaryState.consistencyState != TargetConsistencyState_GOOD) )
|
||||
{
|
||||
if(secondaryState.reachabilityState == TargetReachabilityState_OFFLINE)
|
||||
{ // buddy is offline => mark needed resync and continue with local operation
|
||||
LOG_DEBUG(logContext, Log_DEBUG,
|
||||
"Secondary is offline and will need resync. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
// buddy is marked offline, so local msg processing will be done and buddy needs resync
|
||||
|
||||
target.setBuddyNeedsResync(true);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
|
||||
if(secondaryState.consistencyState != TargetConsistencyState_NEEDS_RESYNC)
|
||||
{ // unclear buddy state => client must try again
|
||||
LOG_DEBUG(logContext, Log_DEBUG,
|
||||
"Unclear secondary state, caller will have to try again later. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// store mirror node reference in session...
|
||||
|
||||
NodeHandle mirrorToNode = sessionLocalFile->getMirrorNode();
|
||||
|
||||
if(!mirrorToNode)
|
||||
{
|
||||
NodeStoreServers* storageNodes = app->getStorageNodes();
|
||||
TargetMapper* targetMapper = app->getTargetMapper();
|
||||
FhgfsOpsErr referenceErr;
|
||||
|
||||
mirrorToNode = storageNodes->referenceNodeByTargetID(secondaryTargetID, targetMapper,
|
||||
&referenceErr);
|
||||
|
||||
if(unlikely(referenceErr != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
LogContext(logContext).logErr(
|
||||
"Unable to forward to mirror target: " + StringTk::uintToStr(secondaryTargetID) + "; "
|
||||
"Error: " + boost::lexical_cast<std::string>(referenceErr));
|
||||
return referenceErr;
|
||||
}
|
||||
|
||||
mirrorToNode = sessionLocalFile->setMirrorNodeExclusive(mirrorToNode);
|
||||
}
|
||||
|
||||
// send initial write msg header to mirror (retry loop)...
|
||||
|
||||
for( ; ; )
|
||||
{
|
||||
try
|
||||
{
|
||||
// acquire connection to mirror node and send write msg...
|
||||
|
||||
mirrorToSock = mirrorToNode->getConnPool()->acquireStreamSocket();
|
||||
|
||||
WriteLocalFileMsg mirrorWriteMsg(getClientNumID(), getFileHandleID(), getTargetID(),
|
||||
getPathInfo(), getAccessFlags(), getOffset(), getCount());
|
||||
|
||||
if(doSessionCheck() )
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
|
||||
|
||||
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_DISABLE_IO);
|
||||
|
||||
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) )
|
||||
mirrorWriteMsg.setUserdataForQuota(getUserID(), getGroupID() );
|
||||
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
|
||||
|
||||
unsigned msgLength = mirrorWriteMsg.serializeMessage(buf, bufLen).second;
|
||||
mirrorToSock->send(buf, msgLength, 0);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
catch(SocketConnectException& e)
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL, "Unable to connect to mirror node: " +
|
||||
mirrorToNode->getNodeIDWithTypeStr() + "; "
|
||||
"Msg: " + e.what() );
|
||||
}
|
||||
catch(SocketException& e)
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL, "Communication with mirror node failed: " +
|
||||
mirrorToNode->getNodeIDWithTypeStr() + "; "
|
||||
"Msg: " + e.what() );
|
||||
|
||||
if(mirrorToSock)
|
||||
mirrorToNode->getConnPool()->invalidateStreamSocket(mirrorToSock);
|
||||
|
||||
mirrorToSock = NULL;
|
||||
}
|
||||
|
||||
// error occurred if we got here
|
||||
|
||||
if(!mirrorRetriesLeft)
|
||||
break;
|
||||
|
||||
mirrorRetriesLeft--;
|
||||
|
||||
// next round will be a retry
|
||||
LogContext(logContext).log(Log_NOTICE, "Retrying mirror communication: " +
|
||||
mirrorToNode->getNodeIDWithTypeStr() );
|
||||
|
||||
} // end of retry for-loop
|
||||
|
||||
|
||||
// all retries exhausted if we got here
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send file contents to mirror.
|
||||
*
|
||||
* Note: Supports retries only at beginning of write msg.
|
||||
*
|
||||
* @param buf the buffer that should be sent to the mirror.
|
||||
* @param offset the offset within the chunk file (only used if communication fails and we need to
|
||||
* start over with a new WriteMsg to the mirror).
|
||||
* @param toBeMirrored total remaining mirror data including given bufLen (only used for retries).
|
||||
* @return FhgfsOpsErr_COMMUNICATION if mirroring fails.
|
||||
*/
|
||||
template <class Msg, typename WriteState>
|
||||
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::sendToMirror(const char* buf, size_t bufLen,
|
||||
int64_t offset, int64_t toBeMirrored, SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (send to mirror)";
|
||||
|
||||
// check if mirroring enabled
|
||||
|
||||
if(!mirrorToSock)
|
||||
return FhgfsOpsErr_SUCCESS; // either no mirroring enabled or all retries exhausted
|
||||
|
||||
bool isRetryRound = false;
|
||||
|
||||
// send raw data (retry loop)...
|
||||
// (note: if sending fails, retrying requires sending of a new WriteMsg)
|
||||
|
||||
for( ; ; )
|
||||
{
|
||||
try
|
||||
{
|
||||
if(unlikely(isRetryRound) )
|
||||
{ // retry requires reconnect and resend of write msg with current offset
|
||||
|
||||
auto mirrorToNode = sessionLocalFile->getMirrorNode();
|
||||
|
||||
mirrorToSock = mirrorToNode->getConnPool()->acquireStreamSocket();
|
||||
|
||||
WriteLocalFileMsg mirrorWriteMsg(getClientNumID(), getFileHandleID(),
|
||||
getTargetID(), getPathInfo(), getAccessFlags(), offset, toBeMirrored);
|
||||
|
||||
if(doSessionCheck() )
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
|
||||
|
||||
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_DISABLE_IO);
|
||||
|
||||
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) )
|
||||
mirrorWriteMsg.setUserdataForQuota(getUserID(), getGroupID() );
|
||||
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
|
||||
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
|
||||
|
||||
const auto mirrorBuf = MessagingTk::createMsgVec(mirrorWriteMsg);
|
||||
|
||||
mirrorToSock->send(&mirrorBuf[0], mirrorBuf.size(), 0);
|
||||
}
|
||||
|
||||
mirrorToSock->send(buf, bufLen, 0);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
catch(SocketConnectException& e)
|
||||
{
|
||||
auto mirrorToNode = sessionLocalFile->getMirrorNode();
|
||||
|
||||
LogContext(logContext).log(Log_CRITICAL, "Unable to connect to mirror node: " +
|
||||
mirrorToNode->getNodeIDWithTypeStr() + "; "
|
||||
"Msg: " + e.what() );
|
||||
}
|
||||
catch(SocketException& e)
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL, "Communication with mirror node failed: " +
|
||||
sessionLocalFile->getMirrorNode()->getNodeIDWithTypeStr() + "; "
|
||||
"Msg: " + e.what() );
|
||||
|
||||
if(mirrorToSock)
|
||||
sessionLocalFile->getMirrorNode()->getConnPool()->invalidateStreamSocket(mirrorToSock);
|
||||
|
||||
mirrorToSock = NULL;
|
||||
}
|
||||
|
||||
// error occurred if we got here
|
||||
|
||||
if(!mirrorRetriesLeft)
|
||||
break;
|
||||
|
||||
// only allow retries if we're still at the beginning of the write msg.
|
||||
/* (this is because later we don't have all the client data available; and without the mirror
|
||||
response we don't know for sure whether previously sent data was really written or not.) */
|
||||
if(toBeMirrored != getCount() )
|
||||
break;
|
||||
|
||||
mirrorRetriesLeft--;
|
||||
|
||||
// next round will be a retry
|
||||
LogContext(logContext).log(Log_NOTICE, "Retrying mirror communication: " +
|
||||
sessionLocalFile->getMirrorNode()->getNodeIDWithTypeStr() );
|
||||
|
||||
isRetryRound = true;
|
||||
|
||||
} // end of retry for-loop
|
||||
|
||||
// all retries exhausted if we got here
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
/**
|
||||
* Receive response from mirror node, check result, clean up (release mirror sock).
|
||||
*
|
||||
* Note: Does not do retries on communication errors
|
||||
*/
|
||||
template <class Msg, typename WriteState>
|
||||
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::finishMirroring(SessionLocalFile* sessionLocalFile,
|
||||
StorageTarget& target)
|
||||
{
|
||||
std::string logContext = Msg::logContextPref + " (finish mirroring)";
|
||||
|
||||
// check if mirroring enabled
|
||||
|
||||
if(!mirrorToSock)
|
||||
return FhgfsOpsErr_SUCCESS; // mirroring disabled
|
||||
|
||||
App* app = Program::getApp();
|
||||
auto mirrorToNode = sessionLocalFile->getMirrorNode();
|
||||
|
||||
WriteLocalFileRespMsg* writeRespMsg;
|
||||
int64_t mirrorWriteRes;
|
||||
|
||||
|
||||
// receive write msg response from mirror...
|
||||
/* note: we don't have the file contents that were sent by the client anymore at this point, so
|
||||
we cannot do retries here with a new WriteMsg. */
|
||||
|
||||
try
|
||||
{
|
||||
// receive write msg response...
|
||||
|
||||
auto resp = MessagingTk::recvMsgBuf(*mirrorToSock);
|
||||
if (resp.empty())
|
||||
{ // error
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Failed to receive response from mirror: " + mirrorToSock->getPeername() );
|
||||
|
||||
goto cleanup_commerr;
|
||||
}
|
||||
|
||||
// got response => deserialize it...
|
||||
|
||||
auto respMsg = app->getNetMessageFactory()->createFromBuf(std::move(resp));
|
||||
|
||||
if(unlikely(respMsg->getMsgType() != NETMSGTYPE_WriteLocalFileResp) )
|
||||
{ // response invalid (wrong msgType)
|
||||
LogContext(logContext).logErr(
|
||||
"Received invalid response type: " + StringTk::intToStr(respMsg->getMsgType() ) +"; "
|
||||
"expected type: " + StringTk::intToStr(NETMSGTYPE_WriteLocalFileResp) + ". "
|
||||
"Disconnecting: " + mirrorToSock->getPeername() );
|
||||
|
||||
goto cleanup_commerr;
|
||||
}
|
||||
|
||||
// check mirror result and release mirror socket...
|
||||
|
||||
mirrorToNode->getConnPool()->releaseStreamSocket(mirrorToSock);
|
||||
|
||||
writeRespMsg = (WriteLocalFileRespMsg*)respMsg.get();
|
||||
mirrorWriteRes = writeRespMsg->getValue();
|
||||
|
||||
if(likely(mirrorWriteRes == getCount() ) )
|
||||
return FhgfsOpsErr_SUCCESS; // mirror successfully wrote all of the data
|
||||
|
||||
if(mirrorWriteRes >= 0)
|
||||
{ // mirror only wrote a part of the data
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Mirror did not write all of the data (no space left); "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
|
||||
"fileHandle: " + sessionLocalFile->getFileHandleID() );
|
||||
|
||||
return FhgfsOpsErr_NOSPACE;
|
||||
}
|
||||
|
||||
if(mirrorWriteRes == -FhgfsOpsErr_UNKNOWNTARGET)
|
||||
{
|
||||
/* local msg processing shall be done and buddy needs resync
|
||||
(this is normal when a storage server is restarted without a broken secondary target, so we
|
||||
report success to a client in this case) */
|
||||
|
||||
LogContext(logContext).log(Log_DEBUG,
|
||||
"Secondary reports unknown target error and will need resync. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
target.setBuddyNeedsResync(true);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
|
||||
if(mirrorWriteRes == -FhgfsOpsErr_STORAGE_SRV_CRASHED)
|
||||
LogContext(logContext).log(Log_NOTICE, "Potential cache loss for open file handle. "
|
||||
"(Mirror server crash detected.) "
|
||||
"FileHandleID: " + sessionLocalFile->getFileHandleID() + "; "
|
||||
"Mirror: " + mirrorToNode->getNodeIDWithTypeStr() );
|
||||
|
||||
// mirror encountered an error
|
||||
return (FhgfsOpsErr)-mirrorWriteRes; // write response contains negative fhgfs error code
|
||||
|
||||
}
|
||||
catch(SocketException& e)
|
||||
{
|
||||
LogContext(logContext).logErr(std::string("SocketException: ") + e.what() );
|
||||
LogContext(logContext).log(Log_WARNING, "Additional info: "
|
||||
"mirror node: " + mirrorToNode->getNodeIDWithTypeStr() + "; "
|
||||
"fileHandle: " + sessionLocalFile->getFileHandleID() );
|
||||
}
|
||||
|
||||
|
||||
// cleanup after communication error...
|
||||
|
||||
cleanup_commerr:
|
||||
mirrorToNode->getConnPool()->invalidateStreamSocket(mirrorToSock);
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
template <class Msg, typename WriteState>
|
||||
bool WriteLocalFileMsgExBase<Msg, WriteState>::doSessionCheck()
|
||||
{ // do session check only when it is not a mirror session
|
||||
return isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR) ? false :
|
||||
isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
|
||||
}
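/* Editorial note (not part of the original source): the exchange implemented above, which is
 * also replayed towards the mirror in prepareMirroring()/sendToMirror()/finishMirroring():
 *    1. the requestor sends a serialized WriteLocalFileMsg (handle, target, offset, count, flags),
 *    2. the requestor streams 'count' bytes of raw file data over the same connection,
 *    3. the server answers with a WriteLocalFileRespMsg whose int64 value is the number of bytes
 *       written, or a negative FhgfsOpsErr code on failure. */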
|
||||
213
storage/source/net/message/session/rw/WriteLocalFileMsgEx.h
Normal file
@@ -0,0 +1,213 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/session/rw/WriteLocalFileMsg.h>
|
||||
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
|
||||
#include <session/SessionLocalFile.h>
|
||||
#include <common/storage/StorageErrors.h>
|
||||
|
||||
|
||||
#define WRITEMSG_MIRROR_RETRIES_NUM 1
|
||||
|
||||
class StorageTarget;
|
||||
|
||||
/**
|
||||
* Contains common data needed by implementations of the network protocol
|
||||
* that receive data from the client.
|
||||
*/
|
||||
struct WriteStateBase
|
||||
{
|
||||
const char* logContext;
|
||||
ssize_t exactStaticRecvSize;
|
||||
ssize_t recvLength;
|
||||
int64_t toBeReceived;
|
||||
off_t writeOffset;
|
||||
SessionLocalFile* sessionLocalFile;
|
||||
|
||||
WriteStateBase(const char* logContext, ssize_t exactStaticRecvSize,
|
||||
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile)
|
||||
{
|
||||
this->logContext = logContext;
|
||||
this->exactStaticRecvSize = exactStaticRecvSize;
|
||||
this->toBeReceived = toBeReceived;
|
||||
this->writeOffset = writeOffset;
|
||||
this->sessionLocalFile = sessionLocalFile;
|
||||
recvLength = BEEGFS_MIN(exactStaticRecvSize, toBeReceived);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class Msg, typename WriteState>
|
||||
class WriteLocalFileMsgExBase : public Msg
|
||||
{
|
||||
|
||||
private:
|
||||
Socket* mirrorToSock;
|
||||
unsigned mirrorRetriesLeft;
|
||||
|
||||
public:
|
||||
bool processIncoming(NetMessage::ResponseContext& ctx);
|
||||
|
||||
WriteLocalFileMsgExBase() : Msg()
|
||||
{
|
||||
mirrorToSock = NULL;
|
||||
mirrorRetriesLeft = WRITEMSG_MIRROR_RETRIES_NUM;
|
||||
}
|
||||
|
||||
private:
|
||||
std::pair<bool, int64_t> write(NetMessage::ResponseContext& ctx);
|
||||
|
||||
ssize_t doWrite(int fd, char* buf, size_t count, off_t offset, int& outErrno);
|
||||
|
||||
FhgfsOpsErr openFile(const StorageTarget& target, SessionLocalFile* sessionLocalFile);
|
||||
|
||||
FhgfsOpsErr prepareMirroring(char* buf, size_t bufLen,
|
||||
SessionLocalFile* sessionLocalFile, StorageTarget& target);
|
||||
FhgfsOpsErr sendToMirror(const char* buf, size_t bufLen, int64_t offset, int64_t toBeMirrored,
|
||||
SessionLocalFile* sessionLocalFile);
|
||||
FhgfsOpsErr finishMirroring(SessionLocalFile* sessionLocalFile, StorageTarget& target);
|
||||
|
||||
bool doSessionCheck();
|
||||
|
||||
int64_t incrementalRecvAndWriteStateful(NetMessage::ResponseContext& ctx,
|
||||
SessionLocalFile* sessionLocalFile);
|
||||
|
||||
void incrementalRecvPadding(NetMessage::ResponseContext& ctx, int64_t padLen,
|
||||
SessionLocalFile* sessionLocalFile);
|
||||
|
||||
inline ssize_t recvPadding(NetMessage::ResponseContext& ctx, int64_t toBeReceived)
|
||||
{
|
||||
return static_cast<Msg&>(*this).recvPadding(ctx, toBeReceived);
|
||||
}
|
||||
|
||||
inline void sendResponse(NetMessage::ResponseContext& ctx, int err)
|
||||
{
|
||||
return static_cast<Msg&>(*this).sendResponse(ctx, err);
|
||||
}
|
||||
|
||||
inline bool writeStateInit(WriteState& ws)
|
||||
{
|
||||
return static_cast<Msg&>(*this).writeStateInit(ws);
|
||||
}
|
||||
|
||||
inline ssize_t writeStateRecvData(NetMessage::ResponseContext& ctx, WriteState& ws)
|
||||
{
|
||||
return static_cast<Msg&>(*this).writeStateRecvData(ctx, ws);
|
||||
}
|
||||
|
||||
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
|
||||
{
|
||||
return static_cast<Msg&>(*this).writeStateNext(ws, writeRes);
|
||||
}
|
||||
|
||||
public:
|
||||
inline bool isMsgValid() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).isMsgValid();
|
||||
}
|
||||
|
||||
inline bool isMsgHeaderFeatureFlagSet(unsigned flag) const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).isMsgHeaderFeatureFlagSet(flag);
|
||||
}
|
||||
|
||||
inline unsigned getMsgHeaderUserID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getMsgHeaderUserID();
|
||||
}
|
||||
|
||||
inline uint16_t getTargetID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getTargetID();
|
||||
}
|
||||
|
||||
inline int64_t getOffset() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getOffset();
|
||||
}
|
||||
|
||||
inline unsigned getUserID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getUserID();
|
||||
}
|
||||
|
||||
inline unsigned getGroupID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getGroupID();
|
||||
}
|
||||
|
||||
inline int64_t getCount() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getCount();
|
||||
}
|
||||
|
||||
inline const char* getFileHandleID()
|
||||
{
|
||||
return static_cast<Msg&>(*this).getFileHandleID();
|
||||
}
|
||||
|
||||
inline NumNodeID getClientNumID() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getClientNumID();
|
||||
}
|
||||
|
||||
inline unsigned getAccessFlags() const
|
||||
{
|
||||
return static_cast<const Msg&>(*this).getAccessFlags();
|
||||
}
|
||||
|
||||
inline PathInfo* getPathInfo ()
|
||||
{
|
||||
return static_cast<Msg&>(*this).getPathInfo();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Implements the standard (non-RDMA) receive protocol.
|
||||
*/
|
||||
class WriteLocalFileMsgSender : public WriteLocalFileMsg
|
||||
{
|
||||
public:
|
||||
struct WriteState : public WriteStateBase
|
||||
{
|
||||
WriteState(const char* logContext, ssize_t exactStaticRecvSize,
|
||||
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile) :
|
||||
WriteStateBase(logContext, exactStaticRecvSize, toBeReceived, writeOffset,
|
||||
sessionLocalFile) {}
|
||||
};
|
||||
|
||||
private:
|
||||
friend class WriteLocalFileMsgExBase<WriteLocalFileMsgSender, WriteState>;
|
||||
|
||||
static const std::string logContextPref;
|
||||
|
||||
ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
|
||||
|
||||
inline void sendResponse(ResponseContext& ctx, int err)
|
||||
{
|
||||
ctx.sendResponse(WriteLocalFileRespMsg(err));
|
||||
}
|
||||
|
||||
inline bool writeStateInit(WriteState& ws)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
inline ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws)
|
||||
{
|
||||
AbstractApp* app = PThread::getCurrentThreadApp();
|
||||
int connMsgMediumTimeout = app->getCommonConfig()->getConnMsgMediumTimeout();
|
||||
ws.recvLength = BEEGFS_MIN(ws.exactStaticRecvSize, ws.toBeReceived);
|
||||
return ctx.getSocket()->recvExactT(ctx.getBuffer(), ws.recvLength, 0, connMsgMediumTimeout);
|
||||
}
|
||||
|
||||
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
typedef WriteLocalFileMsgExBase<WriteLocalFileMsgSender,
|
||||
WriteLocalFileMsgSender::WriteState> WriteLocalFileMsgEx;
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef BEEGFS_NVFS
|
||||
#include <common/net/message/session/rw/WriteLocalFileRDMAMsg.h>
|
||||
#include <common/net/message/session/rw/WriteLocalFileRDMARespMsg.h>
|
||||
#include <common/components/worker/Worker.h>
|
||||
#include <session/SessionLocalFile.h>
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include "WriteLocalFileMsgEx.h"
|
||||
|
||||
|
||||
/**
|
||||
* Implements the RDMA read protocol: the server pulls data from the client's registered buffers via RDMA read.
|
||||
*/
|
||||
class WriteLocalFileRDMAMsgSender : public WriteLocalFileRDMAMsg
|
||||
{
|
||||
public:
|
||||
struct WriteState : public WriteStateBase
|
||||
{
|
||||
RdmaInfo* rdma;
|
||||
uint64_t rBuf;
|
||||
size_t rLen;
|
||||
uint64_t rOff;
|
||||
int64_t recvSize;
|
||||
|
||||
WriteState(const char* logContext, ssize_t exactStaticRecvSize,
|
||||
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile) :
|
||||
WriteStateBase(logContext, exactStaticRecvSize, toBeReceived, writeOffset,
|
||||
sessionLocalFile)
|
||||
{
|
||||
recvSize = toBeReceived;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
friend class WriteLocalFileMsgExBase<WriteLocalFileRDMAMsgSender, WriteState>;
|
||||
|
||||
static const std::string logContextPref;
|
||||
|
||||
ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
|
||||
|
||||
inline void sendResponse(ResponseContext& ctx, int err)
|
||||
{
|
||||
ctx.sendResponse(WriteLocalFileRDMARespMsg(err));
|
||||
}
|
||||
|
||||
inline bool writeStateInit(WriteState& ws)
|
||||
{
|
||||
ws.rdma = getRdmaInfo();
|
||||
if (unlikely(!ws.rdma->next(ws.rBuf, ws.rLen, ws.rOff)))
|
||||
{
|
||||
LogContext(ws.logContext).logErr("No entities in RDMA buffers.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws)
|
||||
{
|
||||
// Cannot RDMA anything larger than WORKER_BUFIN_SIZE in a single operation
|
||||
// because that is the size of the buffer passed in by the Worker.
|
||||
// TODO: pass around a Buffer with a length instead of unqualified char*.
|
||||
ws.recvLength = BEEGFS_MIN(
|
||||
BEEGFS_MIN(
|
||||
BEEGFS_MIN(ws.exactStaticRecvSize, ws.toBeReceived),
|
||||
(ssize_t)(ws.rLen - ws.rOff)),
|
||||
WORKER_BUFIN_SIZE);
|
||||
return ctx.getSocket()->read(ctx.getBuffer(), ws.recvLength, 0, ws.rBuf + ws.rOff, ws.rdma->key);
|
||||
}
|
||||
|
||||
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
|
||||
{
|
||||
ws.rOff += writeRes;
|
||||
if (ws.toBeReceived > 0 && ws.rOff == ws.rLen)
|
||||
{
|
||||
if (unlikely(!ws.rdma->next(ws.rBuf, ws.rLen, ws.rOff)))
|
||||
{
|
||||
LogContext(ws.logContext).logErr("RDMA buffers expended but not all data received. toBeReceived=" +
|
||||
StringTk::uint64ToStr(ws.toBeReceived) + "; "
|
||||
"target: " + StringTk::uintToStr(ws.sessionLocalFile->getTargetID() ) + "; "
|
||||
"file: " + ws.sessionLocalFile->getFileID() + "; ");
|
||||
return ws.recvSize - ws.toBeReceived;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
typedef WriteLocalFileMsgExBase<WriteLocalFileRDMAMsgSender,
|
||||
WriteLocalFileRDMAMsgSender::WriteState> WriteLocalFileRDMAMsgEx;
|
||||
|
||||
#endif /* BEEGFS_NVFS */
|
||||
|
||||
20
storage/source/net/message/storage/GetHighResStatsMsgEx.cpp
Normal file
@@ -0,0 +1,20 @@
|
||||
#include <program/Program.h>
|
||||
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include "GetHighResStatsMsgEx.h"
|
||||
|
||||
|
||||
bool GetHighResStatsMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
HighResStatsList statsHistory;
|
||||
uint64_t lastStatsMS = getValue();
|
||||
|
||||
// get stats history
|
||||
StatsCollector* statsCollector = Program::getApp()->getStatsCollector();
|
||||
statsCollector->getStatsSince(lastStatsMS, statsHistory);
|
||||
|
||||
ctx.sendResponse(GetHighResStatsRespMsg(&statsHistory) );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
12
storage/source/net/message/storage/GetHighResStatsMsgEx.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/net/message/storage/GetHighResStatsMsg.h>
|
||||
|
||||
|
||||
class GetHighResStatsMsgEx : public GetHighResStatsMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
56
storage/source/net/message/storage/StatStoragePathMsgEx.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <program/Program.h>
|
||||
#include <common/net/message/storage/StatStoragePathRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include "StatStoragePathMsgEx.h"
|
||||
|
||||
|
||||
bool StatStoragePathMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
int64_t sizeTotal = 0;
|
||||
int64_t sizeFree = 0;
|
||||
int64_t inodesTotal = 0;
|
||||
int64_t inodesFree = 0;
|
||||
|
||||
FhgfsOpsErr statRes = statStoragePath(&sizeTotal, &sizeFree, &inodesTotal, &inodesFree);
|
||||
|
||||
ctx.sendResponse(StatStoragePathRespMsg(statRes, sizeTotal, sizeFree, inodesTotal, inodesFree) );
|
||||
|
||||
App* app = Program::getApp();
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
|
||||
StorageOpCounter_STATSTORAGEPATH, getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
FhgfsOpsErr StatStoragePathMsgEx::statStoragePath(int64_t* outSizeTotal, int64_t* outSizeFree,
|
||||
int64_t* outInodesTotal, int64_t* outInodesFree)
|
||||
{
|
||||
const char* logContext = "StatStoragePathMsg (stat path)";
|
||||
|
||||
App* app = Program::getApp();
|
||||
|
||||
auto* const target = app->getStorageTargets()->getTarget(getTargetID());
|
||||
if (!target)
|
||||
{
|
||||
LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
return FhgfsOpsErr_UNKNOWNTARGET;
|
||||
}
|
||||
|
||||
const auto& targetPath = target->getPath().str();
|
||||
|
||||
bool statSuccess = StorageTk::statStoragePath(targetPath, outSizeTotal, outSizeFree,
|
||||
outInodesTotal, outInodesFree);
|
||||
if(unlikely(!statSuccess) )
|
||||
{ // error
|
||||
LogContext(logContext).logErr("Unable to statfs() storage path: " + targetPath +
|
||||
" (SysErr: " + System::getErrString() );
|
||||
|
||||
return FhgfsOpsErr_INTERNAL;
|
||||
}
|
||||
|
||||
// read and use value from manual free space override file (if it exists)
|
||||
StorageTk::statStoragePathOverride(targetPath, outSizeFree, outInodesFree);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
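/* Editorial note (not part of the original source): StorageTk::statStoragePath() is not shown in
 * this file; a typical statvfs(2)-based computation of the four output values looks like the
 * sketch below (free space taken from f_bavail, i.e. what unprivileged users may allocate): */
#if 0
#include <cstdint>
#include <string>
#include <sys/statvfs.h>

static bool statPathSketch(const std::string& path, int64_t* outSizeTotal, int64_t* outSizeFree,
   int64_t* outInodesTotal, int64_t* outInodesFree)
{
   struct statvfs statBuf;

   if(statvfs(path.c_str(), &statBuf) != 0)
      return false; // errno describes the failure

   *outSizeTotal = (int64_t)statBuf.f_blocks * statBuf.f_frsize;
   *outSizeFree = (int64_t)statBuf.f_bavail * statBuf.f_frsize;
   *outInodesTotal = statBuf.f_files;
   *outInodesFree = statBuf.f_favail;

   return true;
}
#endif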
|
||||
|
||||
18
storage/source/net/message/storage/StatStoragePathMsgEx.h
Normal file
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/net/message/storage/StatStoragePathMsg.h>
|
||||
|
||||
// stat of the path to the storage directory, result is similar to statfs
|
||||
|
||||
class StatStoragePathMsgEx : public StatStoragePathMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
|
||||
private:
|
||||
FhgfsOpsErr statStoragePath(int64_t* outSizeTotal, int64_t* outSizeFree,
|
||||
int64_t* outInodesTotal, int64_t* outInodesFree);
|
||||
};
|
||||
|
||||
|
||||
432
storage/source/net/message/storage/TruncLocalFileMsgEx.cpp
Normal file
@@ -0,0 +1,432 @@
|
||||
#include <common/net/message/control/GenericResponseMsg.h>
|
||||
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
|
||||
#include <net/msghelpers/MsgHelperIO.h>
|
||||
#include <program/Program.h>
|
||||
#include <toolkit/StorageTkEx.h>
|
||||
#include "TruncLocalFileMsgEx.h"
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
|
||||
#define TRUNCLOCALFILE_CHUNKOPENLAGS (O_CREAT|O_WRONLY|O_LARGEFILE)
|
||||
|
||||
|
||||
bool TruncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
|
||||
{
|
||||
const char* logContext = "TruncChunkFileMsg incoming";
|
||||
|
||||
App* app = Program::getApp();
|
||||
|
||||
uint16_t targetID;
|
||||
int targetFD;
|
||||
bool chunkLocked = false;
|
||||
FhgfsOpsErr clientErrRes;
|
||||
DynamicAttribs dynAttribs; // inits storageVersion to 0 (=> initially invalid)
|
||||
StorageTarget* target;
|
||||
|
||||
|
||||
// select the right targetID
|
||||
|
||||
targetID = getTargetID();
|
||||
|
||||
if(isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR) )
|
||||
{ // given targetID refers to a buddy mirror group
|
||||
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
|
||||
|
||||
targetID = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
|
||||
mirrorBuddies->getSecondaryTargetID(targetID) :
|
||||
mirrorBuddies->getPrimaryTargetID(targetID);
|
||||
|
||||
if(unlikely(!targetID) )
|
||||
{ // unknown group ID
|
||||
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
|
||||
StringTk::uintToStr(getTargetID() ) );
|
||||
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
|
||||
goto send_response;
|
||||
}
|
||||
}
|
||||
|
||||
target = app->getStorageTargets()->getTarget(targetID);
|
||||
if (!target)
|
||||
{ // unknown targetID
|
||||
if (isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR))
|
||||
{ /* buddy mirrored file => fail with GenericResp to make the caller retry.
|
||||
mgmt will mark this target as (p)offline in a few moments. */
|
||||
ctx.sendResponse(
|
||||
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
|
||||
return true;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
|
||||
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
|
||||
goto send_response;
|
||||
}
|
||||
|
||||
|
||||
{ // get targetFD and check consistency state
|
||||
bool skipResponse = false;
|
||||
|
||||
targetFD = getTargetFD(*target, ctx, &skipResponse);
|
||||
if(unlikely(targetFD == -1) )
|
||||
{ // failed => consistency state not good
|
||||
if(skipResponse)
|
||||
goto skip_response; // GenericResponseMsg sent
|
||||
|
||||
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
|
||||
goto send_response;
|
||||
}
|
||||
}
|
||||
|
||||
// forward to secondary (if appropriate)
|
||||
clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
|
||||
if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
|
||||
goto skip_response; // GenericResponseMsg sent
|
||||
|
||||
goto send_response;
|
||||
}
|
||||
|
||||
{ // valid targetID
|
||||
std::string entryID(getEntryID() );
|
||||
|
||||
// generate path to chunk file...
|
||||
|
||||
Path chunkDirPath;
|
||||
std::string chunkFilePathStr;
|
||||
const PathInfo *pathInfo = getPathInfo();
|
||||
bool hasOrigFeature = pathInfo->hasOrigFeature();
|
||||
|
||||
StorageTk::getChunkDirChunkFilePath(pathInfo, entryID, hasOrigFeature, chunkDirPath,
|
||||
chunkFilePathStr);
|
||||
|
||||
// truncate file...
|
||||
|
||||
clientErrRes = truncFile(targetID, targetFD, &chunkDirPath, chunkFilePathStr, entryID,
|
||||
hasOrigFeature);
|
||||
|
||||
/* clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFilesize() is a special case: we need to fake
* the attributes to inform the metaserver about the new file size with storageVersion != 0 */
|
||||
if(clientErrRes == FhgfsOpsErr_SUCCESS ||
|
||||
(clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFilesize() ) )
|
||||
{ // truncation successful
|
||||
LOG_DEBUG(logContext, Log_DEBUG, "File truncated: " + chunkFilePathStr);
|
||||
|
||||
// get updated dynamic attribs...
|
||||
|
||||
if(!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_NODYNAMICATTRIBS) )
|
||||
{
|
||||
if (clientErrRes == FhgfsOpsErr_SUCCESS)
|
||||
getDynamicAttribsByPath(targetFD, chunkFilePathStr.c_str(), targetID, entryID,
|
||||
dynAttribs);
|
||||
else
|
||||
{ // clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFileSize()
|
||||
getFakeDynAttribs(targetID, entryID, dynAttribs);
|
||||
}
|
||||
}
|
||||
|
||||
// change to SUCCESS if it was FhgfsOpsErr_PATHNOTEXISTS
|
||||
clientErrRes = FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
send_response:
|
||||
|
||||
if(chunkLocked) // unlock chunk
|
||||
app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );
|
||||
|
||||
// send response...
|
||||
ctx.sendResponse(
|
||||
TruncLocalFileRespMsg(clientErrRes, dynAttribs.filesize, dynAttribs.allocedBlocks,
|
||||
dynAttribs.modificationTimeSecs, dynAttribs.lastAccessTimeSecs,
|
||||
dynAttribs.storageVersion) );
|
||||
|
||||
skip_response:
|
||||
|
||||
// update operation counters
|
||||
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
|
||||
StorageOpCounter_TRUNCLOCALFILE, getMsgHeaderUserID() );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param outResponseSent true if a response was sent from within this method; can only be true if
|
||||
* -1 is returned.
|
||||
* @return -1 if consistency state was not good (in which case a special response is sent within
|
||||
* this method), otherwise the file descriptor to chunks dir (or mirror dir).
|
||||
*/
|
||||
int TruncLocalFileMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
|
||||
bool* outResponseSent)
|
||||
{
|
||||
bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR);
|
||||
|
||||
*outResponseSent = false;
|
||||
|
||||
// get targetFD and check consistency state
|
||||
|
||||
const auto consistencyState = target.getConsistencyState();
|
||||
const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();
|
||||
|
||||
if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
|
||||
isBuddyMirrorChunk &&
|
||||
!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
|
||||
{ // this is a msg to a non-good primary
|
||||
std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
|
||||
"targetID: " + StringTk::uintToStr(target.getID());
|
||||
|
||||
ctx.sendResponse(
|
||||
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));
|
||||
|
||||
*outResponseSent = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return targetFD;
|
||||
}
|
||||
|
||||
FhgfsOpsErr TruncLocalFileMsgEx::truncFile(uint16_t targetId, int targetFD,
|
||||
const Path* chunkDirPath, const std::string& chunkFilePathStr, std::string entryID,
|
||||
bool hasOrigFeature)
|
||||
{
|
||||
const char* logContext = "TruncLocalFileMsg incoming";
|
||||
App* app = Program::getApp();
|
||||
|
||||
FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
|
||||
|
||||
int truncRes = MsgHelperIO::truncateAt(targetFD, chunkFilePathStr.c_str(), getFilesize() );
|
||||
if(!truncRes)
|
||||
return FhgfsOpsErr_SUCCESS; // truncate succeeded
|
||||
|
||||
// file or path just doesn't exist or real error?
|
||||
|
||||
int truncErrCode = errno;
|
||||
|
||||
if(unlikely(truncErrCode != ENOENT) )
|
||||
{ // error
|
||||
clientErrRes = FhgfsOpsErrTk::fromSysErr(truncErrCode);
|
||||
if (clientErrRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
|
||||
LogContext(logContext).logErr("Unable to truncate file: " + chunkFilePathStr + ". " +
|
||||
"SysErr: " + System::getErrString(truncErrCode) );
|
||||
|
||||
return clientErrRes;
|
||||
}
|
||||
|
||||
// ENOENT => file (and possibly path to file (dirs) ) doesn't exist
|
||||
|
||||
/* note: if the file doesn't exist, it's generally not an error.
|
||||
but if it should grow to a certain size, we have to create it... */
|
||||
|
||||
if(!getFilesize() )
|
||||
return FhgfsOpsErr_PATHNOTEXISTS; // nothing to be done
|
||||
|
||||
// create the file and re-size it
|
||||
|
||||
bool useQuota = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_USE_QUOTA);
|
||||
bool enforceQuota = app->getConfig()->getQuotaEnableEnforcement();
|
||||
SessionQuotaInfo quotaInfo(useQuota, enforceQuota, getUserID(), getGroupID());
|
||||
const ExceededQuotaStorePtr exceededQuotaStore = app->getExceededQuotaStores()->get(targetId);
|
||||
|
||||
ChunkStore* chunkDirStore = app->getChunkDirStore();
|
||||
int fd;
|
||||
int openFlags = TRUNCLOCALFILE_CHUNKOPENLAGS;
|
||||
|
||||
FhgfsOpsErr mkChunkRes = chunkDirStore->openChunkFile(targetFD, chunkDirPath, chunkFilePathStr,
|
||||
hasOrigFeature, openFlags, &fd, "aInfo, exceededQuotaStore);
|
||||
|
||||
if (unlikely(mkChunkRes == FhgfsOpsErr_NOTOWNER && useQuota) )
|
||||
{
|
||||
// it already logs a message, so need to further check this ret value
|
||||
chunkDirStore->chmodV2ChunkDirPath(targetFD, chunkDirPath, entryID);
|
||||
|
||||
mkChunkRes = chunkDirStore->openChunkFile(
|
||||
targetFD, chunkDirPath, chunkFilePathStr, hasOrigFeature, openFlags, &fd, "aInfo,
|
||||
exceededQuotaStore);
|
||||
}
|
||||
|
||||
if (mkChunkRes != FhgfsOpsErr_SUCCESS)
|
||||
{
|
||||
if (mkChunkRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
|
||||
LogContext(logContext).logErr("Failed to create chunkFile: " + chunkFilePathStr);
|
||||
|
||||
return mkChunkRes;
|
||||
}
|
||||
|
||||
// file created => trunc it
|
||||
|
||||
int ftruncRes = ftruncate(fd, getFilesize() );
|
||||
if(unlikely(ftruncRes == -1) )
|
||||
{ // error
|
||||
clientErrRes = FhgfsOpsErrTk::fromSysErr(errno);
|
||||
if (clientErrRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
|
||||
LogContext(logContext).logErr(
|
||||
"Unable to truncate file (after creation): " + chunkFilePathStr + ". " +
|
||||
"Length: " + StringTk::int64ToStr(getFilesize() ) + ". " +
|
||||
"SysErr: " + System::getErrString() );
|
||||
}
|
||||
|
||||
// close file
|
||||
|
||||
int closeRes = close(fd);
|
||||
if(unlikely(closeRes == -1) )
|
||||
{ // error
|
||||
clientErrRes = FhgfsOpsErrTk::fromSysErr(errno);
|
||||
if (clientErrRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
|
||||
LogContext(logContext).logErr(
|
||||
"Unable to close file (after creation/truncation): " + chunkFilePathStr + ". " +
|
||||
"Length: " + StringTk::int64ToStr(getFilesize() ) + ". " +
|
||||
"SysErr: " + System::getErrString() );
|
||||
}
|
||||
|
||||
|
||||
return clientErrRes;
|
||||
}
|
||||
|
||||
bool TruncLocalFileMsgEx::getDynamicAttribsByPath(const int dirFD, const char* path,
|
||||
uint16_t targetID, std::string fileID, DynamicAttribs& outDynAttribs)
|
||||
{
|
||||
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
|
||||
|
||||
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
|
||||
|
||||
// note: this is locked because we need to get the filesize together with the storageVersion
|
||||
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(dirFD, path,
|
||||
&outDynAttribs.filesize, &outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
|
||||
&outDynAttribs.lastAccessTimeSecs);
|
||||
|
||||
if(getDynAttribsRes)
|
||||
outDynAttribs.storageVersion = storageVersion;
|
||||
|
||||
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
|
||||
|
||||
return getDynAttribsRes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: only for fileSize == 0 and if the file does not exist yet
|
||||
*/
|
||||
bool TruncLocalFileMsgEx::getFakeDynAttribs(uint16_t targetID, std::string fileID,
|
||||
DynamicAttribs& outDynAttribs)
|
||||
{
|
||||
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
|
||||
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
|
||||
|
||||
int64_t currentTimeSecs = TimeAbs().getTimeval()->tv_sec;
|
||||
|
||||
outDynAttribs.filesize = 0;
|
||||
outDynAttribs.allocedBlocks = 0;
|
||||
outDynAttribs.modificationTimeSecs = currentTimeSecs;
|
||||
outDynAttribs.lastAccessTimeSecs = currentTimeSecs; /* actually not correct, but better than
|
||||
* 1970 */
|
||||
outDynAttribs.storageVersion = storageVersion;
|
||||
|
||||
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
|
||||
*
|
||||
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
|
||||
* case *outChunkLocked==false is guaranteed).
|
||||
* @throw SocketException if sending of GenericResponseMsg fails.
|
||||
*/
|
||||
FhgfsOpsErr TruncLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
|
||||
bool* outChunkLocked)
|
||||
{
|
||||
const char* logContext = "TruncLocalFileMsgEx incoming (forward to secondary)";
|
||||
|
||||
App* app = Program::getApp();
|
||||
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
|
||||
|
||||
*outChunkLocked = false;
|
||||
|
||||
if(!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR) ||
|
||||
isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
|
||||
return FhgfsOpsErr_SUCCESS; // nothing to do
|
||||
|
||||
// mirrored chunk should be modified, check if resync is in progress and lock chunk
|
||||
*outChunkLocked = target.getBuddyResyncInProgress();
|
||||
if(*outChunkLocked)
|
||||
chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk
|
||||
|
||||
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
|
||||
addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
|
||||
|
||||
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_TruncLocalFileResp);
|
||||
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
|
||||
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
|
||||
|
||||
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
|
||||
|
||||
// remove the flag that we just added for secondary
|
||||
unsetMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
|
||||
|
||||
if(unlikely(
|
||||
(commRes == FhgfsOpsErr_COMMUNICATION) &&
|
||||
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
|
||||
{
|
||||
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
// buddy is marked offline, so local msg processing will be done and buddy needs resync
|
||||
target.setBuddyNeedsResync(true);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
|
||||
}
|
||||
|
||||
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
|
||||
"error: " + boost::lexical_cast<std::string>(commRes));
|
||||
|
||||
if(*outChunkLocked)
|
||||
{ // unlock chunk
|
||||
chunkLockStore->unlockChunk(target.getID(), getEntryID() );
|
||||
*outChunkLocked = false;
|
||||
}
|
||||
|
||||
std::string genericRespStr = "Communication with secondary failed. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
|
||||
|
||||
ctx.sendResponse(
|
||||
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
|
||||
|
||||
return FhgfsOpsErr_COMMUNICATION;
|
||||
}
|
||||
|
||||
TruncLocalFileRespMsg* respMsg = (TruncLocalFileRespMsg*)rrArgs.outRespMsg.get();
|
||||
FhgfsOpsErr secondaryRes = respMsg->getResult();
|
||||
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
|
||||
{
|
||||
if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
|
||||
{
|
||||
/* local msg processing shall be done and buddy needs resync
|
||||
(this is normal when a storage is restarted without a broken secondary target, so we
|
||||
report success to a client in this case) */
|
||||
|
||||
LogContext(logContext).log(Log_DEBUG,
|
||||
"Secondary reports unknown target error and will need resync. "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
|
||||
target.setBuddyNeedsResync(true);
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
|
||||
if(secondaryRes != FhgfsOpsErr_TOOBIG) // "too big" is a valid error if max filesize exceeded
|
||||
{
|
||||
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
|
||||
boost::lexical_cast<std::string>(secondaryRes) + "; "
|
||||
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
|
||||
}
|
||||
|
||||
return secondaryRes;
|
||||
}
|
||||
|
||||
return FhgfsOpsErr_SUCCESS;
|
||||
}
|
||||
|
||||
37
storage/source/net/message/storage/TruncLocalFileMsgEx.h
Normal file
@@ -0,0 +1,37 @@
#pragma once

#include <common/net/message/storage/TruncLocalFileMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/Path.h>

class StorageTarget;

class TruncLocalFileMsgEx : public TruncLocalFileMsg
{
   private:
      struct DynamicAttribs
      {
         DynamicAttribs() : filesize(0), allocedBlocks(0), modificationTimeSecs(0),
            lastAccessTimeSecs(0), storageVersion(0) {}

         int64_t filesize;
         int64_t allocedBlocks; // allocated 512-byte blocks (relevant for sparse files)
         int64_t modificationTimeSecs;
         int64_t lastAccessTimeSecs;
         uint64_t storageVersion;
      };

   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      FhgfsOpsErr truncFile(uint16_t targetId, int targetFD, const Path* chunkDirPath,
         const std::string& chunkFilePathStr, std::string entryID, bool hasOrigFeature);
      int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
      bool getDynamicAttribsByPath(const int dirFD, const char* path, uint16_t targetID,
         std::string fileID, DynamicAttribs& outDynAttribs);
      bool getFakeDynAttribs(uint16_t targetID, std::string fileID, DynamicAttribs& outDynAttribs);
      FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
         bool* outChunkLocked);
};
@@ -0,0 +1,152 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsRespMsg.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "GetChunkFileAttribsMsgEx.h"


bool GetChunkFileAttribsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "GetChunkFileAttribsMsg incoming";

   App* app = Program::getApp();

   std::string entryID(getEntryID() );

   FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
   int targetFD;
   struct stat statbuf{};
   uint64_t storageVersion = 0;

   // select the right targetID

   uint16_t targetID = getTargetID();

   if(isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      // note: only log a message here, error handling will happen below through invalid targetFD
      if(unlikely(!targetID) )
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
   }

   auto* const target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   {
      if (isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR))
      { /* buddy mirrored file => fail with GenericResp to make the caller retry.
           mgmt will mark this target as (p)offline in a few moments. */
         LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
         ctx.sendResponse(
            GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
         return true;
      }

      LOG(GENERAL, ERR, "Unknown target ID.", targetID);
      clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
      goto send_response;
   }

   { // get targetFD and check consistency state
      bool skipResponse = false;

      targetFD = getTargetFD(*target, ctx, &skipResponse);
      if(unlikely(targetFD == -1) )
      { // failed => consistency state not good
         memset(&statbuf, 0, sizeof(statbuf) ); // (just to mute clang warning)

         if(skipResponse)
            goto skip_response; // GenericResponseMsg sent

         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   { // valid targetID
      SyncedStoragePaths* syncedPaths = app->getSyncedStoragePaths();

      int statErrCode = 0;

      std::string chunkPath = StorageTk::getFileChunkPath(getPathInfo(), entryID);

      uint64_t newStorageVersion = syncedPaths->lockPath(entryID, targetID); // L O C K path

      int statRes = fstatat(targetFD, chunkPath.c_str(), &statbuf, 0);
      if(statRes)
      { // file does not exist or error
         statErrCode = errno;
      }
      else
      {
         storageVersion = newStorageVersion;
      }

      syncedPaths->unlockPath(entryID, targetID); // U N L O C K path

      // note: a non-existing file is not an error (storage version is 0, so nothing will be
      // updated at the metadata node)

      if((statRes == -1) && (statErrCode != ENOENT))
      { // error
         clientErrRes = FhgfsOpsErr_INTERNAL;

         LogContext(logContext).logErr(
            "Unable to stat file: " + chunkPath + ". " + "SysErr: "
            + System::getErrString(statErrCode));
      }
   }

send_response:
   ctx.sendResponse(
      GetChunkFileAttribsRespMsg(clientErrRes, statbuf.st_size, statbuf.st_blocks,
         statbuf.st_mtime, statbuf.st_atime, storageVersion) );

skip_response:

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
      StorageOpCounter_GETLOCALFILESIZE, getMsgHeaderUserID() );

   return true;
}

/**
 * @param outResponseSent true if a response was sent from within this method; can only be true if
 * -1 is returned.
 * @return -1 if consistency state was not good (in which case a special response is sent within
 * this method), otherwise the file descriptor to chunks dir (or mirror dir).
 */
int GetChunkFileAttribsMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   // get targetFD and check consistency state

   const auto consistencyState = target.getConsistencyState();
   const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();

   if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
      isBuddyMirrorChunk &&
      !isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND) )
   { // this is a msg to a non-good primary
      std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
         "targetID: " + StringTk::uintToStr(target.getID());

      ctx.sendResponse(
         GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));

      *outResponseSent = true;
      return -1;
   }

   return targetFD;
}
@@ -0,0 +1,15 @@
#pragma once

#include <common/net/message/storage/attribs/GetChunkFileAttribsMsg.h>

class StorageTarget;

class GetChunkFileAttribsMsgEx : public GetChunkFileAttribsMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
};
351
storage/source/net/message/storage/attribs/SetLocalAttrMsgEx.cpp
Normal file
@@ -0,0 +1,351 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/MessagingTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "SetLocalAttrMsgEx.h"

#include <utime.h>

#include <boost/lexical_cast.hpp>


bool SetLocalAttrMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "SetLocalAttrMsgEx incoming";

   App* app = Program::getApp();

   const SettableFileAttribs* attribs = getAttribs();
   int validAttribs = getValidAttribs();

   uint16_t targetID;
   bool chunkLocked = false;
   int targetFD;
   FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
   DynamicFileAttribs currentDynAttribs(0, 0, 0, 0, 0);
   StorageTarget* target;

   // select the right targetID

   targetID = getTargetID();

   if(isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown group ID
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   { // unknown targetID
      if (isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR))
      { /* buddy mirrored file => fail with GenericResp to make the caller retry.
           mgmt will mark this target as (p)offline in a few moments. */
         ctx.sendResponse(
            GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
         return true;
      }

      LOG(GENERAL, ERR, "Unknown target ID.", targetID);
      clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
      return true;
   }

   { // get targetFD and check consistency state
      bool skipResponse = false;

      targetFD = getTargetFD(*target, ctx, &skipResponse);
      if(unlikely(targetFD == -1) )
      { // failed => consistency state not good
         if(skipResponse)
            goto skip_response; // GenericResponseMsg sent

         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   // forward to secondary (if appropriate)
   clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
   if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
   {
      if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
         goto skip_response; // GenericResponseMsg sent

      goto send_response;
   }

   if(validAttribs & (SETATTR_CHANGE_MODIFICATIONTIME | SETATTR_CHANGE_LASTACCESSTIME) )
   { // we only handle access and modification time updates here
      struct timespec times[2] = {{0, 0}, {0, 0}};

      if (validAttribs & SETATTR_CHANGE_LASTACCESSTIME)
      {
         times[MsgHelperIO_ATIME_POS].tv_sec = attribs->lastAccessTimeSecs;
         times[MsgHelperIO_ATIME_POS].tv_nsec = 0;
      }
      else
         times[MsgHelperIO_ATIME_POS].tv_nsec = UTIME_OMIT;

      if (validAttribs & SETATTR_CHANGE_MODIFICATIONTIME)
      {
         times[MsgHelperIO_MTIME_POS].tv_sec = attribs->modificationTimeSecs;
         times[MsgHelperIO_MTIME_POS].tv_nsec = 0;
      }
      else
         times[MsgHelperIO_MTIME_POS].tv_nsec = UTIME_OMIT;

      // generate path to chunk file...

      std::string pathStr;

      pathStr = StorageTk::getFileChunkPath(getPathInfo(), getEntryID() );

      // update timestamps...

      // in case of a timestamp update we need extra information on the metadata server, namely
      // a storageVersion and the current dynamic attribs of the chunk
      // => set the new times while holding the lock and return the current attribs and a
      // storageVersion in the response later
      uint64_t storageVersion = Program::getApp()->getSyncedStoragePaths()->lockPath(getEntryID(),
         targetID);

      int utimeRes = MsgHelperIO::utimensat(targetFD, pathStr.c_str(), times, 0);

      if (utimeRes == 0)
      {
         bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(targetFD, pathStr.c_str(),
            &currentDynAttribs.fileSize, &currentDynAttribs.numBlocks,
            &currentDynAttribs.modificationTimeSecs, &currentDynAttribs.lastAccessTimeSecs);

         // If stat failed (after utimensat worked!), something really bad happened, so the
         // attribs are definitely invalid. Otherwise set storageVersion in dynAttribs.
         if (getDynAttribsRes)
            currentDynAttribs.storageVersion = storageVersion;
      }
      else if (errno == ENOENT)
      {
         // Entry doesn't exist. Not an error, but we need to return fake dynamic attributes for
         // the metadata server to calc the values (fake in this sense means we send back the
         // timestamps that we tried to set, but the real filesize and numBlocks, i.e. 0).
         currentDynAttribs.storageVersion = storageVersion;
         currentDynAttribs.fileSize = 0;
         currentDynAttribs.numBlocks = 0;
         currentDynAttribs.modificationTimeSecs = attribs->modificationTimeSecs;
         currentDynAttribs.lastAccessTimeSecs = attribs->lastAccessTimeSecs;
      }
      else
      { // error
         int errCode = errno;

         LogContext(logContext).logErr("Unable to change file time: " + pathStr + ". "
            "SysErr: " + System::getErrString());

         clientErrRes = FhgfsOpsErrTk::fromSysErr(errCode);
      }

      Program::getApp()->getSyncedStoragePaths()->unlockPath(getEntryID(), targetID);
   }

   if(isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_USE_QUOTA) &&
      (validAttribs & (SETATTR_CHANGE_USERID | SETATTR_CHANGE_GROUPID) ) )
   { // we only handle UID and GID updates here
      uid_t uid = -1;
      gid_t gid = -1;

      if(validAttribs & SETATTR_CHANGE_USERID)
         uid = attribs->userID;

      if(validAttribs & SETATTR_CHANGE_GROUPID)
         gid = attribs->groupID;

      // generate path to chunk file...

      std::string pathStr;

      pathStr = StorageTk::getFileChunkPath(getPathInfo(), getEntryID() );

      // update UID and GID...

      int chownRes = fchownat(targetFD, pathStr.c_str(), uid, gid, 0);
      if(chownRes == -1)
      { // could be an error
         int errCode = errno;

         if(errCode != ENOENT)
         { // unhandled chown() error
            LogContext(logContext).logErr("Unable to change file owner: " + pathStr + ". "
               "SysErr: " + System::getErrString() );

            clientErrRes = FhgfsOpsErrTk::fromSysErr(errCode);
         }
      }
   }


send_response:

   if(chunkLocked) // unlock chunk
      app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );

   ctx.sendResponse(SetLocalAttrRespMsg(clientErrRes, currentDynAttribs));

skip_response:

   // update operation counters...
   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_SETLOCALATTR,
      getMsgHeaderUserID() );

   return true;
}

/**
 * @param outResponseSent true if a response was sent from within this method; can only be true if
 * -1 is returned.
 * @return -1 if consistency state was not good (in which case a special response is sent within
 * this method), otherwise the file descriptor to chunks dir (or mirror dir).
 */
int SetLocalAttrMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   // get targetFD and check consistency state

   const auto consistencyState = target.getConsistencyState();
   const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();

   if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
      isBuddyMirrorChunk &&
      !isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) )
   { // this is a msg to a non-good primary
      std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
         "targetID: " + StringTk::uintToStr(target.getID());

      ctx.sendResponse(
         GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));

      *outResponseSent = true;
      return -1;
   }

   return targetFD;
}

/**
 * If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
 *
 * @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
 * case *outChunkLocked==false is guaranteed).
 * @throw SocketException if sending of GenericResponseMsg fails.
 */
FhgfsOpsErr SetLocalAttrMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
   bool* outChunkLocked)
{
   const char* logContext = "SetLocalAttrMsg incoming (forward to secondary)";

   App* app = Program::getApp();
   ChunkLockStore* chunkLockStore = app->getChunkLockStore();

   *outChunkLocked = false;

   if(!isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR) ||
      isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) )
      return FhgfsOpsErr_SUCCESS; // nothing to do

   // mirrored chunk should be modified, check if resync is in progress and lock chunk
   *outChunkLocked = target.getBuddyResyncInProgress();
   if(*outChunkLocked)
      chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk

   // instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
   addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);

   RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_SetLocalAttrResp);
   RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
      app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);

   FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   // remove the flag that we just added for secondary
   unsetMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);

   if(unlikely(
      (commRes == FhgfsOpsErr_COMMUNICATION) &&
      (rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
   {
      LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

      // buddy is marked offline, so local msg processing will be done and buddy needs resync
      target.setBuddyNeedsResync(true);

      return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
   }

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   {
      LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
         "error: " + boost::lexical_cast<std::string>(commRes));

      if(*outChunkLocked)
      { // unlock chunk
         chunkLockStore->unlockChunk(target.getID(), getEntryID() );
         *outChunkLocked = false;
      }

      std::string genericRespStr = "Communication with secondary failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );

      ctx.sendResponse(
         GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));

      return FhgfsOpsErr_COMMUNICATION;
   }

   const auto respMsg = (const SetLocalAttrRespMsg*)rrArgs.outRespMsg.get();
   FhgfsOpsErr secondaryRes = respMsg->getResult();
   if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
   {
      if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
      {
         /* local msg processing shall be done and buddy needs resync
            (this is normal when a storage is restarted without a broken secondary target, so we
            report success to a client in this case) */

         LogContext(logContext).log(Log_DEBUG,
            "Secondary reports unknown target error and will need resync. "
            "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

         target.setBuddyNeedsResync(true);

         return FhgfsOpsErr_SUCCESS;
      }

      LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
         boost::lexical_cast<std::string>(secondaryRes) + "; "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

      return secondaryRes;
   }

   return FhgfsOpsErr_SUCCESS;
}
@@ -0,0 +1,19 @@
#pragma once

#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/attribs/SetLocalAttrMsg.h>

class StorageTarget;

class SetLocalAttrMsgEx : public SetLocalAttrMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
      FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
         bool* outChunkLocked);
};
@@ -0,0 +1,26 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>

#include "CpChunkPathsMsgEx.h"

bool CpChunkPathsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "CpChunkPathsMsg incoming";

   LogContext(logContext).logErr("This message is not yet implemented. "
      "It should relay chunk information from metadata to storage and trigger a copy chunk "
      "operation.");

   FhgfsOpsErr cpMsgRes = FhgfsOpsErr_SUCCESS;
   ctx.sendResponse(CpChunkPathsRespMsg(cpMsgRes));
   return true;
}


ChunkBalancerJob* CpChunkPathsMsgEx::addChunkBalanceJob()
{
   std::lock_guard<Mutex> mutexLock(ChunkBalanceJobMutex);

   ChunkBalancerJob* chunkBalanceJob = nullptr;
   return chunkBalanceJob;
}
@@ -0,0 +1,16 @@
#pragma once

#include <common/net/message/storage/chunkbalancing/CpChunkPathsMsg.h>

class ChunkBalancerJob;

class CpChunkPathsMsgEx : public CpChunkPathsMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      Mutex ChunkBalanceJobMutex;

      ChunkBalancerJob* addChunkBalanceJob();
};
@@ -0,0 +1,57 @@
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>

#include "RmChunkPathsMsgEx.h"

bool RmChunkPathsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "RmChunkPathsMsg incoming";

   App* app = Program::getApp();
   ChunkStore* chunkStore = app->getChunkDirStore();

   uint16_t targetID;
   StringList& relativePaths = getRelativePaths();
   StringList failedPaths;

   targetID = getTargetID();

   auto* const target = app->getStorageTargets()->getTarget(targetID);

   if (!target)
   { // unknown targetID
      LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(targetID));
      failedPaths = relativePaths;
   }
   else
   { // valid targetID
      const int targetFD = isMsgHeaderFeatureFlagSet(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR)
         ? *target->getMirrorFD()
         : *target->getChunkFD();

      for(StringListIter iter = relativePaths.begin(); iter != relativePaths.end(); iter++)
      {
         // remove chunk
         int unlinkRes = unlinkat(targetFD, (*iter).c_str(), 0);

         if ( (unlinkRes != 0) && (errno != ENOENT) )
         {
            LogContext(logContext).logErr(
               "Unable to remove entry: " + *iter + "; error: " + System::getErrString());
            failedPaths.push_back(*iter);

            continue;
         }

         // removal succeeded; this might have been the last entry => try to remove parent directory
         Path parentDirPath(StorageTk::getPathDirname(*iter));

         chunkStore->rmdirChunkDirPath(targetFD, &parentDirPath);
      }
   }

   ctx.sendResponse(RmChunkPathsRespMsg(&failedPaths) );

   return true;
}
@@ -0,0 +1,10 @@
#pragma once

#include <common/net/message/storage/creating/RmChunkPathsMsg.h>

class RmChunkPathsMsgEx : public RmChunkPathsMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);
};
@@ -0,0 +1,268 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "UnlinkLocalFileMsgEx.h"

#include <boost/lexical_cast.hpp>

bool UnlinkLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "UnlinkChunkFileMsg incoming";

   App* app = Program::getApp();
   ChunkStore* chunkDirStore = app->getChunkDirStore();

   FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;

   uint16_t targetID;
   bool chunkLocked = false;
   int targetFD = -1;
   Path chunkDirPath;
   const PathInfo* pathInfo = getPathInfo();
   bool hasOrigFeature = pathInfo->hasOrigFeature();
   int unlinkRes = -1;
   StorageTarget* target;

   // select the right targetID

   targetID = getTargetID();

   if(isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown target
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   {
      if (isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR))
      { /* buddy mirrored file => fail with GenericResp to make the caller retry.
           mgmt will mark this target as (p)offline in a few moments. */
         ctx.sendResponse(
            GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
         return true;
      }

      LOG(GENERAL, ERR, "Unknown targetID.", targetID);
      clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
      return true;
   }

   { // get targetFD and check consistency state
      bool skipResponse = false;

      targetFD = getTargetFD(*target, ctx, &skipResponse);
      if(unlikely(targetFD == -1) )
      { // failed => consistency state not good
         if(skipResponse)
            goto skip_response; // GenericResponseMsg sent

         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   // forward to secondary (if appropriate)
   clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
   if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
   {
      if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
         goto skip_response; // GenericResponseMsg sent

      goto send_response;
   }

   { // valid targetID

      // generate path to chunk file...

      std::string chunkFilePathStr; // chunkDirPathStr + '/' + entryID

      StorageTk::getChunkDirChunkFilePath(pathInfo, getEntryID(), hasOrigFeature, chunkDirPath,
         chunkFilePathStr);

      unlinkRes = unlinkat(targetFD, chunkFilePathStr.c_str(), 0);

      if( (unlinkRes == -1) && (errno != ENOENT) )
      { // error
         LogContext(logContext).logErr("Unable to unlink file: " + chunkFilePathStr + ". " +
            "SysErr: " + System::getErrString() );

         clientErrRes = FhgfsOpsErr_INTERNAL;
      }
      else
      { // success
         LogContext(logContext).log(Log_DEBUG, "File unlinked: " + chunkFilePathStr);
      }
   }


send_response:

   if(chunkLocked) // unlock chunk
      app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );

   ctx.sendResponse(UnlinkLocalFileRespMsg(clientErrRes) );

skip_response:

   // try to rmdir chunkDirPath (in case this was the last chunkfile in a dir)
   if (!unlinkRes && hasOrigFeature)
      chunkDirStore->rmdirChunkDirPath(targetFD, &chunkDirPath);

   // update operation counters...
   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_UNLINK,
      getMsgHeaderUserID() );

   return true;
}

/**
 * @param outResponseSent true if a response was sent from within this method; can only be true if
 * -1 is returned.
 * @return -1 if consistency state was not good (in which case a special response is sent within
 * this method), otherwise the file descriptor to chunks dir (or mirror dir).
 */
int UnlinkLocalFileMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   bool isBuddyMirrorChunk = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   // get targetFD and check consistency state

   const auto consistencyState = target.getConsistencyState();
   const int targetFD = isBuddyMirrorChunk ? *target.getMirrorFD() : *target.getChunkFD();

   if(unlikely(consistencyState != TargetConsistencyState_GOOD) &&
      isBuddyMirrorChunk &&
      !isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
   { // this is a msg to a non-good primary
      std::string respMsgLogStr = "Refusing request. Target consistency is not good. "
         "targetID: " + StringTk::uintToStr(target.getID());

      ctx.sendResponse(
         GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(respMsgLogStr)));

      *outResponseSent = true;
      return -1;
   }

   return targetFD;
}

/**
 * If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
 *
 * @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
 * case *outChunkLocked==false is guaranteed).
 * @throw SocketException if sending of GenericResponseMsg fails.
 */
FhgfsOpsErr UnlinkLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
   bool* outChunkLocked)
{
   const char* logContext = "UnlinkLocalFileMsg incoming (forward to secondary)";

   App* app = Program::getApp();
   ChunkLockStore* chunkLockStore = app->getChunkLockStore();

   *outChunkLocked = false;

   if(!isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR) ||
      isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
      return FhgfsOpsErr_SUCCESS; // nothing to do

   // mirrored chunk should be modified, check if resync is in progress and lock chunk
   *outChunkLocked = target.getBuddyResyncInProgress();
   if(*outChunkLocked)
      chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk

   // instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
   addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_UnlinkLocalFileResp);
   RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
      app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);

   FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   // remove the flag that we just added for secondary
   unsetMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   if(unlikely(
      (commRes == FhgfsOpsErr_COMMUNICATION) &&
      (rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
   {
      LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

      // buddy is marked offline, so local msg processing will be done and buddy needs resync
      target.setBuddyNeedsResync(true);

      return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
   }

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   {
      LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
         "error: " + boost::lexical_cast<std::string>(commRes));

      if(*outChunkLocked)
      { // unlock chunk
         chunkLockStore->unlockChunk(target.getID(), getEntryID() );
         *outChunkLocked = false;
      }

      std::string genericRespStr = "Communication with secondary failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );

      ctx.sendResponse(GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR,
         std::move(genericRespStr)));

      return FhgfsOpsErr_COMMUNICATION;
   }

   UnlinkLocalFileRespMsg* respMsg = (UnlinkLocalFileRespMsg*)rrArgs.outRespMsg.get();
   FhgfsOpsErr secondaryRes = respMsg->getResult();
   if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
   {
      if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
      {
         /* local msg processing shall be done and buddy needs resync
            (this is normal when a storage is restarted without a broken secondary target, so we
            report success to a client in this case) */

         LogContext(logContext).log(Log_DEBUG,
            "Secondary reports unknown target error and will need resync. "
            "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

         target.setBuddyNeedsResync(true);

         return FhgfsOpsErr_SUCCESS;
      }

      LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
         boost::lexical_cast<std::string>(secondaryRes) + "; "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

      return secondaryRes;
   }

   return FhgfsOpsErr_SUCCESS;
}
@@ -0,0 +1,17 @@
#pragma once

#include <common/net/message/storage/creating/UnlinkLocalFileMsg.h>

class StorageTarget;

class UnlinkLocalFileMsgEx : public UnlinkLocalFileMsg
{
   public:
      virtual bool processIncoming(ResponseContext& ctx);

   private:
      int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
      FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
         bool* outChunkLocked);
};
Some files were not shown because too many files have changed in this diff.