New upstream version 8.1.0

geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions


@@ -0,0 +1,745 @@
#include <program/Program.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/toolkit/StringTk.h>
#include "BuddyResyncJob.h"
#include <boost/lexical_cast.hpp>
#define BUDDYRESYNCJOB_MAXDIRWALKDEPTH 2
BuddyResyncJob::BuddyResyncJob(uint16_t targetID) :
PThread("BuddyResyncJob_" + StringTk::uintToStr(targetID)),
targetID(targetID),
status(BuddyResyncJobState_NOTSTARTED),
startTime(0), endTime(0)
{
App* app = Program::getApp();
unsigned numGatherSlaves = app->getConfig()->getTuneNumResyncGatherSlaves();
unsigned numSyncSlavesTotal = app->getConfig()->getTuneNumResyncSlaves();
unsigned numFileSyncSlaves = BEEGFS_MAX((numSyncSlavesTotal / 2), 1);
unsigned numDirSyncSlaves = BEEGFS_MAX((numSyncSlavesTotal / 2), 1);
// prepare slaves (vectors) and result vector
gatherSlaveVec.resize(numGatherSlaves);
fileSyncSlaveVec.resize(numFileSyncSlaves);
dirSyncSlaveVec.resize(numDirSyncSlaves);
}
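/* Worked example for the split above (illustrative numbers): tuneNumResyncGatherSlaves=6 and
 * tuneNumResyncSlaves=12 yield 6 gather slaves plus 6 file-sync and 6 dir-sync slaves; a
 * configured total of 1 still yields BEEGFS_MAX(0, 1) = 1 slave of each sync type. */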
BuddyResyncJob::~BuddyResyncJob()
{
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
SAFE_DELETE(slave);
}
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
SAFE_DELETE(slave);
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
SAFE_DELETE(slave);
}
}
void BuddyResyncJob::run()
{
// make sure only one job at a time can run!
{
std::lock_guard<Mutex> mutexLock(statusMutex);
if (status == BuddyResyncJobState_RUNNING)
{
LogContext(__func__).logErr("Refusing to run same BuddyResyncJob twice!");
return;
}
else
{
status = BuddyResyncJobState_RUNNING;
startTime = time(NULL);
endTime = 0;
}
}
App* app = Program::getApp();
StorageTargets* storageTargets = app->getStorageTargets();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
WorkerList* workerList = app->getWorkers();
bool startGatherSlavesRes;
bool startSyncSlavesRes;
std::string targetPath;
std::string chunksPath;
bool buddyCommIsOverride = false; // treat errors during lastbuddycomm read as "0, no override"
int64_t lastBuddyCommTimeSecs;
int64_t lastBuddyCommSafetyThresholdSecs;
bool checkTopLevelDirRes;
bool walkRes;
auto& target = *storageTargets->getTargets().at(targetID);
shallAbort.setZero();
targetWasOffline = false;
// delete sync candidates and gather queue; just in case there was something from a previous run
syncCandidates.clear();
gatherSlavesWorkQueue.clear();
target.setBuddyResyncInProgress(true);
LogContext(__func__).log(Log_NOTICE,
"Started resync of targetID " + StringTk::uintToStr(targetID));
// before starting the threads, make sure every worker knows about the resync (the current work
// package must be finished); for that we use a dummy package
Mutex mutex;
Condition counterIncrementedCond;
SynchronizedCounter numReadyWorkers;
size_t numWorkers = workerList->size();
for (WorkerListIter iter = workerList->begin(); iter != workerList->end(); iter++)
{
Worker* worker = *iter;
PersonalWorkQueue* personalQueue = worker->getPersonalWorkQueue();
MultiWorkQueue* workQueue = worker->getWorkQueue();
IncSyncedCounterWork* incCounterWork = new IncSyncedCounterWork(&numReadyWorkers);
workQueue->addPersonalWork(incCounterWork, personalQueue);
}
numReadyWorkers.waitForCount(numWorkers);
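// once the counter reaches numWorkers, every worker has executed one IncSyncedCounterWork from
// its personal queue, i.e. the work package each worker had in flight when the resync flag was
// set is guaranteed to be finished by now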
// notify the buddy that the resync started and wait for confirmation
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto buddyNode = storageNodes->referenceNode(buddyNodeID);
StorageResyncStartedMsg storageResyncStartedMsg(buddyTargetID);
const auto respMsg = MessagingTk::requestResponse(*buddyNode, storageResyncStartedMsg,
NETMSGTYPE_StorageResyncStartedResp);
std::pair<bool, std::chrono::system_clock::time_point> lastBuddyComm;
if (!respMsg)
{
LOG(MIRRORING, ERR, "Unable to notify buddy about resync attempt. Resync will not start.",
targetID, buddyTargetID);
setStatus(BuddyResyncJobState_FAILURE);
goto cleanup;
}
startGatherSlavesRes = startGatherSlaves(target);
if (!startGatherSlavesRes)
{
setStatus(BuddyResyncJobState_FAILURE);
goto cleanup;
}
startSyncSlavesRes = startSyncSlaves();
if (!startSyncSlavesRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
goto cleanup;
}
numDirsDiscovered.setZero();
numDirsMatched.setZero();
// walk over the directories until we reach a certain level and then pass the directories to
// gather slaves to parallelize it
targetPath = target.getPath().str();
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
lastBuddyComm = target.getLastBuddyComm();
buddyCommIsOverride = lastBuddyComm.first;
lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
lastBuddyCommSafetyThresholdSecs = app->getConfig()->getSysResyncSafetyThresholdMins()*60;
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
lastBuddyCommTimeSecs = 0;
else
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
checkTopLevelDirRes = checkTopLevelDir(chunksPath, lastBuddyCommTimeSecs);
if (!checkTopLevelDirRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
// terminate sync slaves
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->selfTerminate();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->selfTerminate();
goto cleanup;
}
walkRes = walkDirs(chunksPath, "", 0, lastBuddyCommTimeSecs);
if (!walkRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
// terminate sync slaves
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->selfTerminate();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->selfTerminate();
goto cleanup;
}
// all directories are read => tell gather slaves to stop when the work queue is empty and
// wait for all of them to stop
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
gatherSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
gatherSlaveVec[i]->setOnlyTerminateIfIdle(false);
gatherSlaveVec[i]->selfTerminate();
}
joinGatherSlaves();
// gather slaves have finished => tell sync slaves to stop when their queues are empty and wait
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
fileSyncSlaveVec[i]->selfTerminate();
}
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
dirSyncSlaveVec[i]->selfTerminate();
}
joinSyncSlaves();
cleanup:
// wait for gather slaves to stop
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin();
iter != gatherSlaveVec.end(); iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
if(slave)
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
}
bool syncErrors = false;
// wait for sync slaves to stop and note whether any errors occurred
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
if(slave)
{
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
if (slave->getErrorCount() != 0)
syncErrors = true;
}
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
if(slave)
{
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
if (slave->getErrorCount() != 0)
syncErrors = true;
}
}
if (getStatus() == BuddyResyncJobState_RUNNING) // status not set to anything special
{ // (e.g. FAILURE)
if (shallAbort.read() != 0) // job aborted?
{
setStatus(BuddyResyncJobState_INTERRUPTED);
informBuddy();
}
else if (syncErrors || targetWasOffline.read()) // any sync errors or success?
{
// we must set the buddy BAD if it has been offline during any period of time during which
// the resync was also running. we implicitly do this during resync proper, since resync
// slaves abort with errors if the target is offline. if the target goes offline *after*
// the last proper resync message has been sent and comes *back* before we try to inform
// it, we will never detect that it has been offline at all. concurrently executing
// messages (e.g. TruncFile) may run between our opportunities to detect the offline state
// and may fail to forward their actions *even though they should forward*. this would
// lead to an inconsistent secondary. since the target has gone offline, the only
// reasonable course of action is to fail the resync entirely.
setStatus(BuddyResyncJobState_ERRORS);
informBuddy();
}
else
{
setStatus(BuddyResyncJobState_SUCCESS);
// unset timestamp override file if an override was set
target.setLastBuddyComm(std::chrono::system_clock::from_time_t(0), true);
// if targetWasOffline is set here, the target went offline after the check above.
// any message that has tried to forward itself in the intervening time will have seen the
// offline state, but will have been unable to set the buddy to needs-resync because it
// still *is* needs-resync. the resync itself has been perfectly successful, but we have
// to start another one anyway once the target comes back to ensure that no information
// was lost.
target.setBuddyNeedsResync(targetWasOffline.read());
informBuddy();
if (targetWasOffline.read())
LOG(MIRRORING, WARNING,
"Resync successful, but target went offline during finalization. "
"Setting target to needs-resync again.", targetID);
}
}
target.setBuddyResyncInProgress(false);
endTime = time(NULL);
}
void BuddyResyncJob::abort()
{
shallAbort.set(1); // tell the file walk in this class to abort
// set onlyTerminateIfIdle on the slaves to false; they will then be stopped by the main loop
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
// stop sync slaves
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
}
bool BuddyResyncJob::startGatherSlaves(const StorageTarget& target)
{
// create gather slaves if they don't exist yet and start them
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
{
if(!gatherSlaveVec[i])
gatherSlaveVec[i] = new BuddyResyncerGatherSlave(target, &syncCandidates,
&gatherSlavesWorkQueue, i);
try
{
gatherSlaveVec[i]->resetSelfTerminate();
gatherSlaveVec[i]->start();
gatherSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
return false;
}
}
return true;
}
bool BuddyResyncJob::startSyncSlaves()
{
// create sync slaves and start them
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
if(!fileSyncSlaveVec[i])
fileSyncSlaveVec[i] = new BuddyResyncerFileSyncSlave(targetID, &syncCandidates, i);
try
{
fileSyncSlaveVec[i]->resetSelfTerminate();
fileSyncSlaveVec[i]->start();
fileSyncSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
// stop already started sync slaves
for(size_t j = 0; j < i; j++)
fileSyncSlaveVec[j]->selfTerminate();
return false;
}
}
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
if(!dirSyncSlaveVec[i])
dirSyncSlaveVec[i] = new BuddyResyncerDirSyncSlave(targetID, &syncCandidates, i);
try
{
dirSyncSlaveVec[i]->resetSelfTerminate();
dirSyncSlaveVec[i]->start();
dirSyncSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
// stop already started sync slaves
for (size_t j = 0; j < fileSyncSlaveVec.size(); j++)
fileSyncSlaveVec[j]->selfTerminate();
for (size_t j = 0; j < i; j++)
dirSyncSlaveVec[j]->selfTerminate();
return false;
}
}
return true;
}
void BuddyResyncJob::joinGatherSlaves()
{
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->join();
}
void BuddyResyncJob::joinSyncSlaves()
{
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->join();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->join();
}
void BuddyResyncJob::getJobStats(StorageBuddyResyncJobStatistics& outStats)
{
uint64_t discoveredFiles = 0;
uint64_t matchedFiles = 0;
uint64_t discoveredDirs = numDirsDiscovered.read();
uint64_t matchedDirs = numDirsMatched.read();
uint64_t syncedFiles = 0;
uint64_t syncedDirs = 0;
uint64_t errorFiles = 0;
uint64_t errorDirs = 0;
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
{
BuddyResyncerGatherSlave* slave = gatherSlaveVec[i];
if(slave)
{
uint64_t tmpDiscoveredFiles = 0;
uint64_t tmpMatchedFiles = 0;
uint64_t tmpDiscoveredDirs = 0;
uint64_t tmpMatchedDirs = 0;
slave->getCounters(tmpDiscoveredFiles, tmpMatchedFiles, tmpDiscoveredDirs, tmpMatchedDirs);
discoveredFiles += tmpDiscoveredFiles;
matchedFiles += tmpMatchedFiles;
discoveredDirs += tmpDiscoveredDirs;
matchedDirs += tmpMatchedDirs;
}
}
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
BuddyResyncerFileSyncSlave* slave = fileSyncSlaveVec[i];
if(slave)
{
syncedFiles += slave->getNumChunksSynced();
errorFiles += slave->getErrorCount();
}
}
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
BuddyResyncerDirSyncSlave* slave = dirSyncSlaveVec[i];
if (slave)
{
syncedDirs += slave->getNumDirsSynced();
discoveredDirs += slave->getNumAdditionalDirsMatched();
matchedDirs += slave->getNumAdditionalDirsMatched();
errorDirs += slave->getErrorCount();
}
}
outStats = StorageBuddyResyncJobStatistics(status, startTime, endTime, discoveredFiles,
discoveredDirs, matchedFiles, matchedDirs, syncedFiles, syncedDirs, errorFiles, errorDirs);
}
void BuddyResyncJob::informBuddy()
{
App* app = Program::getApp();
NodeStore* storageNodes = app->getStorageNodes();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
TargetMapper* targetMapper = app->getTargetMapper();
BuddyResyncJobState status = getStatus();
TargetConsistencyState newTargetState;
if ( (status == BuddyResyncJobState_ERRORS) || (status == BuddyResyncJobState_INTERRUPTED))
newTargetState = TargetConsistencyState_BAD;
else
if (status == BuddyResyncJobState_SUCCESS)
newTargetState = TargetConsistencyState_GOOD;
else
{
LogContext(__func__).log(Log_NOTICE, "Refusing to set a state for buddy target, because "
"resync status isn't well-defined. "
"localTargetID: " + StringTk::uintToStr(targetID) + "; "
"resyncState: " + StringTk::intToStr(status));
return;
}
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto storageNode = storageNodes->referenceNode(buddyNodeID);
if (!storageNode)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. TargetID: " + StringTk::uintToStr(targetID)
+ "; buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; buddyNodeID: "
+ buddyNodeID.str() + "; error: unknown storage node");
return;
}
SetTargetConsistencyStatesRespMsg* respMsgCast;
FhgfsOpsErr result;
UInt16List targetIDs;
UInt8List states;
targetIDs.push_back(buddyTargetID);
states.push_back(newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Storage, &targetIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*storageNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"buddyNodeID: " + buddyNodeID.str() + "; "
"error: Communication error");
return;
}
respMsgCast = (SetTargetConsistencyStatesRespMsg*) respMsg.get();
result = respMsgCast->getResult();
if(result != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).logErr(
"Error while informing buddy about finished resync. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"buddyNodeID: " + buddyNodeID.str() + "; "
"error: " + boost::lexical_cast<std::string>(result));
}
}
/*
* check the CONFIG_BUDDYMIRROR_SUBDIR_NAME directory
*/
bool BuddyResyncJob::checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs)
{
struct stat statBuf;
int statRes = stat(path.c_str(), &statBuf);
if(statRes != 0)
{
LogContext(__func__).log(Log_WARNING,
"Couldn't stat chunks directory; resync job can't run. targetID: "
+ StringTk::uintToStr(targetID) + "; path: " + path
+ "; Error: " + System::getErrString(errno));
return false;
}
numDirsDiscovered.increase();
int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
if(dirMTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate("", targetID);
syncCandidates.add(candidate, this);
numDirsMatched.increase();
}
return true;
}
/*
* recursively walk through the buddy mirror directory until a depth of
* BUDDYRESYNCJOB_MAXDIRWALKDEPTH is
* reached; everything with a greater depth gets passed to the GatherSlaves to work on it in
* parallel
*/
bool BuddyResyncJob::walkDirs(std::string chunksPath, std::string relPath, int level,
int64_t lastBuddyCommTimeSecs)
{
bool retVal = true;
DIR* dirHandle;
struct dirent* dirEntry;
dirHandle = opendir(std::string(chunksPath + "/" + relPath).c_str());
if(!dirHandle)
{
LogContext(__func__).logErr("Unable to open path. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"Rel. path: " + relPath + "; "
"Error: " + System::getErrString(errno) );
return false;
}
while ((dirEntry = StorageTk::readdirFiltered(dirHandle)) != NULL)
{
if(shallAbort.read() != 0)
break;
// get stat info
std::string currentRelPath;
if(unlikely(relPath.empty()))
currentRelPath = dirEntry->d_name;
else
currentRelPath = relPath + "/" + dirEntry->d_name;
std::string currentFullPath = chunksPath + "/" + currentRelPath;
struct stat statBuf;
int statRes = stat(currentFullPath.c_str(), &statBuf);
if(statRes != 0)
{
LogContext(__func__).log(Log_WARNING,
"Couldn't stat directory, which was discovered previously. Resync job might not be "
"complete. targetID " + StringTk::uintToStr(targetID) + "; "
"Rel. path: " + relPath + "; "
"Error: " + System::getErrString(errno));
retVal = false;
break; // => one error aborts it all
}
if(S_ISDIR(statBuf.st_mode))
{
// if level of dir is smaller than max, take care of it and recurse into it
if(level < BUDDYRESYNCJOB_MAXDIRWALKDEPTH)
{
numDirsDiscovered.increase();
int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
if(dirMTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate(currentRelPath, targetID);
syncCandidates.add(candidate, this);
numDirsMatched.increase();
}
bool walkRes = walkDirs(chunksPath, currentRelPath, level+1, lastBuddyCommTimeSecs);
if (!walkRes)
retVal = false;
}
else
// otherwise pass it to the slaves; NOTE: gather slave takes full path
gatherSlavesWorkQueue.add(currentFullPath, this);
}
else
{
LOG_DEBUG(__func__, Log_WARNING, "Found a file in directory structure");
}
}
if(!dirEntry && errno) // an error occurred
{
LogContext(__func__).logErr(
"Unable to read all directories; chunksPath: " + chunksPath + "; relativePath: " + relPath
+ "; SysErr: " + System::getErrString(errno));
retVal = false;
}
int closedirRes = closedir(dirHandle);
if (closedirRes != 0)
LOG_DEBUG(__func__, Log_WARNING,
"Unable to open path. targetID " + StringTk::uintToStr(targetID) + "; Rel. path: "
+ relPath + "; Error: " + System::getErrString(errno));
return retVal;
}
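/* Illustration of the depth split (not part of the upstream code): with
 * BUDDYRESYNCJOB_MAXDIRWALKDEPTH == 2, this thread itself examines the first two directory
 * levels below the chunks path (recording each dir as a sync candidate when its mtime is newer
 * than the buddy communication threshold), while a directory first encountered at the third
 * level is only enqueued as a full path, e.g. "<chunksPath>/uA/uB/uC", so the gather slaves
 * can do the expensive deep traversal in parallel. */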


@@ -0,0 +1,90 @@
#pragma once
#include <common/storage/mirroring/BuddyResyncJobStatistics.h>
#include <components/buddyresyncer/BuddyResyncerDirSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerFileSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerGatherSlave.h>
#define GATHERSLAVEQUEUE_MAXSIZE 5000
class BuddyResyncJob : public PThread
{
friend class GenericDebugMsgEx;
public:
BuddyResyncJob(uint16_t targetID);
virtual ~BuddyResyncJob();
virtual void run();
void abort();
void getJobStats(StorageBuddyResyncJobStatistics& outStats);
private:
uint16_t targetID;
Mutex statusMutex;
BuddyResyncJobState status;
int64_t startTime;
int64_t endTime;
ChunkSyncCandidateStore syncCandidates;
BuddyResyncerGatherSlaveWorkQueue gatherSlavesWorkQueue;
BuddyResyncerGatherSlaveVec gatherSlaveVec;
BuddyResyncerFileSyncSlaveVec fileSyncSlaveVec;
BuddyResyncerDirSyncSlaveVec dirSyncSlaveVec;
// this thread walks over the top dir structures itself, so we need to track that
AtomicUInt64 numDirsDiscovered;
AtomicUInt64 numDirsMatched;
AtomicInt16 shallAbort; // quasi-boolean
AtomicInt16 targetWasOffline;
bool checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs);
bool walkDirs(std::string chunksPath, std::string relPath, int level,
int64_t lastBuddyCommTimeSecs);
bool startGatherSlaves(const StorageTarget& target);
bool startSyncSlaves();
void joinGatherSlaves();
void joinSyncSlaves();
public:
uint16_t getTargetID() const
{
return targetID;
}
BuddyResyncJobState getStatus()
{
std::lock_guard<Mutex> mutexLock(statusMutex);
return status;
}
bool isRunning()
{
std::lock_guard<Mutex> mutexLock(statusMutex);
return status == BuddyResyncJobState_RUNNING;
}
void setTargetOffline()
{
targetWasOffline.set(1);
}
private:
void setStatus(BuddyResyncJobState status)
{
std::lock_guard<Mutex> mutexLock(statusMutex);
this->status = status;
}
void informBuddy();
};
typedef std::map<uint16_t, BuddyResyncJob*> BuddyResyncJobMap; // mapping: targetID -> job
typedef BuddyResyncJobMap::iterator BuddyResyncJobMapIter;


@@ -0,0 +1,40 @@
#include <program/Program.h>
#include "BuddyResyncer.h"
BuddyResyncer::~BuddyResyncer()
{
// delete remaining jobs
for (BuddyResyncJobMapIter iter = resyncJobMap.begin(); iter != resyncJobMap.end(); iter++)
{
BuddyResyncJob* job = iter->second;
if( job->isRunning() )
{
job->abort();
job->join();
}
SAFE_DELETE(job);
}
}
/**
* @return FhgfsOpsErr_SUCCESS if everything was successfully started, FhgfsOpsErr_INUSE if already
* running
*/
FhgfsOpsErr BuddyResyncer::startResync(uint16_t targetID)
{
bool isNewJob;
// try to add a resync job; if one already exists for this target, we get the existing one
BuddyResyncJob* resyncJob = addResyncJob(targetID, isNewJob);
// Job already exists *and* is already running:
if (!isNewJob && resyncJob->isRunning() )
return FhgfsOpsErr_INUSE;
// job is ready and not running
resyncJob->start();
return FhgfsOpsErr_SUCCESS;
}
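/* Usage sketch (illustrative; "buddyResyncer" is a hypothetical pointer to the app's single
 * BuddyResyncer instance):
 *
 *    FhgfsOpsErr startRes = buddyResyncer->startResync(targetID);
 *    if (startRes == FhgfsOpsErr_INUSE)
 *       ; // a resync job for this target is already running
 *
 * Note that jobs are never removed from resyncJobMap here: a finished job object is found
 * again and simply restarted by the next startResync() call for the same targetID. */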


@@ -0,0 +1,59 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncJob.h>
#include <mutex>
/**
* This is not a component that represents a separate thread by itself. Instead, it is the
* controlling frontend for slave threads, which are started and stopped on request (i.e. it is not
* automatically started when the app is started).
*
* Callers should only use methods in this controlling frontend and not access the slave's methods
* directly.
*/
class BuddyResyncer
{
public:
~BuddyResyncer();
FhgfsOpsErr startResync(uint16_t targetID);
private:
BuddyResyncJobMap resyncJobMap;
Mutex resyncJobMapMutex;
public:
BuddyResyncJob* getResyncJob(uint16_t targetID)
{
std::lock_guard<Mutex> mutexLock(resyncJobMapMutex);
BuddyResyncJobMapIter iter = resyncJobMap.find(targetID);
if (iter != resyncJobMap.end())
return iter->second;
else
return NULL;
}
private:
BuddyResyncJob* addResyncJob(uint16_t targetID, bool& outIsNew)
{
std::lock_guard<Mutex> mutexLock(resyncJobMapMutex);
BuddyResyncJobMapIter iter = resyncJobMap.find(targetID);
if (iter != resyncJobMap.end())
{
outIsNew = false;
return iter->second;
}
else
{
BuddyResyncJob* job = new BuddyResyncJob(targetID);
resyncJobMap.insert(BuddyResyncJobMap::value_type(targetID, job) );
outIsNew = true;
return job;
}
}
};


@@ -0,0 +1,395 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "BuddyResyncerDirSyncSlave.h"
#include <boost/lexical_cast.hpp>
#define CHECK_AT_ONCE 50
BuddyResyncerDirSyncSlave::BuddyResyncerDirSyncSlave(uint16_t targetID,
ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
PThread("BuddyResyncerDirSyncSlave_" + StringTk::uintToStr(targetID) + "-"
+ StringTk::uintToStr(slaveID))
{
this->isRunning = false;
this->targetID = targetID;
this->syncCandidates = syncCandidates;
}
BuddyResyncerDirSyncSlave::~BuddyResyncerDirSyncSlave()
{
}
/**
* This is a component that is started on demand through its control frontend at
* runtime and terminates when it is done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerDirSyncSlave::run()
{
setIsRunning(true);
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
numAdditionalDirsMatched.setZero();
numDirsSynced.setZero();
errorCount.setZero();
syncLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerDirSyncSlave::syncLoop()
{
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
while (! getSelfTerminateNotIdle())
{
if((syncCandidates->isDirsEmpty()) && (getSelfTerminate()))
break;
ChunkSyncCandidateDir candidate;
syncCandidates->fetch(candidate, this);
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
continue;
std::string relativePath = candidate.getRelativePath();
uint16_t localTargetID = candidate.getTargetID();
// get buddy targetID
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
// perform sync
FhgfsOpsErr resyncRes = doSync(relativePath, localTargetID, buddyTargetID);
if (resyncRes == FhgfsOpsErr_SUCCESS)
numDirsSynced.increase();
else if (resyncRes != FhgfsOpsErr_INTERRUPTED)
errorCount.increase(); // increment error count if an error occurred; note: if the slaves
// were interrupted from the outside (e.g. ctl) this is not an error
}
}
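/* Termination protocol (sketch of how BuddyResyncJob drives this loop): for a clean shutdown
 * the job calls setOnlyTerminateIfIdle(true) and then selfTerminate(), so the loop above keeps
 * draining candidates and only exits once the dir queue is empty; on abort it instead calls
 * setOnlyTerminateIfIdle(false), which makes getSelfTerminateNotIdle() true and ends the loop
 * right after the current candidate. */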
FhgfsOpsErr BuddyResyncerDirSyncSlave::doSync(const std::string& dirPath, uint16_t localTargetID,
uint16_t buddyTargetID)
{
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
App* app = Program::getApp();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
// try to find the node with the buddyTargetID
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto node = storageNodes->referenceNode(buddyNodeID);
if(!node)
{
LogContext(__func__).logErr(
"Storage node does not exist; nodeID " + buddyNodeID.str());
return FhgfsOpsErr_UNKNOWNNODE;
}
int64_t offset = 0;
unsigned entriesFetched;
do
{
int64_t newOffset;
StringList names;
IntList entryTypes;
FhgfsOpsErr listRes = getBuddyDirContents(*node, dirPath, buddyTargetID, offset, names,
entryTypes, newOffset);
if(listRes != FhgfsOpsErr_SUCCESS)
{
retVal = listRes;
break;
}
offset = newOffset;
entriesFetched = names.size();
// match locally
FhgfsOpsErr findRes = findChunks(localTargetID, dirPath, names, entryTypes);
if(findRes != FhgfsOpsErr_SUCCESS)
{
retVal = findRes;
break;
}
// delete the remaining chunks/dirs on the buddy
StringList rmPaths;
for (StringListIter iter = names.begin(); iter != names.end(); iter++)
{
std::string path = dirPath + "/" + *iter;
rmPaths.push_back(path);
}
FhgfsOpsErr rmRes = removeBuddyChunkPaths(*node, localTargetID, buddyTargetID, rmPaths);
if (rmRes != FhgfsOpsErr_SUCCESS)
{
retVal = rmRes;
break;
}
if (getSelfTerminateNotIdle())
{
retVal = FhgfsOpsErr_INTERRUPTED;
break;
}
} while (entriesFetched == CHECK_AT_ONCE);
return retVal;
}
FhgfsOpsErr BuddyResyncerDirSyncSlave::getBuddyDirContents(Node& node, const std::string& dirPath,
uint16_t targetID, int64_t offset, StringList& outNames, IntList& outEntryTypes,
int64_t& outNewOffset)
{
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
unsigned msgRetryIntervalMS = 5000;
// get a part of the dir contents from the buddy target
ListChunkDirIncrementalMsg listMsg(targetID, true, dirPath, offset, CHECK_AT_ONCE, false, true);
listMsg.setMsgHeaderTargetID(targetID);
CombinedTargetState state;
bool getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ( (!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE) )
{
respMsg = MessagingTk::requestResponse(node, listMsg, NETMSGTYPE_ListChunkDirIncrementalResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; sleeping "
+ StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
+ StringTk::uintToStr(targetID));
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
}
}
if (!respMsg)
{ // communication error
LogContext(__func__).logErr(
"Communication with storage node failed: " + node.getTypedNodeID());
retVal = FhgfsOpsErr_COMMUNICATION;
}
else
if(!getStateRes)
{
LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );
retVal = FhgfsOpsErr_INTERNAL;
}
else
{
// correct response type received
ListChunkDirIncrementalRespMsg* respMsgCast = (ListChunkDirIncrementalRespMsg*) respMsg.get();
FhgfsOpsErr listRes = respMsgCast->getResult();
if (listRes == FhgfsOpsErr_SUCCESS)
{
outNewOffset = respMsgCast->getNewOffset();
respMsgCast->getNames().swap(outNames);
respMsgCast->getEntryTypes().swap(outEntryTypes);
}
else
if (listRes != FhgfsOpsErr_PATHNOTEXISTS)
{ // PATHNOTEXISTS is ok, because the path might have been deleted in the meantime
LogContext(__func__).log(Log_WARNING, "Error listing chunks dir; "
"dirPath: " + dirPath + "; "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"node: " + node.getTypedNodeID() + "; "
"Error: " + boost::lexical_cast<std::string>(listRes));
retVal = listRes;
}
}
return retVal;
}
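/* Retry behaviour note: communication failures are retried every msgRetryIntervalMS (5s) for
 * as long as the target state store still reports the buddy as not OFFLINE; the loop ends
 * with a response, with the buddy going offline, or with this thread being told to stop. */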
FhgfsOpsErr BuddyResyncerDirSyncSlave::findChunks(uint16_t targetID, const std::string& dirPath,
StringList& inOutNames, IntList& inOutEntryTypes)
{
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
const auto& target = app->getStorageTargets()->getTargets().at(targetID);
const int targetFD = *target->getMirrorFD();
StringListIter namesIter = inOutNames.begin();
IntListIter typesIter = inOutEntryTypes.begin();
while (namesIter != inOutNames.end())
{
std::string entryID = *namesIter;
DirEntryType entryType = (DirEntryType)*typesIter;
std::string entryPath;
if (likely(!dirPath.empty()))
entryPath = dirPath + "/" + entryID;
else
entryPath = entryID;
if (DirEntryType_ISDIR(entryType))
{
bool entryExists = StorageTk::pathExists(targetFD, entryPath);
if (!entryExists)
{
// dir not found, so we didn't know about it yet => add it to sync candidate store, so
// that it gets checked and we get a list of its contents;
ChunkSyncCandidateDir syncCandidate(entryPath, targetID);
syncCandidates->add(syncCandidate, this);
numAdditionalDirsMatched.increase();
}
// no matter if found or not: remove it from the list, because we do not explicitly
// delete directories on the buddy
namesIter = inOutNames.erase(namesIter);
typesIter = inOutEntryTypes.erase(typesIter);
}
else
{
// need to lock the chunk to check it
chunkLockStore->lockChunk(targetID, entryID);
bool entryExists = StorageTk::pathExists(targetFD, entryPath);
if (entryExists)
{
// chunk found => delete it from the list and unlock it
namesIter = inOutNames.erase(namesIter);
typesIter = inOutEntryTypes.erase(typesIter);
chunkLockStore->unlockChunk(targetID, entryID);
}
else
{
// chunk not found => keep lock; will be unlocked after removal
namesIter++;
typesIter++;
}
}
}
return FhgfsOpsErr_SUCCESS;
}
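/* Locking note: chunks that are missing locally deliberately stay locked here; doSync() passes
 * exactly these remaining names to removeBuddyChunkPaths(), which unlocks them after the
 * removal request, so no concurrent message can touch a chunk while its deletion is pending. */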
FhgfsOpsErr BuddyResyncerDirSyncSlave::removeBuddyChunkPaths(Node& node, uint16_t localTargetID,
uint16_t buddyTargetID, StringList& paths)
{
unsigned msgRetryIntervalMS = 5000;
ChunkLockStore* chunkLockStore = Program::getApp()->getChunkLockStore();
RmChunkPathsMsg rmMsg(buddyTargetID, &paths);
rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
rmMsg.setMsgHeaderTargetID(buddyTargetID);
CombinedTargetState state;
bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ((!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE))
{
respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; sleeping "
+ StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
+ StringTk::uintToStr(targetID));
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
}
}
// no matter whether that succeeded, we unlock all chunks here first
for (StringListIter iter = paths.begin(); iter != paths.end(); iter++)
{
std::string entryID = StorageTk::getPathBasename(*iter);
chunkLockStore->unlockChunk(localTargetID, entryID);
}
if (!respMsg)
{ // communication error
LogContext(__func__).logErr(
"Communication with storage node failed: " + node.getTypedNodeID());
return FhgfsOpsErr_COMMUNICATION;
}
else
if(!getStateRes)
{
LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );
return FhgfsOpsErr_INTERNAL;
}
else
{
// correct response type received
RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
StringList& failedPaths = respMsgCast->getFailedPaths();
for(StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
{
LogContext(__func__).logErr("Chunk path could not be deleted; "
"path: " + *iter + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"node: " + node.getTypedNodeID());
}
}
return FhgfsOpsErr_SUCCESS;
}


@@ -0,0 +1,106 @@
#pragma once
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <components/buddyresyncer/SyncCandidate.h>
class BuddyResyncerDirSyncSlave : public PThread
{
friend class BuddyResyncer; // (to grant access to internal mutex)
friend class BuddyResyncJob; // (to grant access to internal mutex)
public:
BuddyResyncerDirSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
uint8_t slaveID);
virtual ~BuddyResyncerDirSyncSlave();
private:
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
AtomicSizeT onlyTerminateIfIdle;
AtomicUInt64 numDirsSynced;
AtomicUInt64 numAdditionalDirsMatched;
AtomicUInt64 errorCount;
bool isRunning; // true if an instance of this component is currently running
uint16_t targetID;
ChunkSyncCandidateStore* syncCandidates;
virtual void run();
void syncLoop();
FhgfsOpsErr doSync(const std::string& dirPath, uint16_t localTargetID,
uint16_t buddyTargetID);
FhgfsOpsErr getBuddyDirContents(Node& node, const std::string& dirPath, uint16_t targetID,
int64_t offset, StringList& outNames, IntList& outEntryTypes, int64_t& outNewOffset);
FhgfsOpsErr findChunks(uint16_t targetID, const std::string& dirPath, StringList& inOutNames,
IntList& inOutEntryTypes);
FhgfsOpsErr removeBuddyChunkPaths(Node& node, uint16_t localTargetID, uint16_t buddyTargetID,
StringList& paths);
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
void setOnlyTerminateIfIdle(bool value)
{
if (value)
onlyTerminateIfIdle.set(1);
else
onlyTerminateIfIdle.setZero();
}
bool getOnlyTerminateIfIdle()
{
if (onlyTerminateIfIdle.read() == 0)
return false;
else
return true;
}
uint64_t getNumDirsSynced()
{
return numDirsSynced.read();
}
uint64_t getNumAdditionalDirsMatched()
{
return numAdditionalDirsMatched.read();
}
uint64_t getErrorCount()
{
return errorCount.read();
}
private:
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
bool getSelfTerminateNotIdle()
{
return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
}
};
typedef std::list<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveList;
typedef BuddyResyncerDirSyncSlaveList::iterator BuddyResyncerDirSyncSlaveListIter;
typedef std::vector<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveVec;
typedef BuddyResyncerDirSyncSlaveVec::iterator BuddyResyncerDirSyncSlaveVecIter;


@@ -0,0 +1,471 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "BuddyResyncerFileSyncSlave.h"
#include <boost/lexical_cast.hpp>
#define PROCESS_AT_ONCE 1
#define SYNC_BLOCK_SIZE (1024*1024) // 1M
BuddyResyncerFileSyncSlave::BuddyResyncerFileSyncSlave(uint16_t targetID,
ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
PThread("BuddyResyncerFileSyncSlave_" + StringTk::uintToStr(targetID) + "-"
+ StringTk::uintToStr(slaveID))
{
this->isRunning = false;
this->syncCandidates = syncCandidates;
this->targetID = targetID;
}
BuddyResyncerFileSyncSlave::~BuddyResyncerFileSyncSlave()
{
}
/**
* This is a component that is started on demand through its control frontend at
* runtime and terminates when it is done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerFileSyncSlave::run()
{
setIsRunning(true);
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
numChunksSynced.setZero();
errorCount.setZero();
syncLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerFileSyncSlave::syncLoop()
{
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
while (! getSelfTerminateNotIdle())
{
if((syncCandidates->isFilesEmpty()) && (getSelfTerminate()))
break;
ChunkSyncCandidateFile candidate;
syncCandidates->fetch(candidate, this);
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
continue;
std::string relativePath = candidate.getRelativePath();
uint16_t localTargetID = candidate.getTargetID();
// get buddy targetID
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
// perform sync
FhgfsOpsErr resyncRes = doResync(relativePath, localTargetID, buddyTargetID);
if (resyncRes == FhgfsOpsErr_SUCCESS)
numChunksSynced.increase();
else
if (resyncRes != FhgfsOpsErr_INTERRUPTED)
errorCount.increase();
}
}
FhgfsOpsErr BuddyResyncerFileSyncSlave::doResync(std::string& chunkPathStr, uint16_t localTargetID,
uint16_t buddyTargetID)
{
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
unsigned msgRetryIntervalMS = 5000;
App* app = Program::getApp();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
std::string entryID = StorageTk::getPathBasename(chunkPathStr);
// try to find the node with the buddyTargetID
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto node = storageNodes->referenceNode(buddyNodeID);
if(!node)
{
LogContext(__func__).log(Log_WARNING,
"Storage node does not exist; nodeID " + buddyNodeID.str());
return FhgfsOpsErr_UNKNOWNNODE;
}
int64_t offset = 0;
ssize_t readRes = 0;
unsigned resyncMsgFlags = 0;
resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR;
LogContext(__func__).log(Log_DEBUG,
"File sync started. chunkPath: " + chunkPathStr + "; localTargetID: "
+ StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
+ StringTk::uintToStr(buddyTargetID));
do
{
boost::scoped_array<char> data(new char[SYNC_BLOCK_SIZE]);
const auto& target = app->getStorageTargets()->getTargets().at(localTargetID);
// lock the chunk
chunkLockStore->lockChunk(localTargetID, entryID);
const int fd = openat(*target->getMirrorFD(), chunkPathStr.c_str(), O_RDONLY | O_NOATIME);
if (fd == -1)
{
int errCode = errno;
if(errCode == ENOENT)
{ // chunk was deleted => no error
// delete the mirror chunk and return
bool rmRes = removeBuddyChunkUnlocked(*node, buddyTargetID, chunkPathStr);
if (!rmRes) // rm failed; stop resync
{
LogContext(__func__).log(Log_WARNING,
"File sync not started. chunkPath: " + chunkPathStr + "; localTargetID: "
+ StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
+ StringTk::uintToStr(buddyTargetID));
retVal = FhgfsOpsErr_INTERNAL;
}
}
else // error => log and return
{
LogContext(__func__).logErr(
"Open of chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
+ StringTk::uintToStr(localTargetID) + "; Error: "
+ System::getErrString(errCode));
retVal = FhgfsOpsErr_INTERNAL;
}
chunkLockStore->unlockChunk(localTargetID, entryID);
goto cleanup;
}
int seekRes = lseek(fd, offset, SEEK_SET);
if (seekRes == -1)
{
LogContext(__func__).logErr(
"Seeking in chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
+ StringTk::uintToStr(localTargetID) + "; offset: " + StringTk::int64ToStr(offset));
retVal = FhgfsOpsErr_INTERNAL; // report the failed seek instead of implicitly returning SUCCESS
close(fd); // don't leak the chunk fd on this early exit
chunkLockStore->unlockChunk(localTargetID, entryID);
goto cleanup;
}
readRes = read(fd, data.get(), SYNC_BLOCK_SIZE);
if( readRes == -1)
{
LogContext(__func__).logErr("Error during read; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + System::getErrString(errno));
retVal = FhgfsOpsErr_INTERNAL;
goto end_of_loop;
}
if(readRes > 0)
{
const char zeroBuf[RESYNCER_SPARSE_BLOCK_SIZE] = { 0 };
// check if sparse blocks are in the buffer
ssize_t bufPos = 0;
bool dataFound = false;
while (bufPos < readRes)
{
size_t cmpLen = BEEGFS_MIN(readRes-bufPos, RESYNCER_SPARSE_BLOCK_SIZE);
int cmpRes = memcmp(data.get() + bufPos, zeroBuf, cmpLen);
if(cmpRes != 0)
dataFound = true;
else // sparse area detected
{
if(dataFound) // had data before
{
resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE; // let the receiver do a check
break; // and stop checking here
}
}
bufPos += cmpLen;
}
// the inner loop is over; if only sparse areas were found, the transfer below can be skipped
/* make sure we always send a msg at offset==0 to truncate the file and allow concurrent
writers in a big initial sparse area */
if(offset && (readRes > 0) && (readRes == SYNC_BLOCK_SIZE) && !dataFound)
{
goto end_of_loop;
// => no transfer needed
}
/* let the receiver do a check, because we might be sending a sparse block at the beginning or
end of the file */
if(!dataFound)
resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE;
}
{
ResyncLocalFileMsg resyncMsg(data.get(), chunkPathStr, buddyTargetID, offset, readRes);
if (!readRes || (readRes < SYNC_BLOCK_SIZE) ) // last iteration, set attribs and trunc buddy chunk
{
struct stat statBuf;
int statRes = fstat(fd, &statBuf);
if (statRes == 0)
{
if(statBuf.st_size < offset)
{ // in case someone truncated the file while we're reading at a high offset
offset = statBuf.st_size;
resyncMsg.setOffset(offset);
}
else
if(offset && !readRes)
resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_TRUNC;
int mode = statBuf.st_mode;
unsigned userID = statBuf.st_uid;
unsigned groupID = statBuf.st_gid;
int64_t mtimeSecs = statBuf.st_mtim.tv_sec;
int64_t atimeSecs = statBuf.st_atim.tv_sec;
SettableFileAttribs chunkAttribs = {mode, userID, groupID, mtimeSecs, atimeSecs};
resyncMsg.setChunkAttribs(chunkAttribs);
resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_SETATTRIBS;
}
else
{
LogContext(__func__).logErr("Error getting chunk attributes; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + System::getErrString(errno));
}
}
resyncMsg.setMsgHeaderFeatureFlags(resyncMsgFlags);
resyncMsg.setMsgHeaderTargetID(buddyTargetID);
CombinedTargetState state;
bool getStateRes =
Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ( (!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE) )
{
respMsg = MessagingTk::requestResponse(*node, resyncMsg,
NETMSGTYPE_ResyncLocalFileResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; sleeping "
+ StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
+ StringTk::uintToStr(targetID));
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes =
Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
}
}
if (!respMsg)
{ // communication error
LogContext(__func__).log(Log_WARNING,
"Communication with storage node failed: " + node->getTypedNodeID());
retVal = FhgfsOpsErr_COMMUNICATION;
// set readRes to non-zero to force exiting loop
readRes = -2;
}
else
if(!getStateRes)
{
LogContext(__func__).log(Log_WARNING,
"No valid state for node ID: " + node->getTypedNodeID());
retVal = FhgfsOpsErr_INTERNAL;
// set readRes to non-zero to force exiting loop
readRes = -2;
}
else
{
// correct response type received
ResyncLocalFileRespMsg* respMsgCast = (ResyncLocalFileRespMsg*) respMsg.get();
FhgfsOpsErr syncRes = respMsgCast->getResult();
if(syncRes != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).log(Log_WARNING, "Error during resync; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + boost::lexical_cast<std::string>(syncRes));
retVal = syncRes;
// set readRes to non-zero to force exiting loop
readRes = -2;
}
}
}
end_of_loop:
int closeRes = close(fd);
if (closeRes == -1)
{
LogContext(__func__).log(Log_WARNING, "Error closing file descriptor; "
"chunkPath: " + chunkPathStr + "; "
"targetID: " + StringTk::uintToStr(localTargetID) + "; "
"BuddyNode: " + node->getTypedNodeID() + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"Error: " + System::getErrString(errno));
}
// unlock the chunk
chunkLockStore->unlockChunk(localTargetID, entryID);
// increment offset for next iteration
offset += readRes;
if ( getSelfTerminateNotIdle() )
{
retVal = FhgfsOpsErr_INTERRUPTED;
break;
}
} while (readRes == SYNC_BLOCK_SIZE);
cleanup:
LogContext(__func__).log(Log_DEBUG, "File sync finished. chunkPath: " + chunkPathStr);
return retVal;
}
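/* Transfer size note (illustrative numbers): data is shipped in SYNC_BLOCK_SIZE (1 MiB) pieces,
 * so a 2.5 MiB chunk needs three ResyncLocalFileMsg round trips (1 MiB, 1 MiB, 0.5 MiB); the
 * short read on the last message triggers the attrib/trunc handling above, and an all-zero
 * block in the middle of the file is skipped entirely via the sparse check. */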
/**
* Note: Chunk has to be locked by caller.
*/
bool BuddyResyncerFileSyncSlave::removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID,
std::string& pathStr)
{
bool retVal = true;
unsigned msgRetryIntervalMS = 5000;
std::string entryID = StorageTk::getPathBasename(pathStr);
StringList rmPaths;
rmPaths.push_back(pathStr);
RmChunkPathsMsg rmMsg(buddyTargetID, &rmPaths);
rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
rmMsg.setMsgHeaderTargetID(buddyTargetID);
CombinedTargetState state;
bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
// send request to node and receive response
std::unique_ptr<NetMessage> respMsg;
while ( (!respMsg) && (getStateRes)
&& (state.reachabilityState != TargetReachabilityState_OFFLINE) )
{
respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);
if (!respMsg)
{
LOG_DEBUG(__func__, Log_NOTICE,
"Unable to communicate, but target is not offline; "
"sleeping " + StringTk::uintToStr(msgRetryIntervalMS) + " ms before retry. "
"targetID: " + StringTk::uintToStr(targetID) );
PThread::sleepMS(msgRetryIntervalMS);
// if thread shall terminate, break loop here
if ( getSelfTerminateNotIdle() )
break;
getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
}
}
if (!respMsg)
{ // communication error
LogContext(__func__).logErr(
"Communication with storage node failed: " + node.getTypedNodeID() );
return false;
}
else
if(!getStateRes)
{
LogContext(__func__).log(Log_WARNING,
"No valid state for node ID: " + node.getTypedNodeID() );
return false;
}
else
{
// correct response type received
RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
StringList& failedPaths = respMsgCast->getFailedPaths();
for (StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
{
LogContext(__func__).logErr("Chunk path could not be deleted; "
"path: " + *iter + "; "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"node: " + node.getTypedNodeID());
retVal = false;
}
}
return retVal;
}


@@ -0,0 +1,98 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <mutex>
class BuddyResyncerFileSyncSlave : public PThread
{
friend class BuddyResyncer; // (to grant access to internal mutex)
friend class BuddyResyncJob; // (to grant access to internal mutex)
public:
BuddyResyncerFileSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
uint8_t slaveID);
virtual ~BuddyResyncerFileSyncSlave();
private:
AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
AtomicUInt64 numChunksSynced;
AtomicUInt64 errorCount;
bool isRunning; // true if an instance of this component is currently running
uint16_t targetID;
ChunkSyncCandidateStore* syncCandidates;
virtual void run();
void syncLoop();
FhgfsOpsErr doResync(std::string& chunkPathStr, uint16_t localTargetID,
uint16_t buddyTargetID);
bool removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID, std::string& pathStr);
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
void setOnlyTerminateIfIdle(bool value)
{
if (value)
onlyTerminateIfIdle.set(1);
else
onlyTerminateIfIdle.setZero();
}
bool getOnlyTerminateIfIdle()
{
if (onlyTerminateIfIdle.read() == 0)
return false;
else
return true;
}
uint64_t getNumChunksSynced()
{
return numChunksSynced.read();
}
uint64_t getErrorCount()
{
return errorCount.read();
}
private:
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
bool getSelfTerminateNotIdle()
{
return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
}
};
typedef std::list<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveList;
typedef BuddyResyncerFileSyncSlaveList::iterator BuddyResyncerFileSyncSlaveListIter;
typedef std::vector<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveVec;
typedef BuddyResyncerFileSyncSlaveVec::iterator BuddyResyncerFileSyncSlaveVecIter;


@@ -0,0 +1,162 @@
#include <app/App.h>
#include <toolkit/StorageTkEx.h>
#include <storage/StorageTargets.h>
#include <program/Program.h>
#include <mutex>
#include "BuddyResyncerGatherSlave.h"
Mutex BuddyResyncerGatherSlave::staticGatherSlavesMutex;
std::map<std::string, BuddyResyncerGatherSlave*> BuddyResyncerGatherSlave::staticGatherSlaves;
BuddyResyncerGatherSlave::BuddyResyncerGatherSlave(const StorageTarget& target,
ChunkSyncCandidateStore* syncCandidates, BuddyResyncerGatherSlaveWorkQueue* workQueue,
uint8_t slaveID) :
PThread("BuddyResyncerGatherSlave_" + StringTk::uintToStr(target.getID()) + "-" +
StringTk::uintToStr(slaveID)),
target(target)
{
this->isRunning = false;
this->syncCandidates = syncCandidates;
this->workQueue = workQueue;
const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);
staticGatherSlaves[this->getName()] = this;
}
BuddyResyncerGatherSlave::~BuddyResyncerGatherSlave()
{
}
/**
* This is a component that is started on demand through its control frontend at
* runtime and terminates when it is done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerGatherSlave::run()
{
setIsRunning(true);
numChunksDiscovered.setZero();
numChunksMatched.setZero();
numDirsDiscovered.setZero();
numDirsMatched.setZero();
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
workLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch(std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerGatherSlave::workLoop()
{
const unsigned maxOpenFDsNum = 20; // max open FDs => max path sub-depth for efficient traversal
while (!getSelfTerminateNotIdle())
{
if ((workQueue->queueEmpty()) && (getSelfTerminate()))
break;
// get a directory to scan
std::string pathStr = workQueue->fetch(this);
if(unlikely(pathStr.empty()))
continue;
int nftwRes = nftw(pathStr.c_str(), handleDiscoveredEntry, maxOpenFDsNum, FTW_ACTIONRETVAL);
if(nftwRes == -1)
{ // error occurred
LogContext(__func__).logErr("Error during chunks walk. SysErr: " + System::getErrString());
}
}
}
int BuddyResyncerGatherSlave::handleDiscoveredEntry(const char* path,
const struct stat* statBuf, int ftwEntryType, struct FTW* ftwBuf)
{
std::string chunksPath;
BuddyResyncerGatherSlave* thisStatic = nullptr;
{
const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);
thisStatic = staticGatherSlaves[PThread::getCurrentThreadName()];
}
App* app = Program::getApp();
Config* cfg = app->getConfig();
const auto& targetPath = thisStatic->target.getPath().str();
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
if (strlen(path) <= chunksPath.length())
return FTW_CONTINUE;
std::string relPathStr = path + chunksPath.size() + 1;
if ( relPathStr.empty() )
return FTW_CONTINUE;
const auto lastBuddyComm = thisStatic->target.getLastBuddyComm();
const bool buddyCommIsOverride = lastBuddyComm.first;
int64_t lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
int64_t lastBuddyCommSafetyThresholdSecs = cfg->getSysResyncSafetyThresholdMins()*60;
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
lastBuddyCommTimeSecs = 0;
else
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
if(ftwEntryType == FTW_D) // directory
{
thisStatic->numDirsDiscovered.increase();
int64_t dirModificationTime = (int64_t)statBuf->st_mtim.tv_sec;
if(dirModificationTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate(relPathStr, thisStatic->target.getID());
thisStatic->syncCandidates->add(candidate, thisStatic);
thisStatic->numDirsMatched.increase();
}
}
else
if(ftwEntryType == FTW_F) // file
{
// we found a chunk
thisStatic->numChunksDiscovered.increase();
// we need to use ctime here, because mtime can be set manually (even to the future)
time_t chunkChangeTime = statBuf->st_ctim.tv_sec;
if(chunkChangeTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateFile candidate(relPathStr, thisStatic->target.getID());
thisStatic->syncCandidates->add(candidate, thisStatic);
thisStatic->numChunksMatched.increase();
}
}
return FTW_CONTINUE;
}
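/* Threshold example (illustrative): with sysResyncSafetyThresholdMins=10 and the last buddy
 * communication at time t, every entry changed after t-600s becomes a sync candidate; a
 * threshold of 0 without a manual override disables the timestamp check completely, so every
 * entry matches and effectively a full resync is performed. */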


@@ -0,0 +1,182 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/components/ComponentInitException.h>
#include <common/threading/PThread.h>
#include <ftw.h>
class StorageTarget;
#define GATHERSLAVEQUEUE_MAXSIZE 5000
class BuddyResyncerGatherSlaveWorkQueue
{
/*
* This is more or less just a small convenience class that is tightly coupled to
* BuddyResyncerGatherSlave and BuddyResyncJob
*/
public:
BuddyResyncerGatherSlaveWorkQueue(): gatherSlavesWorkQueueLen(0) { }
private:
StringList paths;
size_t gatherSlavesWorkQueueLen; // used to avoid constantly calling the list's size() method
Mutex mutex;
Condition pathAddedCond;
Condition pathFetchedCond;
public:
void add(std::string& path, PThread* caller)
{
unsigned waitTimeoutMS = 3000;
const std::lock_guard<Mutex> lock(mutex);
while (gatherSlavesWorkQueueLen > GATHERSLAVEQUEUE_MAXSIZE)
{
if((caller) && (unlikely(caller->getSelfTerminate())))
break;
pathFetchedCond.timedwait(&mutex, waitTimeoutMS);
}
paths.push_back(path);
gatherSlavesWorkQueueLen++;
pathAddedCond.signal();
}
std::string fetch(PThread* caller)
{
unsigned waitTimeoutMS = 3000;
const std::lock_guard<Mutex> lock(mutex);
while (paths.empty())
{
if((caller) && (unlikely(caller->getSelfTerminate())))
{
return "";
}
pathAddedCond.timedwait(&mutex, waitTimeoutMS);
}
std::string retVal = paths.front();
paths.pop_front();
gatherSlavesWorkQueueLen--;
pathFetchedCond.signal();
return retVal;
}
bool queueEmpty()
{
const std::lock_guard<Mutex> lock(mutex);
return gatherSlavesWorkQueueLen == 0;
}
void clear()
{
const std::lock_guard<Mutex> lock(mutex);
paths.clear();
gatherSlavesWorkQueueLen = 0;
}
};
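/* Backpressure sketch: BuddyResyncJob::walkDirs() is the producer and blocks in add() while
 * more than GATHERSLAVEQUEUE_MAXSIZE paths are pending; the gather slaves consume via fetch(),
 * and each fetch() wakes one waiting producer through pathFetchedCond. Both sides stop waiting
 * when their thread is asked to self-terminate. */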
class BuddyResyncerGatherSlave : public PThread
{
friend class BuddyResyncer; // (to grant access to internal mutex)
friend class BuddyResyncJob; // (to grant access to internal mutex)
public:
BuddyResyncerGatherSlave(const StorageTarget& target, ChunkSyncCandidateStore* syncCandidates,
BuddyResyncerGatherSlaveWorkQueue* workQueue, uint8_t slaveID);
virtual ~BuddyResyncerGatherSlave();
void workLoop();
private:
AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool
Mutex statusMutex; // protects isRunning
Condition isRunningChangeCond;
const StorageTarget& target;
AtomicUInt64 numChunksDiscovered;
AtomicUInt64 numChunksMatched;
AtomicUInt64 numDirsDiscovered;
AtomicUInt64 numDirsMatched;
bool isRunning; // true if an instance of this component is currently running
ChunkSyncCandidateStore* syncCandidates;
BuddyResyncerGatherSlaveWorkQueue* workQueue;
// the nftw() callback needs access to the slave threads
static Mutex staticGatherSlavesMutex;
static std::map<std::string, BuddyResyncerGatherSlave*> staticGatherSlaves;
virtual void run();
static int handleDiscoveredEntry(const char* path, const struct stat* statBuf,
int ftwEntryType, struct FTW* ftwBuf);
public:
// getters & setters
bool getIsRunning()
{
const std::lock_guard<Mutex> lock(statusMutex);
return this->isRunning;
}
void getCounters(uint64_t& outNumChunksDiscovered, uint64_t& outNumChunksMatched,
uint64_t& outNumDirsDiscovered, uint64_t& outNumDirsMatched)
{
outNumChunksDiscovered = numChunksDiscovered.read();
outNumChunksMatched = numChunksMatched.read();
outNumDirsDiscovered = numDirsDiscovered.read();
outNumDirsMatched = numDirsMatched.read();
}
void setOnlyTerminateIfIdle(bool value)
{
if (value)
onlyTerminateIfIdle.set(1);
else
onlyTerminateIfIdle.setZero();
}
bool getOnlyTerminateIfIdle()
{
if (onlyTerminateIfIdle.read() == 0)
return false;
else
return true;
}
private:
// getters & setters
void setIsRunning(bool isRunning)
{
const std::lock_guard<Mutex> lock(statusMutex);
this->isRunning = isRunning;
isRunningChangeCond.broadcast();
}
bool getSelfTerminateNotIdle()
{
return ( (getSelfTerminate() && (!getOnlyTerminateIfIdle())) );
}
};
typedef std::vector<BuddyResyncerGatherSlave*> BuddyResyncerGatherSlaveVec;
typedef BuddyResyncerGatherSlaveVec::iterator BuddyResyncerGatherSlaveVecIter;


@@ -0,0 +1,44 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <string>
/**
* A storage sync candidate. Has a target ID and a path.
*/
class ChunkSyncCandidateDir
{
public:
ChunkSyncCandidateDir(const std::string& relativePath, const uint16_t targetID)
: relativePath(relativePath), targetID(targetID)
{ }
ChunkSyncCandidateDir()
: targetID(0)
{ }
private:
std::string relativePath;
uint16_t targetID;
public:
const std::string& getRelativePath() const { return relativePath; }
uint16_t getTargetID() const { return targetID; }
};
/**
* A storage sync candidate for a file (chunk); currently identical to the directory candidate.
*/
class ChunkSyncCandidateFile : public ChunkSyncCandidateDir
{
public:
ChunkSyncCandidateFile(const std::string& relativePath, uint16_t targetID)
: ChunkSyncCandidateDir(relativePath, targetID)
{ }
ChunkSyncCandidateFile() = default;
};
typedef SyncCandidateStore<ChunkSyncCandidateDir, ChunkSyncCandidateFile> ChunkSyncCandidateStore;