New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
#include "DatagramListener.h"
#include <common/net/message/NetMessageTypes.h>
/**
 * Constructs the datagram listener.
 *
 * All arguments are handed through unchanged to the AbstractDatagramListener base, together
 * with the fixed string "DGramLis" as first argument (presumably the component name).
 */
DatagramListener::DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
   AcknowledgmentStore* ackStore, unsigned short udpPort, bool restrictOutboundInterfaces)
   : AbstractDatagramListener("DGramLis", netFilter, localNicList, ackStore, udpPort,
        restrictOutboundInterfaces)
{
   // the base class constructor does all the work
}
// This class holds no resources of its own, so the compiler-generated destructor suffices.
DatagramListener::~DatagramListener() = default;
/**
 * Handles a single message that arrived via datagram.
 *
 * Only the message types listed below are processed; every other type is logged as an error
 * together with the sender address and is otherwise ignored.
 *
 * @param fromAddr address of the sender; also used to find the socket for the response
 * @param msg the received message
 */
void DatagramListener::handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg)
{
   HighResolutionStats stats; // currently ignored

   const std::shared_ptr<StandardSocket> sock = findSenderSock(fromAddr->sin_addr);
   if (!sock)
   {
      log.log(Log_WARNING, "Could not handle incoming message: no socket");
      return;
   }

   NetMessage::ResponseContext rctx(fromAddr, sock.get(), sendBuf, DGRAMMGR_SENDBUF_SIZE, &stats);

   // note: this name is passed to the LOG macro below, so it is kept as-is
   const auto messageType = netMessageTypeToStr(msg->getMsgType());

   // decide first whether the message type is acceptable in this context
   bool validInContext;

   switch (msg->getMsgType())
   {
      case NETMSGTYPE_Ack:
      case NETMSGTYPE_Dummy:
      case NETMSGTYPE_HeartbeatRequest:
      case NETMSGTYPE_Heartbeat:
      case NETMSGTYPE_MapTargets:
      case NETMSGTYPE_PublishCapacities:
      case NETMSGTYPE_RemoveNode:
      case NETMSGTYPE_RefreshStoragePools:
      case NETMSGTYPE_RefreshTargetStates:
      case NETMSGTYPE_SetMirrorBuddyGroup:
         validInContext = true;
         break;

      default:
         validInContext = false;
         break;
   }

   if (!validInContext)
   { // valid, but not within this context
      log.logErr(
         "Received a message that is invalid within the current context "
         "from: " + Socket::ipaddrToStr(fromAddr->sin_addr) + "; "
         "type: " + messageType );
      return;
   }

   if (!msg->processIncoming(rctx))
   {
      LOG(GENERAL, WARNING,
         "Problem encountered during handling of incoming message.", messageType);
   }
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include <common/components/AbstractDatagramListener.h>
/**
 * Datagram listener of this service: an AbstractDatagramListener whose handleIncomingMsg()
 * decides which incoming message types are processed.
 */
class DatagramListener : public AbstractDatagramListener
{
   public:
      DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
         AcknowledgmentStore* ackStore, unsigned short udpPort,
         bool restrictOutboundInterfaces);

      virtual ~DatagramListener();

   protected:
      // called with each received message and the address of its sender
      virtual void handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg);
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,144 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/components/ComponentInitException.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/threading/PThread.h>
#include <common/Common.h>
class StorageTarget;

/**
 * Thread declaration for the internode syncer.
 *
 * Besides the static download/register helpers, the class keeps four "force" flags. Each flag
 * has its own mutex, can be raised through a public setter and is consumed (read and cleared
 * in one step) through a private getAndReset...() method.
 */
class InternodeSyncer : public PThread
{
   public:
      InternodeSyncer();
      virtual ~InternodeSyncer();

      static bool downloadAndSyncTargetStates(UInt16List& outTargetIDs,
         UInt8List& outReachabilityStates, UInt8List& outConsistencyStates);
      static bool downloadAndSyncNodes();
      static bool downloadAndSyncTargetMappings();
      static bool downloadAndSyncMirrorBuddyGroups();
      static bool downloadAndSyncStoragePools();

      static bool downloadAllExceededQuotaLists(
         const std::map<uint16_t, std::unique_ptr<StorageTarget>>& targets);
      static bool downloadExceededQuotaList(uint16_t targetId, QuotaDataType idType,
         QuotaLimitType exType, UIntList* outIDList, FhgfsOpsErr& error);

      static void syncClientSessions(const std::vector<NodeHandle>& clientsList);

      void publishTargetState(uint16_t targetID, TargetConsistencyState targetState);
      bool publishLocalTargetStateChanges(const TargetStateMap& oldStates,
         const TargetStateMap& changes);

      static bool registerNode(AbstractDatagramListener* dgramLis);
      static bool registerTargetMappings();
      static void requestBuddyTargetStates();

   private:
      LogContext log;

      // one mutex per force flag below
      Mutex forceTargetStatesUpdateMutex;
      Mutex forcePublishCapacitiesMutex;
      Mutex forceStoragePoolsUpdateMutex;
      Mutex forceCheckNetworkMutex;

      bool forceTargetStatesUpdate; // true to force update of target states
      bool forcePublishCapacities; // true to force publishing target capacities
      bool forceStoragePoolsUpdate; // true to force update of storage pools
      bool forceCheckNetwork; // true to force update of network interfaces

      virtual void run();
      void syncLoop();

      // returns true if the local interfaces have changed
      bool checkNetwork();
      void dropIdleConns();
      unsigned dropIdleConnsByStore(NodeStoreServers* nodes);

      void updateTargetStatesAndBuddyGroups();
      void publishTargetCapacities();
      void forceMgmtdPoolsRefresh();

      static void printSyncNodesResults(NodeType nodeType, NumNodeIDList* addedNodes,
         NumNodeIDList* removedNodes);

      bool publishTargetStateChanges(UInt16List& targetIDs, UInt8List& oldStates,
         UInt8List& newStates);

      static bool downloadAllExceededQuotaLists(uint16_t targetId);

   public:
      // inliners: raise one of the force flags under its mutex

      void setForceTargetStatesUpdate()
      {
         std::lock_guard<Mutex> flagLock(forceTargetStatesUpdateMutex);
         forceTargetStatesUpdate = true;
      }

      void setForcePublishCapacities()
      {
         std::lock_guard<Mutex> flagLock(forcePublishCapacitiesMutex);
         forcePublishCapacities = true;
      }

      void setForceStoragePoolsUpdate()
      {
         std::lock_guard<Mutex> flagLock(forceStoragePoolsUpdateMutex);
         forceStoragePoolsUpdate = true;
      }

      void setForceCheckNetwork()
      {
         std::lock_guard<Mutex> flagLock(forceCheckNetworkMutex);
         forceCheckNetwork = true;
      }

   private:
      // inliners: return the current value of a force flag and clear it, all under its mutex

      bool getAndResetForceTargetStatesUpdate()
      {
         std::lock_guard<Mutex> flagLock(forceTargetStatesUpdateMutex);

         const bool wasSet = forceTargetStatesUpdate;
         forceTargetStatesUpdate = false;

         return wasSet;
      }

      bool getAndResetForcePublishCapacities()
      {
         std::lock_guard<Mutex> flagLock(forcePublishCapacitiesMutex);

         const bool wasSet = forcePublishCapacities;
         forcePublishCapacities = false;

         return wasSet;
      }

      bool getAndResetForceStoragePoolsUpdate()
      {
         std::lock_guard<Mutex> flagLock(forceStoragePoolsUpdateMutex);

         const bool wasSet = forceStoragePoolsUpdate;
         forceStoragePoolsUpdate = false;

         return wasSet;
      }

      bool getAndResetForceCheckNetwork()
      {
         std::lock_guard<Mutex> flagLock(forceCheckNetworkMutex);

         const bool wasSet = forceCheckNetwork;
         forceCheckNetwork = false;

         return wasSet;
      }
};

View File

@@ -0,0 +1,47 @@
#include <app/App.h>
#include <program/Program.h>
#include "StorageStatsCollector.h"
/**
 * Collects one stats sample by merging the stats of all work queues and prepends it to the
 * stats history.
 *
 * Note: Other than the common StatsCollector::collectStats(), this method can handle multiple
 * work queues.
 *
 * If the work queue map is empty, no sample is recorded (there is no first queue that could
 * serve as basis, and dereferencing begin() of an empty map would be undefined behavior).
 */
void StorageStatsCollector::collectStats()
{
   App* app = Program::getApp();
   MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();

   HighResolutionStats newStats;

   const std::lock_guard<Mutex> lock(mutex);

   MultiWorkQueueMapIter iter = workQueueMap->begin();
   if (iter == workQueueMap->end())
      return; // no work queues => nothing to sample

   // get stats from first queue as basis
   iter->second->getAndResetStats(&newStats);

   // add the stat values from following queues
   for (++iter; iter != workQueueMap->end(); ++iter)
   {
      HighResolutionStats currentStats;

      iter->second->getAndResetStats(&currentStats);

      HighResolutionStatsTk::addHighResRawStats(currentStats, newStats);
      HighResolutionStatsTk::addHighResIncStats(currentStats, newStats);
   }

   // set current stats time
   newStats.rawVals.statsTimeMS = TimeAbs().getTimeMS();

   // take care of max history length
   if (statsList.size() == historyLength)
      statsList.pop_back();

   // push new stats to front
   statsList.push_front(newStats);
}

View File

@@ -0,0 +1,25 @@
#pragma once
#include <common/components/StatsCollector.h>
/**
 * StatsCollector variant for multiple work queues.
 *
 * The common StatsCollector cannot handle more than one work queue, hence collectStats() is
 * overridden here. The base class is constructed with NULL as its first argument.
 */
class StorageStatsCollector : public StatsCollector
{
   public:
      StorageStatsCollector(unsigned collectIntervalMS, unsigned historyLength)
         : StatsCollector(NULL, collectIntervalMS, historyLength)
      {
         // everything is set up by the base class
      }

      virtual ~StorageStatsCollector()
      {
      }

   protected:
      virtual void collectStats();
};

View File

@@ -0,0 +1,38 @@
#include "StorageBenchOperator.h"
// Forwards all arguments to initAndStartStorageBench() of the owned StorageBenchSlave and
// returns its result.
int StorageBenchOperator::initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize,
   int64_t size, int threads, bool odirect, StorageBenchType type)
{
   const int startRes = slave.initAndStartStorageBench(targetIDs, blocksize, size, threads,
      odirect, type);

   return startRes;
}
// Forwards to cleanup() of the owned StorageBenchSlave and returns its result.
int StorageBenchOperator::cleanup(UInt16List* targetIDs)
{
   const int cleanupRes = slave.cleanup(targetIDs);

   return cleanupRes;
}
int StorageBenchOperator::stopBenchmark()
{
return this->slave.stopBenchmark();
}
// Forwards to getStatusWithResults() of the owned StorageBenchSlave: outResults is filled by
// the slave and the slave's status is returned.
StorageBenchStatus StorageBenchOperator::getStatusWithResults(UInt16List* targetIDs,
   StorageBenchResultsMap* outResults)
{
   const StorageBenchStatus benchStatus = slave.getStatusWithResults(targetIDs, outResults);

   return benchStatus;
}
void StorageBenchOperator::shutdownBenchmark()
{
this->slave.shutdownBenchmark();
}
void StorageBenchOperator::waitForShutdownBenchmark()
{
this->slave.waitForShutdownBenchmark();
}

View File

@@ -0,0 +1,45 @@
#pragma once
#include "StorageBenchSlave.h"
class StorageBenchOperator
{
public:
StorageBenchOperator() {}
int initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
int threads, bool odirect, StorageBenchType type);
int cleanup(UInt16List* targetIDs);
int stopBenchmark();
StorageBenchStatus getStatusWithResults(UInt16List* targetIDs,
StorageBenchResultsMap* outResults);
void shutdownBenchmark();
void waitForShutdownBenchmark();
private:
StorageBenchSlave slave;
protected:
public:
// inliners
StorageBenchStatus getStatus()
{
return this->slave.getStatus();
}
StorageBenchType getType()
{
return this->slave.getType();
}
int getLastRunErrorCode()
{
return this->slave.getLastRunErrorCode();
}
};

View File

@@ -0,0 +1,832 @@
#include <common/system/System.h>
#include <common/toolkit/StorageTk.h>
#include <common/toolkit/StringTk.h>
#include <components/worker/StorageBenchWork.h>
#include <program/Program.h>
#include "StorageBenchSlave.h"
#include <mutex>
// name of the directory below a storage target's path that holds the benchmark files
// (kept as a macro because it is also used in string literal concatenation)
#define STORAGEBENCH_STORAGE_SUBDIR_NAME "benchmark"
// timeout in milliseconds for one wait for incoming data on the worker communication pipe
#define STORAGEBENCH_READ_PIPE_TIMEOUT_MS 2000
/*
 * Initializes and starts the storage benchmark with the given parameters.
 *
 * The whole method runs under statusMutex. Nothing is started if a benchmark is still active.
 *
 * @param targetIDs a list with the targetIDs which the benchmark tests
 * @param blocksize the blocksize for the benchmark
 * @param size the size for the benchmark
 * @param threads the number (simulated clients) of threads for the benchmark
 * @param odirect true to open the benchmark files with O_DIRECT
 * @param type the type of the benchmark
 * @return the error code, 0 if the benchmark was initialized and started successfully
 *    (STORAGEBENCH_ERROR..); a failure to create the benchmark thread is reported as
 *    STORAGEBENCH_ERROR_RUNTIME_ERROR
 *
 */
int StorageBenchSlave::initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize,
   int64_t size, int threads, bool odirect, StorageBenchType type)
{
   const char* logContext = "Storage Benchmark (init)";

   int lastError = STORAGEBENCH_ERROR_NO_ERROR;
   int retVal = STORAGEBENCH_ERROR_NO_ERROR;

   this->resetSelfTerminate();

   const std::lock_guard<Mutex> lock(statusMutex);

   if (STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
   {
      // note: the trailing space after "if a" is needed, the two literals are concatenated
      LogContext(logContext).logErr(
         std::string("Benchmark is already running. It's not possible to start a benchmark if a "
            "benchmark is running."));

      retVal = STORAGEBENCH_ERROR_RUNTIME_IS_RUNNING;
   }
   else
   {
      retVal = initStorageBench(targetIDs, blocksize, size, threads, odirect, type);
   }

   if (retVal == STORAGEBENCH_ERROR_NO_ERROR)
   {
      if (this->status != StorageBenchStatus_INITIALISED)
      {
         LogContext(logContext).logErr(
            std::string("Benchmark not correctly initialized."));

         this->lastRunErrorCode = STORAGEBENCH_ERROR_UNINITIALIZED;
         this->status = StorageBenchStatus_ERROR;

         retVal = STORAGEBENCH_ERROR_UNINITIALIZED;
      }
      else
      {
         try
         {
            this->start();

            this->status = StorageBenchStatus_RUNNING;
            lastError = this->lastRunErrorCode;
         }
         catch (PThreadCreateException& e)
         {
            LogContext(logContext).logErr(std::string("Unable to start thread: ") + e.what() );

            // record a real error code; otherwise the caller would be told that the benchmark
            // was started although no thread is running
            this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_ERROR;
            this->status = StorageBenchStatus_ERROR;

            lastError = this->lastRunErrorCode;
         }
      }
   }

   if (lastError != STORAGEBENCH_ERROR_NO_ERROR)
   {
      retVal = lastError;
   }

   return retVal;
}
/*
 * Initializes the storage benchmark with the given parameters: stores the settings, sets up
 * the per-thread bookkeeping and the transfer buffer and prepares the targets (existing files
 * are checked for a read benchmark, the benchmark folder is created for a write benchmark).
 *
 * @param targetIDs a list with the targetIDs which the benchmark tests; the list is copied
 * @param blocksize the blocksize for the benchmark
 * @param size the size for the benchmark
 * @param threads the number (simulated clients) of threads per target for the benchmark
 * @param odirect true to open the benchmark files with O_DIRECT
 * @param type the type of the benchmark
 * @return the error code, 0 if the benchmark was initialized successfully
 *    (STORAGEBENCH_ERROR..)
 *
 */
int StorageBenchSlave::initStorageBench(UInt16List* targetIDs, int64_t blocksize,
   int64_t size, int threads, bool odirect, StorageBenchType type)
{
   const char* logContext = "Storage Benchmark (init)";

   LogContext(logContext).log(Log_DEBUG, "Initializing benchmark ...");

   this->benchType = type;

   // copy the given list first, then release the copy of a previous run (it was leaked before)
   UInt16List* newTargetIDs = new UInt16List(*targetIDs);
   delete this->targetIDs;
   this->targetIDs = newTargetIDs;

   this->blocksize = blocksize;
   this->size = size;
   this->numThreads = threads;
   this->odirect = odirect;
   this->numThreadsDone = 0;

   initThreadData();

   if (!initTransferData())
   {
      this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_TRANSFER_DATA;
      this->status = StorageBenchStatus_ERROR;

      return STORAGEBENCH_ERROR_INIT_TRANSFER_DATA;
   }

   if (this->benchType == StorageBenchType_READ)
   {
      if (!checkReadData())
      {
         LogContext(logContext).logErr(
            std::string("No (or not enough) data for read benchmark available. "
               "Start a write benchmark with the same size parameter before the read benchmark.") );

         this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_READ_DATA;
         this->status = StorageBenchStatus_ERROR;

         return STORAGEBENCH_ERROR_INIT_READ_DATA;
      }
   }
   else
   if (this->benchType == StorageBenchType_WRITE)
   {
      if (!createBenchmarkFolder() )
      {
         LogContext(logContext).logErr(
            std::string("Couldn't create the benchmark folder."));

         this->lastRunErrorCode = STORAGEBENCH_ERROR_INIT_CREATE_BENCH_FOLDER;
         this->status = StorageBenchStatus_ERROR;

         return STORAGEBENCH_ERROR_INIT_CREATE_BENCH_FOLDER;
      }
   }
   else
   {
      // note: status and lastRunErrorCode are intentionally left untouched in this case
      LogContext(logContext).logErr(std::string(
         "Unknown benchmark type: " + StringTk::uintToStr(this->benchType) ) );

      return STORAGEBENCH_ERROR_INITIALIZATION_ERROR;
   }

   this->lastRunErrorCode = STORAGEBENCH_ERROR_NO_ERROR;
   this->status = StorageBenchStatus_INITIALISED;

   LogContext(logContext).log(Log_DEBUG, std::string("Benchmark initialized."));

   return STORAGEBENCH_ERROR_NO_ERROR;
}
/*
* initialize the data which will be written to the disk, the size of the transfer data a equal
* to the blocksize and initialized with random characters
*
* @return true if the random data are initialized,
* false if a error occurred
*
*/
bool StorageBenchSlave::initTransferData()
{
const char* logContext = "Storage Benchmark (init buf)";
LogContext(logContext).log(Log_DEBUG, std::string("Initializing random data..."));
void* rawTransferData;
if (posix_memalign(&rawTransferData, 4096, blocksize) != 0)
return false;
transferData.reset(static_cast<char*>(rawTransferData));
Random randomizer = Random();
for (int64_t counter = 0; counter < this->blocksize; counter++)
{
this->transferData[counter] = randomizer.getNextInt();
}
LogContext(logContext).log(Log_DEBUG, std::string("Random data initialized."));
return true;
}
/*
* frees the transfer data
*/
void StorageBenchSlave::freeTransferData()
{
transferData.reset();
}
/*
* initialize the informations about the threads
*
*/
void StorageBenchSlave::initThreadData()
{
const char* logContext = "Storage Benchmark (init)";
LogContext(logContext).log(Log_DEBUG, std::string("Initializing thread data..."));
this->threadData.clear();
int allThreadCounter = 0;
for (UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
{
for (int threadCount = 0; threadCount < this->numThreads; threadCount++)
{
StorageBenchThreadData data;
data.targetID = *iter;
data.targetThreadID = threadCount;
data.engagedSize = 0;
data.fileDescriptor = 0;
data.neededTime = 0;
this->threadData[allThreadCounter] = data;
allThreadCounter++;
}
}
LogContext(logContext).log(Log_DEBUG, "Thread data initialized.");
}
/*
 * Thread main method: runs the (read or write) benchmark.
 *
 * Steps:
 * 1) open the benchmark files; on failure only the error code and the ERROR status are set
 * 2) queue one initial work package per entry in threadData
 * 3) while the status is RUNNING: wait for a thread ID reported through the pipe and either
 *    queue the next work package for that thread or count the thread as done
 * 4) collect the outstanding responses of the workers
 * 5) close the files, free the transfer data and set the final status
 *
 */
void StorageBenchSlave::run()
{
const char* logContext = "Storage Benchmark (run)";
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark started..."));
App* app = Program::getApp();
bool openRes = openFiles();
if (openRes)
{
this->startTime.setToNow();
// add a work package into the worker queue for every thread
for(StorageBenchThreadDataMapIter iter = threadData.begin();
iter != threadData.end();
iter++)
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Add work for target: ") +
StringTk::uintToStr(iter->second.targetID) );
LOG_DEBUG(logContext, Log_DEBUG, std::string("- threadID: ") +
StringTk::intToStr(iter->first) );
LOG_DEBUG(logContext, Log_DEBUG, std::string("- type: ") +
StringTk::intToStr(this->benchType) );
StorageBenchWork* work = new StorageBenchWork(iter->second.targetID, iter->first,
iter->second.fileDescriptor, this->benchType, getNextPackageSize(iter->first),
this->threadCommunication, this->transferData.get());
app->getWorkQueue(iter->second.targetID)->addIndirectWork(work);
}
// main loop: runs until the status leaves RUNNING (e.g. via stopBenchmark() or below)
while(getStatus() == StorageBenchStatus_RUNNING)
{
int threadID = 0;
// the value read from the pipe is either a virtual thread ID or an error code
if (this->threadCommunication->waitForIncomingData(STORAGEBENCH_READ_PIPE_TIMEOUT_MS))
{
this->threadCommunication->getReadFD()->readExact(&threadID, sizeof(int));
}
else
{
threadID = STORAGEBENCH_ERROR_COM_TIMEOUT;
}
if (this->getSelfTerminate())
{
LogContext(logContext).logErr(std::string("Abort benchmark."));
this->lastRunErrorCode = STORAGEBENCH_ERROR_ABORT_BENCHMARK;
setStatus(StorageBenchStatus_STOPPING);
// a value was read in this round, so it still has to be accounted for
if (threadID != STORAGEBENCH_ERROR_COM_TIMEOUT)
{
this->threadData[threadID].neededTime = this->startTime.elapsedMS();
this->numThreadsDone++;
}
break;
}
else
if (threadID == STORAGEBENCH_ERROR_WORKER_ERROR)
{
LogContext(logContext).logErr(std::string("I/O operation on disk failed."));
this->lastRunErrorCode = STORAGEBENCH_ERROR_WORKER_ERROR;
setStatus(StorageBenchStatus_STOPPING);
// increment the thread counter, because the thread which sent this error hasn't a
// work package in the queue of the workers but the response from the other threads
// must be collected
this->numThreadsDone++;
break;
}
else
if (threadID == STORAGEBENCH_ERROR_COM_TIMEOUT)
{
// nothing arrived within the timeout; re-check the status and wait again
continue;
}
else
// note: the unsigned cast makes the second operand true for every negative value
if ( (threadID < -1) || ( ( (unsigned)threadID) >= this->threadData.size() ) )
{ // error if the worker reports an unknown threadID
std::string errorMessage("Unknown thread ID: " + StringTk::intToStr(threadID) + "; "
"map size: " + StringTk::uintToStr(this->threadData.size() ) );
LogContext(logContext).logErr(errorMessage);
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_ERROR;
setStatus(StorageBenchStatus_STOPPING);
// increment the thread counter, because the thread which sent this error hasn't a
// work package in the queue of the workers but the response from the other threads
// must be collected
this->numThreadsDone++;
break;
}
StorageBenchThreadData* currentData = &this->threadData[threadID];
int64_t workSize = getNextPackageSize(threadID);
// add a new work package into the workers queue for the reported thread only if the
// data size for the thread is bigger then 0
if (workSize != 0)
{
StorageBenchWork* work = new StorageBenchWork(currentData->targetID, threadID,
currentData->fileDescriptor, this->benchType, workSize, this->threadCommunication,
this->transferData.get());
app->getWorkQueue(currentData->targetID)->addIndirectWork(work);
}
else
{
// the thread has finished his work
currentData->neededTime = this->startTime.elapsedMS();
this->numThreadsDone++;
}
if (this->numThreadsDone >= this->threadData.size())
{
setStatus(StorageBenchStatus_FINISHING);
}
}
//collect all responses from the worker
while ( (this->numThreadsDone < this->threadData.size()) && app->getWorkersRunning() )
{
int threadID = 0;
if (this->threadCommunication->waitForIncomingData(STORAGEBENCH_READ_PIPE_TIMEOUT_MS))
{
this->threadCommunication->getReadFD()->readExact(&threadID, sizeof(int));
}
else
{
continue;
}
LOG_DEBUG(logContext, Log_DEBUG, std::string("Collect response from worker."));
// negative values are error codes and have no threadData entry to update
if(threadID >= 0)
this->threadData[threadID].neededTime = this->startTime.elapsedMS();
this->numThreadsDone++;
}
// all workers finished/stopped ==> close all files
closeFiles();
freeTransferData();
// all threads have finished the work or the benchmark was stopped, set new status
if (this->getStatus() == StorageBenchStatus_FINISHING)
{
this->setStatus(StorageBenchStatus_FINISHED);
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark finished."));
}
else
if (this->getStatus() == StorageBenchStatus_STOPPING)
{
// STOPPING with an error code set means the run was aborted or failed
if (this->lastRunErrorCode != STORAGEBENCH_ERROR_NO_ERROR)
{
this->setStatus(StorageBenchStatus_ERROR);
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark stopped with errors."));
}
else
{
this->setStatus(StorageBenchStatus_STOPPED);
LogContext(logContext).log(Log_CRITICAL, std::string("Benchmark stopped."));
}
}
}
else
{
// the benchmark files could not be opened, so no work was queued
this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_OPEN_FILES;
setStatus(StorageBenchStatus_ERROR);
}
}
/*
 * Checks the size of the benchmark files of all threads; each file must be at least as large
 * as the requested benchmark size for the read benchmark.
 *
 * @return true if data for a read benchmark exists,
 *         false if a file is too small, a target is unknown or stat() failed
 *
 */
bool StorageBenchSlave::checkReadData()
{
   const char* logContext = "Storage Benchmark (check)";

   for (StorageBenchThreadDataMapIter iter = threadData.begin();
      iter != threadData.end(); iter++)
   {
      auto* const target = Program::getApp()->getStorageTargets()->getTarget(iter->second.targetID);
      if (!target)
      {
         LogContext(logContext).logErr(std::string("TargetID unknown."));
         return false;
      }

      std::string path = target->getPath().str();

      path = path + "/" + STORAGEBENCH_STORAGE_SUBDIR_NAME + "/" +
         StringTk::uintToStr(iter->second.targetThreadID);

      struct stat fileStat;

      if (stat(path.c_str(), &fileStat) == -1)
      {
         LogContext(logContext).logErr(std::string("Couldn't stat() benchmark file. SysErr: ") +
            System::getErrString() );
         return false;
      }

      if (fileStat.st_size < this->size)
      {
         // st_size is a 64 bit value for large files, so it must not be logged through the
         // int conversion (that would print wrong sizes for files of 2 GiB and more)
         LogContext(logContext).logErr(std::string("Existing benchmark file too small. "
            "Requested file size: " + StringTk::int64ToStr(this->size) + " "
            "File size: " + StringTk::int64ToStr(fileStat.st_size)));
         return false;
      }
   }

   return true;
}
/*
* creates the benchmark folder in the storage target folder
*
* @return true if all benchmark folders are created,
* false if a error occurred
*
*/
bool StorageBenchSlave::createBenchmarkFolder()
{
const char* logContext = "Storage Benchmark (mkdir)";
for(UInt16ListIter iter = this->targetIDs->begin(); iter != this->targetIDs->end(); iter++)
{
auto* const target = Program::getApp()->getStorageTargets()->getTarget(*iter);
if (!target)
{
LogContext(logContext).logErr("TargetID unknown: " + StringTk::uintToStr(*iter) );
return false;
}
Path currentPath(target->getPath() / STORAGEBENCH_STORAGE_SUBDIR_NAME);
if(!StorageTk::createPathOnDisk(currentPath, false))
{
LogContext(logContext).logErr(
std::string("Unable to create benchmark directory: " + currentPath.str() ) );
return false;
}
}
return true;
}
/*
* opens all needed files for the benchmark. This method will be executed at the start
* of the benchmark
*
* @return true if all files are opened,
* false if a error occurred
*
*/
bool StorageBenchSlave::openFiles()
{
const char* logContext = "Storage Benchmark (open)";
mode_t openMode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
for(StorageBenchThreadDataMapIter iter = threadData.begin();
iter != threadData.end();
iter++)
{
auto* const target = Program::getApp()->getStorageTargets()->getTarget(iter->second.targetID);
if (!target)
{
LogContext(logContext).logErr(
"TargetID unknown: " + StringTk::uintToStr(iter->second.targetID) );
return false;
}
std::string path = target->getPath().str();
path = path + "/" STORAGEBENCH_STORAGE_SUBDIR_NAME "/" +
StringTk::uintToStr(iter->second.targetThreadID);
int fileDescriptor = -1;
// open file
int directFlag = this->odirect ? O_DIRECT : 0;
if(this->benchType == StorageBenchType_READ)
fileDescriptor = open(path.c_str(), O_RDONLY | directFlag);
else
fileDescriptor = open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC | directFlag, openMode);
if (fileDescriptor != -1)
iter->second.fileDescriptor = fileDescriptor;
else
{ // open failed
LogContext(logContext).logErr("Couldn't open benchmark file: " + path + "; "
"SysErr: " + System::getErrString() );
return false;
}
}
return true;
}
/*
 * Closes the file descriptors of all entries in threadData.
 *
 * Every descriptor is closed, even if closing an earlier one failed or its target is no
 * longer known (previously an unknown target aborted the loop and left the remaining
 * descriptors open).
 *
 * @return true if all files were closed without error,
 *         false if at least one close() failed
 *
 */
bool StorageBenchSlave::closeFiles()
{
   const char* logContext = "Storage Benchmark (close)";

   bool retVal = true;

   for (StorageBenchThreadDataMapIter iter = threadData.begin();
      iter != threadData.end();
      iter++)
   {
      if (close(iter->second.fileDescriptor) == 0)
         continue;

      int closeErrno = errno; // save it, the calls below might change errno

      retVal = false;

      auto* const target = Program::getApp()->getStorageTargets()->getTarget(
         iter->second.targetID);

      if (!target)
      {
         LogContext(logContext).logErr(
            "TargetID unknown: " + StringTk::uintToStr(iter->second.targetID) );

         continue; // the path can't be built for the log message, but keep closing the rest
      }

      std::string path = target->getPath().str();

      path = path + "/" + STORAGEBENCH_STORAGE_SUBDIR_NAME + "/" +
         StringTk::uintToStr(iter->second.targetThreadID);

      LogContext(logContext).logErr("Couldn't close file: " + path + "; "
         "SysErr: " + System::getErrString(closeErrno) );
   }

   return retVal;
}
/*
* calculates the size (bytes) of the data which will be written on the disk by the worker with
* the next work package for the given thread
*
* @param threadID the threadID
* @return the size of the data for next work package in bytes,
* if 0 the given thread has written all data
*
*/
int64_t StorageBenchSlave::getNextPackageSize(int threadID)
{
int64_t retVal = BEEGFS_MIN(this->blocksize,
this->size - this->threadData[threadID].engagedSize);
this->threadData[threadID].engagedSize += retVal;
return retVal;
}
/*
* calculates the throughput (kB/s) of the given target
*
* @param targetID the targetID
* @return the throughput of the given target in kilobytes per second
*
*/
int64_t StorageBenchSlave::getResult(uint16_t targetID)
{
int64_t size = 0;
int64_t time = 0;
for(StorageBenchThreadDataMapIter iter = this->threadData.begin();
iter != this->threadData.end();
iter++)
{
if (iter->second.targetID == targetID)
{
// summarize the size of the different threads which worked on a target
size += iter->second.engagedSize;
// search the thread with the longest runtime
if (time < this->threadData[iter->first].neededTime)
time = this->threadData[iter->first].neededTime;
}
}
// if the threads are not finished use the needed time up to now
if (time == 0)
time = this->startTime.elapsedMS();
// if no results available return zero
if ( (size == 0) || (time == 0) )
return 0;
// input: size in bytes, time in milliseconds,
// output: in kilobytes per second
return ( (size * 1000) / (time * 1024) );
}
/*
 * Calculates the throughput (kB/s) of the given targets.
 *
 * @param targetIDs the list of targetIDs
 * @param outResults an initialized map; after the call it contains one entry per given
 *    targetID (existing entries for these IDs are overwritten)
 *
 */
void StorageBenchSlave::getResults(UInt16List* targetIDs, StorageBenchResultsMap* outResults)
{
   StorageBenchResultsMap& results = *outResults;

   for (const auto currentTargetID : *targetIDs)
      results[currentTargetID] = getResult(currentTargetID);
}
/*
 * Calculates the throughput (kB/s) of all targets stored in this object's own target list.
 *
 * @param outResults an initialized map; after the call it contains one entry per targetID
 *    (existing entries for these IDs are overwritten)
 *
 */
void StorageBenchSlave::getAllResults(StorageBenchResultsMap* outResults)
{
   StorageBenchResultsMap& results = *outResults;

   for (const auto currentTargetID : *this->targetIDs)
      results[currentTargetID] = getResult(currentTargetID);
}
/*
 * Calculates the throughput (kB/s) of the given targets and returns the status of the
 * benchmark. The results are computed first, the status is read afterwards.
 *
 * @param targetIDs the list of targetIDs
 * @param outResults an initialized map for the results, which contains the results after
 *    execution of the method
 * @return the status of the benchmark
 *
 */
StorageBenchStatus StorageBenchSlave::getStatusWithResults(UInt16List* targetIDs,
   StorageBenchResultsMap* outResults)
{
   getResults(targetIDs, outResults);

   const StorageBenchStatus currentStatus = getStatus();

   return currentStatus;
}
/*
 * Requests a stop of the benchmark: a RUNNING benchmark is switched to STOPPING, every other
 * status is left as it is.
 *
 * @return the error code (STORAGEBENCH_ERROR..); currently always
 *    STORAGEBENCH_ERROR_NO_ERROR
 *
 */
int StorageBenchSlave::stopBenchmark()
{
   const std::lock_guard<Mutex> lock(statusMutex);

   const bool isRunning = (this->status == StorageBenchStatus_RUNNING);

   if (isRunning)
      this->status = StorageBenchStatus_STOPPING;

   // FINISHING/STOPPING and all inactive states need no action

   return STORAGEBENCH_ERROR_NO_ERROR;
}
/*
 * Deletes all regular files in the benchmark folder of the given targets. Entries that are
 * not regular files (or that cannot be examined) are logged and skipped.
 *
 * A target without a benchmark folder has nothing to clean up and is skipped; the remaining
 * targets are still processed.
 *
 * @param targetIDs the list of targetIDs which will be cleaned
 * @return the error code, 0 if the cleanup was successful (STORAGEBENCH_ERROR..)
 *
 */
int StorageBenchSlave::cleanup(UInt16List* targetIDs)
{
   const std::lock_guard<Mutex> lock(statusMutex);

   const char* logContext = "Storage Benchmark (cleanup)";

   //cleanup only possible if no benchmark is running
   if (STORAGEBENCHSTATUS_IS_ACTIVE(this->status))
   {
      LogContext(logContext).logErr("Cleanup not possible benchmark is running");

      return STORAGEBENCH_ERROR_RUNTIME_CLEANUP_JOB_ACTIVE;
   }

   for (UInt16ListIter iter = targetIDs->begin(); iter != targetIDs->end(); iter++)
   {
      auto* const target = Program::getApp()->getStorageTargets()->getTarget(*iter);
      if (!target)
      {
         LogContext(logContext).logErr(std::string("TargetID unknown."));
         return STORAGEBENCH_ERROR_RUNTIME_UNKNOWN_TARGET;
      }

      std::string path = target->getPath().str();

      path.append("/");
      path.append(STORAGEBENCH_STORAGE_SUBDIR_NAME);
      path.append("/");

      DIR* dir = opendir(path.c_str());
      if (dir == NULL)
      {
         const int openDirErrno = errno; // saved, later calls may overwrite errno

         if (openDirErrno == ENOENT)
         { // benchmark directory doesn't exist, no benchmark data for cleanup on this target
            continue;
         }

         this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;

         LogContext(logContext).logErr("Unable to delete files in benchmark directory: " + path +
            "; failed with SysErr: " + System::getErrString(openDirErrno));

         return STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
      }

      for (struct dirent* dirEntry = StorageTk::readdirFiltered(dir);
         dirEntry;
         dirEntry = StorageTk::readdirFiltered(dir) )
      {
         struct stat statData;
         std::string filePath(path + dirEntry->d_name);

         if (stat(filePath.c_str(), &statData) != 0)
         { // statData is not valid here, so it must not be inspected
            LogContext(logContext).logErr("Unable to delete files in benchmark directory: " + path);
            continue;
         }

         if (!S_ISREG(statData.st_mode))
         {
            LogContext(logContext).logErr("Unable to delete files in benchmark directory: " +
               path + " It's not a regular file.");
            continue;
         }

         if (unlink(filePath.c_str()) != 0)
         {
            LogContext(logContext).logErr(
               std::string("Unable to delete files in benchmark directory: "
                  + path));

            this->lastRunErrorCode = STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;

            closedir(dir);
            return STORAGEBENCH_ERROR_RUNTIME_DELETE_FOLDER;
         }
      }

      closedir(dir);
   }

   return STORAGEBENCH_ERROR_NO_ERROR;
}
/*
* aborts the benchmark, will be used if SIGINT received
*
*/
void StorageBenchSlave::shutdownBenchmark()
{
this->selfTerminate();
}
/*
 * Blocks the caller until the benchmark status is no longer an active one. Waits on
 * statusChangeCond with statusMutex and re-checks the status after every wake-up.
 */
void StorageBenchSlave::waitForShutdownBenchmark()
{
   const std::lock_guard<Mutex> lock(statusMutex);

   for ( ; STORAGEBENCHSTATUS_IS_ACTIVE(this->status); )
   {
      this->statusChangeCond.wait(&this->statusMutex);
   }
}

View File

@@ -0,0 +1,145 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/benchmark/StorageBench.h>
#include <common/threading/Condition.h>
#include <common/threading/PThread.h>
#include <common/toolkit/Pipe.h>
#include <common/toolkit/TimeFine.h>
#include <common/Common.h>
#include <mutex>
// bookkeeping data of one thread which simulates a client (one entry per virtual thread)
struct StorageBenchThreadData
{
uint16_t targetID; // storage target this thread works on
int targetThreadID; // index of the thread within its target; also the benchmark file name
int64_t engagedSize; // amount of data which was submitted for write/read
int fileDescriptor; // descriptor of the thread's benchmark file
int64_t neededTime; // elapsed time (ms) since benchmark start, taken when the thread was done
};
// Deleter functor for transferData: releases the buffer with free().
struct TransferDataDeleter
{
   void operator()(char* buffer)
   {
      free(buffer);
   }
};
// map for the informations about a thread; key: virtual threadID, value: information about thread
typedef std::map<int, StorageBenchThreadData> StorageBenchThreadDataMap;
typedef StorageBenchThreadDataMap::iterator StorageBenchThreadDataMapIter;
typedef StorageBenchThreadDataMap::const_iterator StorageBenchThreadDataMapCIter;
typedef StorageBenchThreadDataMap::value_type StorageBenchThreadDataMapVal;
/**
 * Thread which executes the storage benchmark.
 *
 * The status is protected by statusMutex; every status change is broadcast on statusChangeCond
 * (see setStatus() ), which is what waitForShutdownBenchmark() waits on.
 */
class StorageBenchSlave : public PThread
{
   public:
      StorageBenchSlave()
         : PThread("StorageBenchSlave"),
         threadCommunication(new Pipe(false, false) ),
         log("Storage Benchmark"),
         lastRunErrorCode(STORAGEBENCH_ERROR_NO_ERROR),
         status(StorageBenchStatus_UNINITIALIZED),
         benchType(StorageBenchType_NONE),
         blocksize(1), // useless defaults
         size(1), // useless defaults
         numThreads(1), // useless defaults
         odirect(false), // useless default; must not be left indeterminate
         numThreadsDone(0),
         targetIDs(NULL),
         transferData(nullptr)
      { }

      virtual ~StorageBenchSlave()
      {
         // both objects are owned by this instance
         SAFE_DELETE(this->threadCommunication);
         SAFE_DELETE(this->targetIDs);
      }

      int initAndStartStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
         int threads, bool odirect, StorageBenchType type);

      int cleanup(UInt16List* targetIDs);
      int stopBenchmark();
      StorageBenchStatus getStatusWithResults(UInt16List* targetIDs,
         StorageBenchResultsMap* outResults);
      void shutdownBenchmark();
      void waitForShutdownBenchmark();

   protected:

   private:
      Pipe* threadCommunication; // owned; deleted in the destructor
      Mutex statusMutex; // protects status
      Condition statusChangeCond; // broadcast on each status change

      LogContext log;
      int lastRunErrorCode; // STORAGEBENCH_ERROR_...

      StorageBenchStatus status;
      StorageBenchType benchType;
      int64_t blocksize;
      int64_t size;
      int numThreads;
      bool odirect;
      unsigned int numThreadsDone;

      UInt16List* targetIDs; // owned; deleted in the destructor
      StorageBenchThreadDataMap threadData; // key: virtual threadID
      std::unique_ptr<char[], TransferDataDeleter> transferData; // released via free()

      TimeFine startTime;

      virtual void run();

      int initStorageBench(UInt16List* targetIDs, int64_t blocksize, int64_t size,
         int threads, bool odirect, StorageBenchType type);
      bool initTransferData(void);
      void initThreadData();
      void freeTransferData();

      bool checkReadData(void);
      bool createBenchmarkFolder(void);
      bool openFiles(void);
      bool closeFiles(void);

      int64_t getNextPackageSize(int threadID);
      int64_t getResult(uint16_t targetID);
      void getResults(UInt16List* targetIDs, StorageBenchResultsMap* outResults);
      void getAllResults(StorageBenchResultsMap* outResults);

      /**
       * Sets the new status under statusMutex and wakes up all threads waiting for a status
       * change.
       */
      void setStatus(StorageBenchStatus newStatus)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         this->status = newStatus;
         this->statusChangeCond.broadcast();
      }

   public:
      //public inliners

      /**
       * @return STORAGEBENCH_ERROR_...; read without taking statusMutex
       */
      int getLastRunErrorCode()
      {
         return this->lastRunErrorCode;
      }

      /**
       * @return current status, read under statusMutex
       */
      StorageBenchStatus getStatus()
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         return this->status;
      }

      StorageBenchType getType()
      {
         return this->benchType;
      }

      /**
       * @return the internal list pointer (may be NULL); ownership stays with this object
       */
      UInt16List* getTargetIDs()
      {
         return this->targetIDs;
      }
};

View File

@@ -0,0 +1,745 @@
#include <program/Program.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/toolkit/StringTk.h>
#include "BuddyResyncJob.h"
#include <boost/lexical_cast.hpp>
#define BUDDYRESYNCJOB_MAXDIRWALKDEPTH 2
/**
 * Creates a (not yet started) resync job for the given target. The slave vectors are only sized
 * here according to the config; the slave objects themselves are created when they are started.
 */
BuddyResyncJob::BuddyResyncJob(uint16_t targetID) :
   PThread("BuddyResyncJob_" + StringTk::uintToStr(targetID)),
   targetID(targetID),
   status(BuddyResyncJobState_NOTSTARTED),
   startTime(0), endTime(0)
{
   App* app = Program::getApp();

   const unsigned gatherSlaveCount = app->getConfig()->getTuneNumResyncGatherSlaves();
   const unsigned syncSlaveBudget = app->getConfig()->getTuneNumResyncSlaves();

   // the sync slave budget is split in halves between file and dir sync (at least one each)
   const unsigned fileSyncSlaveCount = BEEGFS_MAX((syncSlaveBudget / 2), 1);
   const unsigned dirSyncSlaveCount = BEEGFS_MAX((syncSlaveBudget / 2), 1);

   // prepare slaves (vectors) and result vector
   gatherSlaveVec.resize(gatherSlaveCount);
   fileSyncSlaveVec.resize(fileSyncSlaveCount);
   dirSyncSlaveVec.resize(dirSyncSlaveCount);
}
/**
 * Deletes all slave objects which were created for this job (vector slots may be empty).
 */
BuddyResyncJob::~BuddyResyncJob()
{
   for (BuddyResyncerGatherSlave* gatherSlave : gatherSlaveVec)
   {
      SAFE_DELETE(gatherSlave);
   }

   for (BuddyResyncerFileSyncSlave* fileSyncSlave : fileSyncSlaveVec)
   {
      SAFE_DELETE(fileSyncSlave);
   }

   for (BuddyResyncerDirSyncSlave* dirSyncSlave : dirSyncSlaveVec)
   {
      SAFE_DELETE(dirSyncSlave);
   }
}
void BuddyResyncJob::run()
{
// make sure only one job at a time can run!
{
std::lock_guard<Mutex> mutexLock(statusMutex);
if (status == BuddyResyncJobState_RUNNING)
{
LogContext(__func__).logErr("Refusing to run same BuddyResyncJob twice!");
return;
}
else
{
status = BuddyResyncJobState_RUNNING;
startTime = time(NULL);
endTime = 0;
}
}
App* app = Program::getApp();
StorageTargets* storageTargets = app->getStorageTargets();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
TargetMapper* targetMapper = app->getTargetMapper();
NodeStoreServers* storageNodes = app->getStorageNodes();
WorkerList* workerList = app->getWorkers();
bool startGatherSlavesRes;
bool startSyncSlavesRes;
std::string targetPath;
std::string chunksPath;
bool buddyCommIsOverride = false; // treat errors during lastbuddycomm read as "0, no override"
int64_t lastBuddyCommTimeSecs;
int64_t lastBuddyCommSafetyThresholdSecs;
bool checkTopLevelDirRes;
bool walkRes;
auto& target = *storageTargets->getTargets().at(targetID);
shallAbort.setZero();
targetWasOffline = false;
// delete sync candidates and gather queue; just in case there was something from a previous run
syncCandidates.clear();
gatherSlavesWorkQueue.clear();
target.setBuddyResyncInProgress(true);
LogContext(__func__).log(Log_NOTICE,
"Started resync of targetID " + StringTk::uintToStr(targetID));
// before starting the threads make sure every worker knows about the resync (the current work
// package must be finished), for that we use a dummy package
Mutex mutex;
Condition counterIncrementedCond;
SynchronizedCounter numReadyWorkers;
size_t numWorkers = workerList->size();
for (WorkerListIter iter = workerList->begin(); iter != workerList->end(); iter++)
{
Worker* worker = *iter;
PersonalWorkQueue* personalQueue = worker->getPersonalWorkQueue();
MultiWorkQueue* workQueue = worker->getWorkQueue();
IncSyncedCounterWork* incCounterWork = new IncSyncedCounterWork(&numReadyWorkers);
workQueue->addPersonalWork(incCounterWork, personalQueue);
}
numReadyWorkers.waitForCount(numWorkers);
// notify buddy, that resync started and wait for confirmation
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto buddyNode = storageNodes->referenceNode(buddyNodeID);
StorageResyncStartedMsg storageResyncStartedMsg(buddyTargetID);
const auto respMsg = MessagingTk::requestResponse(*buddyNode, storageResyncStartedMsg,
NETMSGTYPE_StorageResyncStartedResp);
std::pair<bool, std::chrono::system_clock::time_point> lastBuddyComm;
if (!respMsg)
{
LOG(MIRRORING, ERR, "Unable to notify buddy about resync attempt. Resync will not start.",
targetID, buddyTargetID);
setStatus(BuddyResyncJobState_FAILURE);
goto cleanup;
}
startGatherSlavesRes = startGatherSlaves(target);
if (!startGatherSlavesRes)
{
setStatus(BuddyResyncJobState_FAILURE);
goto cleanup;
}
startSyncSlavesRes = startSyncSlaves();
if (!startSyncSlavesRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
goto cleanup;
}
numDirsDiscovered.setZero();
numDirsMatched.setZero();
// walk over the directories until we reach a certain level and then pass the direcories to
// gather slaves to parallelize it
targetPath = target.getPath().str();
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
lastBuddyComm = target.getLastBuddyComm();
buddyCommIsOverride = lastBuddyComm.first;
lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
lastBuddyCommSafetyThresholdSecs = app->getConfig()->getSysResyncSafetyThresholdMins()*60;
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
lastBuddyCommTimeSecs = 0;
else
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
checkTopLevelDirRes = checkTopLevelDir(chunksPath, lastBuddyCommTimeSecs);
if (!checkTopLevelDirRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
// terminate sync slaves
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->selfTerminate();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->selfTerminate();
goto cleanup;
}
walkRes = walkDirs(chunksPath, "", 0, lastBuddyCommTimeSecs);
if (!walkRes)
{
setStatus(BuddyResyncJobState_FAILURE);
// terminate gather slaves
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->selfTerminate();
// terminate sync slaves
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->selfTerminate();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->selfTerminate();
goto cleanup;
}
// all directories are read => tell gather slave to stop when work queue is empty and wait for
// all to stop
for(size_t i = 0; i < gatherSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
gatherSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
gatherSlaveVec[i]->setOnlyTerminateIfIdle(false);
gatherSlaveVec[i]->selfTerminate();
}
joinGatherSlaves();
// gather slaves have finished => tell sync slaves to stop when work packages are empty and wait
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
fileSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
fileSyncSlaveVec[i]->selfTerminate();
}
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
if (likely(shallAbort.read() == 0))
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(true);
else
dirSyncSlaveVec[i]->setOnlyTerminateIfIdle(false);
dirSyncSlaveVec[i]->selfTerminate();
}
joinSyncSlaves();
cleanup:
// wait for gather slaves to stop
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin();
iter != gatherSlaveVec.end(); iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
if(slave)
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
}
bool syncErrors = false;
// wait for sync slaves to stop and save if any errors occured
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
if(slave)
{
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
if (slave->getErrorCount() != 0)
syncErrors = true;
}
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
if(slave)
{
{
std::lock_guard<Mutex> safeLock(slave->statusMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&(slave->statusMutex));
}
if (slave->getErrorCount() != 0)
syncErrors = true;
}
}
if (getStatus() == BuddyResyncJobState_RUNNING) // status not set to anything special
{ // (e.g. FAILURE)
if (shallAbort.read() != 0) // job aborted?
{
setStatus(BuddyResyncJobState_INTERRUPTED);
informBuddy();
}
else if (syncErrors || targetWasOffline.read()) // any sync errors or success?
{
// we must set the buddy BAD if it has been offline during any period of time during which
// the resync was also running. we implicitly do this during resync proper, since resync
// slaves abort with errors if the target is offline. if the target goes offline *after*
// the last proper resync messages has been sent and comes *back* before we try to inform
// it we will never detect that it has been offline at all. concurrently executing
// messages (eg TruncFile) may run between our opportunities to detect the offline state
// and may fail to forward their actions *even though they should forward*. this would
// lead to an inconsistent secondary. since the target has gone offline, the only
// reasonable course of action is to fail to resync entirely.
setStatus(BuddyResyncJobState_ERRORS);
informBuddy();
}
else
{
setStatus(BuddyResyncJobState_SUCCESS);
// unset timestamp override file if an override was set
target.setLastBuddyComm(std::chrono::system_clock::from_time_t(0), true);
// so the target went offline between the previous check "syncErrors || targetWasOffline".
// any message that has tried to forward itself in the intervening time will have seen the
// offline state, but will have been unable to set the buddy to needs-resync because it
// still *is* needs-resync. the resync itself has been perfectly successful, but we have
// to start another one anyway once the target comes back to ensure that no information
// was lost.
target.setBuddyNeedsResync(targetWasOffline.read());
informBuddy();
if (targetWasOffline.read())
LOG(MIRRORING, WARNING,
"Resync successful, but target went offline during finalization. "
"Setting target to needs-resync again.", targetID);
}
}
target.setBuddyResyncInProgress(false);
endTime = time(NULL);
}
void BuddyResyncJob::abort()
{
shallAbort.set(1); // tell the file walk in this class to abort
// set setOnlyTerminateIfIdle on the slaves to false; they will be stopped by the main loop then
for(BuddyResyncerGatherSlaveVecIter iter = gatherSlaveVec.begin(); iter != gatherSlaveVec.end();
iter++)
{
BuddyResyncerGatherSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
// stop sync slaves
for(BuddyResyncerFileSyncSlaveVecIter iter = fileSyncSlaveVec.begin();
iter != fileSyncSlaveVec.end(); iter++)
{
BuddyResyncerFileSyncSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
for(BuddyResyncerDirSyncSlaveVecIter iter = dirSyncSlaveVec.begin();
iter != dirSyncSlaveVec.end(); iter++)
{
BuddyResyncerDirSyncSlave* slave = *iter;
if(slave)
{
slave->setOnlyTerminateIfIdle(false);
}
}
}
/**
 * Creates the gather slaves (if they don't exist yet) and starts them.
 *
 * If one thread cannot be started, the gather slaves which were already started are told to
 * terminate (same approach as in startSyncSlaves() ). Otherwise they would keep running with
 * isRunning set, and the cleanup in run() would wait for them forever.
 *
 * @param target the storage target, which is passed to newly created gather slaves
 * @return false if a thread could not be started
 */
bool BuddyResyncJob::startGatherSlaves(const StorageTarget& target)
{
   // create a gather slaves if they don't exist yet and start them
   for (size_t i = 0; i < gatherSlaveVec.size(); i++)
   {
      if(!gatherSlaveVec[i])
         gatherSlaveVec[i] = new BuddyResyncerGatherSlave(target, &syncCandidates,
            &gatherSlavesWorkQueue, i);

      try
      {
         gatherSlaveVec[i]->resetSelfTerminate();
         gatherSlaveVec[i]->start();
         gatherSlaveVec[i]->setIsRunning(true);
      }
      catch (PThreadCreateException& e)
      {
         LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());

         // stop already started gather slaves
         for (size_t j = 0; j < i; j++)
            gatherSlaveVec[j]->selfTerminate();

         return false;
      }
   }

   return true;
}
bool BuddyResyncJob::startSyncSlaves()
{
// create sync slaves and start them
for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
{
if(!fileSyncSlaveVec[i])
fileSyncSlaveVec[i] = new BuddyResyncerFileSyncSlave(targetID, &syncCandidates, i);
try
{
fileSyncSlaveVec[i]->resetSelfTerminate();
fileSyncSlaveVec[i]->start();
fileSyncSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
// stop already started sync slaves
for(size_t j = 0; j < i; j++)
fileSyncSlaveVec[j]->selfTerminate();
return false;
}
}
for(size_t i = 0; i < dirSyncSlaveVec.size(); i++)
{
if(!dirSyncSlaveVec[i])
dirSyncSlaveVec[i] = new BuddyResyncerDirSyncSlave(targetID, &syncCandidates, i);
try
{
dirSyncSlaveVec[i]->resetSelfTerminate();
dirSyncSlaveVec[i]->start();
dirSyncSlaveVec[i]->setIsRunning(true);
}
catch (PThreadCreateException& e)
{
LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
// stop already started sync slaves
for (size_t j = 0; j < fileSyncSlaveVec.size(); j++)
fileSyncSlaveVec[j]->selfTerminate();
for (size_t j = 0; j < i; j++)
dirSyncSlaveVec[j]->selfTerminate();
return false;
}
}
return true;
}
void BuddyResyncJob::joinGatherSlaves()
{
for (size_t i = 0; i < gatherSlaveVec.size(); i++)
gatherSlaveVec[i]->join();
}
void BuddyResyncJob::joinSyncSlaves()
{
for (size_t i = 0; i < fileSyncSlaveVec.size(); i++)
fileSyncSlaveVec[i]->join();
for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
dirSyncSlaveVec[i]->join();
}
/**
 * Sums up the counters of this job and of all existing slaves.
 *
 * The job status is read through getStatus(), i.e. under statusMutex, like all other readers of
 * the status do.
 *
 * @param outStats is overwritten with the current statistics
 */
void BuddyResyncJob::getJobStats(StorageBuddyResyncJobStatistics& outStats)
{
   uint64_t discoveredFiles = 0;
   uint64_t matchedFiles = 0;
   uint64_t discoveredDirs = numDirsDiscovered.read();
   uint64_t matchedDirs = numDirsMatched.read();
   uint64_t syncedFiles = 0;
   uint64_t syncedDirs = 0;
   uint64_t errorFiles = 0;
   uint64_t errorDirs = 0;

   for(size_t i = 0; i < gatherSlaveVec.size(); i++)
   {
      BuddyResyncerGatherSlave* slave = gatherSlaveVec[i];
      if(slave)
      {
         uint64_t tmpDiscoveredFiles = 0;
         uint64_t tmpMatchedFiles = 0;
         uint64_t tmpDiscoveredDirs = 0;
         uint64_t tmpMatchedDirs = 0;
         slave->getCounters(tmpDiscoveredFiles, tmpMatchedFiles, tmpDiscoveredDirs, tmpMatchedDirs);

         discoveredFiles += tmpDiscoveredFiles;
         matchedFiles += tmpMatchedFiles;
         discoveredDirs += tmpDiscoveredDirs;
         matchedDirs += tmpMatchedDirs;
      }
   }

   for(size_t i = 0; i < fileSyncSlaveVec.size(); i++)
   {
      BuddyResyncerFileSyncSlave* slave = fileSyncSlaveVec[i];
      if(slave)
      {
         syncedFiles += slave->getNumChunksSynced();
         errorFiles += slave->getErrorCount();
      }
   }

   for (size_t i = 0; i < dirSyncSlaveVec.size(); i++)
   {
      BuddyResyncerDirSyncSlave* slave = dirSyncSlaveVec[i];
      if (slave)
      {
         syncedDirs += slave->getNumDirsSynced();

         // the additionally matched dirs of a dir sync slave count as discovered and as matched
         discoveredDirs += slave->getNumAdditionalDirsMatched();
         matchedDirs += slave->getNumAdditionalDirsMatched();
         errorDirs += slave->getErrorCount();
      }
   }

   outStats = StorageBuddyResyncJobStatistics(getStatus(), startTime, endTime, discoveredFiles,
      discoveredDirs, matchedFiles, matchedDirs, syncedFiles, syncedDirs, errorFiles, errorDirs);
}
void BuddyResyncJob::informBuddy()
{
App* app = Program::getApp();
NodeStore* storageNodes = app->getStorageNodes();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
TargetMapper* targetMapper = app->getTargetMapper();
BuddyResyncJobState status = getStatus();
TargetConsistencyState newTargetState;
if ( (status == BuddyResyncJobState_ERRORS) || (status == BuddyResyncJobState_INTERRUPTED))
newTargetState = TargetConsistencyState_BAD;
else
if (status == BuddyResyncJobState_SUCCESS)
newTargetState = TargetConsistencyState_GOOD;
else
{
LogContext(__func__).log(Log_NOTICE, "Refusing to set a state for buddy target, because "
"resync status isn't well-defined. "
"localTargetID: " + StringTk::uintToStr(targetID) + "; "
"resyncState: " + StringTk::intToStr(status));
return;
}
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
auto storageNode = storageNodes->referenceNode(buddyNodeID);
if (!storageNode)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. TargetID: " + StringTk::uintToStr(targetID)
+ "; buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; buddyNodeID: "
+ buddyNodeID.str() + "; error: unknown storage node");
return;
}
SetTargetConsistencyStatesRespMsg* respMsgCast;
FhgfsOpsErr result;
UInt16List targetIDs;
UInt8List states;
targetIDs.push_back(buddyTargetID);
states.push_back(newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Storage, &targetIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*storageNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"buddyNodeID: " + buddyNodeID.str() + "; "
"error: Communication error");
return;
}
respMsgCast = (SetTargetConsistencyStatesRespMsg*) respMsg.get();
result = respMsgCast->getResult();
if(result != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).logErr(
"Error while informing buddy about finished resync. "
"targetID: " + StringTk::uintToStr(targetID) + "; "
"buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
"buddyNodeID: " + buddyNodeID.str() + "; "
"error: " + boost::lexical_cast<std::string>(result));
}
}
/**
 * Checks the CONFIG_BUDDYMIRROR_SUBDIR_NAME directory: counts it as discovered and adds it (with
 * an empty relative path) as sync candidate if it was modified after lastBuddyCommTimeSecs.
 *
 * @param path full path of the directory
 * @param lastBuddyCommTimeSecs threshold in seconds, compared to the mtime of the directory
 * @return false if the directory could not be stat'ed
 */
bool BuddyResyncJob::checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs)
{
   struct stat dirStat;

   if (stat(path.c_str(), &dirStat) != 0)
   {
      LogContext(__func__).log(Log_WARNING,
         "Couldn't stat chunks directory; resync job can't run. targetID: "
            + StringTk::uintToStr(targetID) + "; path: " + path
            + "; Error: " + System::getErrString(errno));

      return false;
   }

   numDirsDiscovered.increase();

   const int64_t modifiedSecs = (int64_t) dirStat.st_mtim.tv_sec;
   if (modifiedSecs <= lastBuddyCommTimeSecs)
      return true; // not modified since the threshold => no sync candidate

   ChunkSyncCandidateDir candidate("", targetID);
   syncCandidates.add(candidate, this);
   numDirsMatched.increase();

   return true;
}
/**
 * Recursively walk through buddy mir directory until a depth of BUDDYRESYNCJOB_MAXDIRWALKDEPTH is
 * reached; everything with a greater depth gets passed to the GatherSlaves to work on it in
 * parallel.
 *
 * Directories which were modified after lastBuddyCommTimeSecs are added as sync candidates.
 * The walk stops early if shallAbort is set.
 *
 * @param chunksPath full path of the chunks dir, which is the root of the walk
 * @param relPath path of the directory to read, relative to chunksPath (empty for the root)
 * @param level depth of relPath below chunksPath
 * @param lastBuddyCommTimeSecs threshold in seconds, compared to the mtime of the directories
 * @return false if a directory could not be opened, read or stat'ed (also in a sub-walk)
 */
bool BuddyResyncJob::walkDirs(std::string chunksPath, std::string relPath, int level,
   int64_t lastBuddyCommTimeSecs)
{
   bool retVal = true;

   DIR* dirHandle;
   struct dirent* dirEntry;

   dirHandle = opendir(std::string(chunksPath + "/" + relPath).c_str());

   if(!dirHandle)
   {
      LogContext(__func__).logErr("Unable to open path. "
         "targetID: " + StringTk::uintToStr(targetID) + "; "
         "Rel. path: " + relPath + "; "
         "Error: " + System::getErrString(errno) );
      return false;
   }

   while ((dirEntry = StorageTk::readdirFiltered(dirHandle)) != NULL)
   {
      if(shallAbort.read() != 0)
         break;

      // get stat info
      std::string currentRelPath;
      if(unlikely(relPath.empty()))
         currentRelPath = dirEntry->d_name;
      else
         currentRelPath = relPath + "/" + dirEntry->d_name;

      std::string currentFullPath = chunksPath + "/" + currentRelPath;
      struct stat statBuf;
      int statRes = stat(currentFullPath.c_str(), &statBuf);

      if(statRes != 0)
      {
         // log the entry which could not be stat'ed (not only its parent dir)
         LogContext(__func__).log(Log_WARNING,
            "Couldn't stat directory, which was discovered previously. Resync job might not be "
               "complete. targetID " + StringTk::uintToStr(targetID) + "; "
               "Rel. path: " + currentRelPath + "; "
               "Error: " + System::getErrString(errno));

         retVal = false;

         break; // => one error aborts it all
      }

      if(S_ISDIR(statBuf.st_mode))
      {
         // if level of dir is smaller than max, take care of it and recurse into it
         if(level < BUDDYRESYNCJOB_MAXDIRWALKDEPTH)
         {
            numDirsDiscovered.increase();
            int64_t dirMTime = (int64_t) statBuf.st_mtim.tv_sec;
            if(dirMTime > lastBuddyCommTimeSecs)
            { // sync candidate
               ChunkSyncCandidateDir candidate(currentRelPath, targetID);
               syncCandidates.add(candidate, this);
               numDirsMatched.increase();
            }

            // an error in a sub-walk is remembered, but the walk continues with the next entry
            bool walkRes = walkDirs(chunksPath, currentRelPath, level+1, lastBuddyCommTimeSecs);

            if (!walkRes)
               retVal = false;
         }
         else
            // otherwise pass it to the slaves; NOTE: gather slave takes full path
            gatherSlavesWorkQueue.add(currentFullPath, this);
      }
      else
      {
         LOG_DEBUG(__func__, Log_WARNING, "Found a file in directory structure");
      }
   }

   // dirEntry is only NULL here if the loop ended because of the readdir result
   if(!dirEntry && errno) // error occurred
   {
      LogContext(__func__).logErr(
         "Unable to read all directories; chunksPath: " + chunksPath + "; relativePath: " + relPath
            + "; SysErr: " + System::getErrString(errno));

      retVal = false;
   }

   int closedirRes = closedir(dirHandle);
   if (closedirRes != 0)
      LOG_DEBUG(__func__, Log_WARNING,
         "Unable to close path. targetID " + StringTk::uintToStr(targetID) + "; Rel. path: "
            + relPath + "; Error: " + System::getErrString(errno));

   return retVal;
}

View File

@@ -0,0 +1,90 @@
#pragma once
#include <common/storage/mirroring/BuddyResyncJobStatistics.h>
#include <components/buddyresyncer/BuddyResyncerDirSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerFileSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerGatherSlave.h>
#define GATHERSLAVEQUEUE_MAXSIZE 5000
/**
 * A resync job for one storage target, running as its own thread. The job status is protected by
 * statusMutex.
 */
class BuddyResyncJob : public PThread
{
   friend class GenericDebugMsgEx;

   public:
      BuddyResyncJob(uint16_t targetID);
      virtual ~BuddyResyncJob();

      virtual void run();

      void abort();
      void getJobStats(StorageBuddyResyncJobStatistics& outStats);

   private:
      uint16_t targetID;
      Mutex statusMutex; // protects status
      BuddyResyncJobState status;

      int64_t startTime;
      int64_t endTime;

      ChunkSyncCandidateStore syncCandidates;
      BuddyResyncerGatherSlaveWorkQueue gatherSlavesWorkQueue;

      BuddyResyncerGatherSlaveVec gatherSlaveVec;
      BuddyResyncerFileSyncSlaveVec fileSyncSlaveVec;
      BuddyResyncerDirSyncSlaveVec dirSyncSlaveVec;

      // this thread walks over the top dir structures itself, so we need to track that
      AtomicUInt64 numDirsDiscovered;
      AtomicUInt64 numDirsMatched;

      AtomicInt16 shallAbort; // quasi-boolean
      AtomicInt16 targetWasOffline;

      bool checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs);
      bool walkDirs(std::string chunksPath, std::string relPath, int level,
         int64_t lastBuddyCommTimeSecs);

      bool startGatherSlaves(const StorageTarget& target);
      bool startSyncSlaves();
      void joinGatherSlaves();
      void joinSyncSlaves();

   public:
      // inliners

      uint16_t getTargetID() const { return targetID; }

      /**
       * @return current job status, read under statusMutex
       */
      BuddyResyncJobState getStatus()
      {
         const std::lock_guard<Mutex> statusLock(statusMutex);
         return status;
      }

      /**
       * @return true if the job status is RUNNING
       */
      bool isRunning()
      {
         const std::lock_guard<Mutex> statusLock(statusMutex);
         return (BuddyResyncJobState_RUNNING == status);
      }

      /**
       * Marks that the target was offline; run() evaluates this flag when it finishes.
       */
      void setTargetOffline() { targetWasOffline.set(1); }

   private:
      void setStatus(BuddyResyncJobState newStatus)
      {
         const std::lock_guard<Mutex> statusLock(statusMutex);
         status = newStatus;
      }

      void informBuddy();
};
typedef std::map<uint16_t, BuddyResyncJob*> BuddyResyncJobMap; //mapping: targetID, job
typedef BuddyResyncJobMap::iterator BuddyResyncJobMapIter;

View File

@@ -0,0 +1,40 @@
#include <program/Program.h>
#include "BuddyResyncer.h"
/**
 * Deletes all remaining jobs; a job which is still running is aborted and joined first.
 */
BuddyResyncer::~BuddyResyncer()
{
   for (BuddyResyncJobMap::value_type& mapEntry : resyncJobMap)
   {
      BuddyResyncJob* currentJob = mapEntry.second;

      const bool stillRunning = currentJob->isRunning();
      if (stillRunning)
      {
         currentJob->abort();
         currentJob->join();
      }

      SAFE_DELETE(currentJob);
   }
}
/**
 * Starts the resync job for the given target. The job object is created on first use and reused
 * afterwards.
 *
 * @return FhgfsOpsErr_SUCCESS if everything was successfully started, FhgfsOpsErr_INUSE if already
 * running
 */
FhgfsOpsErr BuddyResyncer::startResync(uint16_t targetID)
{
   bool createdNow;

   // adds a new job or gets the existing one for this target
   BuddyResyncJob* job = addResyncJob(targetID, createdNow);

   // an existing job which is currently running must not be started again
   const bool alreadyActive = !createdNow && job->isRunning();
   if (alreadyActive)
      return FhgfsOpsErr_INUSE;

   // job is ready and not running
   job->start();

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,59 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncJob.h>
#include <mutex>
/**
 * This is not a component that represents a separate thread by itself. Instead, it is the
 * controlling frontend for slave threads, which are started and stopped on request (i.e. it is not
 * automatically started when the app is started).
 *
 * Callers should only use methods in this controlling frontend and not access the slave's methods
 * directly.
 */
class BuddyResyncer
{
   public:
      ~BuddyResyncer();

      FhgfsOpsErr startResync(uint16_t targetID);

   private:
      BuddyResyncJobMap resyncJobMap; // mapping: targetID, job
      Mutex resyncJobMapMutex; // protects resyncJobMap

   public:
      /**
       * @return the job of the given target or NULL if there is no job for it
       */
      BuddyResyncJob* getResyncJob(uint16_t targetID)
      {
         std::lock_guard<Mutex> mapLock(resyncJobMapMutex);

         BuddyResyncJobMapIter found = resyncJobMap.find(targetID);

         return (found == resyncJobMap.end() ) ? NULL : found->second;
      }

   private:
      /**
       * Gets the job of the given target; creates and registers it if it doesn't exist yet.
       *
       * @param outIsNew true if the job was created by this call
       */
      BuddyResyncJob* addResyncJob(uint16_t targetID, bool& outIsNew)
      {
         std::lock_guard<Mutex> mapLock(resyncJobMapMutex);

         BuddyResyncJobMapIter found = resyncJobMap.find(targetID);
         if (found != resyncJobMap.end() )
         {
            outIsNew = false;
            return found->second;
         }

         BuddyResyncJob* newJob = new BuddyResyncJob(targetID);
         resyncJobMap.insert(BuddyResyncJobMap::value_type(targetID, newJob) );
         outIsNew = true;

         return newJob;
      }
};

View File

@@ -0,0 +1,395 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "BuddyResyncerDirSyncSlave.h"
#include <boost/lexical_cast.hpp>
#define CHECK_AT_ONCE 50
/**
 * Creates a (not yet started) dir sync slave; the thread name is built from targetID and slaveID.
 *
 * @param targetID ID of the local target, stored in this slave
 * @param syncCandidates store from which syncLoop() fetches the dir candidates; only the pointer
 * is kept
 * @param slaveID only used for the thread name
 */
BuddyResyncerDirSyncSlave::BuddyResyncerDirSyncSlave(uint16_t targetID,
ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
PThread("BuddyResyncerDirSyncSlave_" + StringTk::uintToStr(targetID) + "-"
+ StringTk::uintToStr(slaveID))
{
this->isRunning = false;
this->targetID = targetID;
this->syncCandidates = syncCandidates;
}
// Nothing to release here: the constructor only stores the syncCandidates pointer and does not
// allocate anything.
BuddyResyncerDirSyncSlave::~BuddyResyncerDirSyncSlave()
{
}
/**
 * This is a component, which is started through its control frontend on-demand at
 * runtime and terminates when it's done.
 * We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
 * of this thread running at the same time.
 *
 * The isRunning flag is set for the whole duration of this method and is reset at the end, also
 * if a std::exception was caught.
 */
void BuddyResyncerDirSyncSlave::run()
{
setIsRunning(true);
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
// reset the counters of this slave before the sync loop starts
numAdditionalDirsMatched.setZero();
numDirsSynced.setZero();
errorCount.setZero();
syncLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
// hand the exception over to the app of this thread
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerDirSyncSlave::syncLoop()
{
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
while (! getSelfTerminateNotIdle())
{
if((syncCandidates->isDirsEmpty()) && (getSelfTerminate()))
break;
ChunkSyncCandidateDir candidate;
syncCandidates->fetch(candidate, this);
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
continue;
std::string relativePath = candidate.getRelativePath();
uint16_t localTargetID = candidate.getTargetID();
// get buddy targetID
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
// perform sync
FhgfsOpsErr resyncRes = doSync(relativePath, localTargetID, buddyTargetID);
if (resyncRes == FhgfsOpsErr_SUCCESS)
numDirsSynced.increase();
else if (resyncRes != FhgfsOpsErr_INTERRUPTED)
errorCount.increase(); // increment error count if an error occurred; note: if the slaves
// were interrupted from the outside (e.g. ctl) this is not an error
}
}
/**
 * Syncs one directory: the contents of the dir on the buddy are listed in pieces of
 * CHECK_AT_ONCE entries, each piece is matched with the local dir (findChunks() ) and the
 * entries which remain in the list afterwards are removed on the buddy.
 *
 * @param dirPath path of the dir to sync, as taken from the sync candidate
 * @param localTargetID ID of the local target
 * @param buddyTargetID ID of the buddy target
 * @return FhgfsOpsErr_SUCCESS if the whole dir was processed, FhgfsOpsErr_UNKNOWNNODE if the
 * node of the buddy target is unknown, FhgfsOpsErr_INTERRUPTED if this thread shall terminate,
 * otherwise the error of the failed step
 */
FhgfsOpsErr BuddyResyncerDirSyncSlave::doSync(const std::string& dirPath, uint16_t localTargetID,
   uint16_t buddyTargetID)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;

   App* app = Program::getApp();
   TargetMapper* targetMapper = app->getTargetMapper();
   NodeStoreServers* storageNodes = app->getStorageNodes();

   // try to find the node with the buddyTargetID
   NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);
   auto node = storageNodes->referenceNode(buddyNodeID);

   if(!node)
   {
      LogContext(__func__).logErr(
         "Storage node does not exist; nodeID " + buddyNodeID.str());

      return FhgfsOpsErr_UNKNOWNNODE;
   }

   int64_t offset = 0;
   unsigned entriesFetched;

   do
   {
      // start from the current offset, so that the value is defined even if the listing
      // succeeds without setting its out-parameter (e.g. when the path doesn't exist on the buddy)
      int64_t newOffset = offset;
      StringList names;
      IntList entryTypes;

      FhgfsOpsErr listRes = getBuddyDirContents(*node, dirPath, buddyTargetID, offset, names,
         entryTypes, newOffset);

      if(listRes != FhgfsOpsErr_SUCCESS)
      {
         retVal = listRes;
         break;
      }

      offset = newOffset;

      // remember the size of this piece now; the list is used for the removal below, after
      // findChunks() had the chance to modify it
      entriesFetched = names.size();

      // match locally
      FhgfsOpsErr findRes = findChunks(localTargetID, dirPath, names, entryTypes);

      if(findRes != FhgfsOpsErr_SUCCESS)
      {
         retVal = findRes;
         break;
      }

      // delete the remaining chunks/dirs on the buddy
      StringList rmPaths;
      for (const std::string& name : names)
         rmPaths.push_back(dirPath + "/" + name);

      FhgfsOpsErr rmRes = removeBuddyChunkPaths(*node, localTargetID, buddyTargetID, rmPaths);

      if (rmRes != FhgfsOpsErr_SUCCESS)
      {
         retVal = rmRes;
         break;
      }

      if (getSelfTerminateNotIdle())
      {
         retVal = FhgfsOpsErr_INTERRUPTED;
         break;
      }

   } while (entriesFetched == CHECK_AT_ONCE); // a full piece means there might be more entries

   return retVal;
}
/**
 * Fetches one batch (at most CHECK_AT_ONCE entries) of a chunk directory listing from the buddy.
 *
 * The request is retried every 5 seconds as long as no response arrives, the target state can
 * be read and the target is not marked offline; the retry loop also ends when the thread is
 * asked to terminate.
 *
 * @param node node that the request is sent to
 * @param dirPath directory to list
 * @param targetID target to list on (also used for the target state lookup)
 * @param offset listing offset to continue from
 * @param outNames receives the entry names if the listing succeeded
 * @param outEntryTypes receives the entry types if the listing succeeded
 * @param outNewOffset receives the offset for the next batch; set to the given offset if no new
 *    offset was received
 * @return FhgfsOpsErr_SUCCESS on success and also if the buddy reported
 *    FhgfsOpsErr_PATHNOTEXISTS (the output lists are not touched in that case),
 *    FhgfsOpsErr_COMMUNICATION if no response was received, otherwise the listing error
 */
FhgfsOpsErr BuddyResyncerDirSyncSlave::getBuddyDirContents(Node& node, const std::string& dirPath,
   uint16_t targetID, int64_t offset, StringList& outNames, IntList& outEntryTypes,
   int64_t& outNewOffset)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   unsigned msgRetryIntervalMS = 5000;

   /* make sure the out-offset is defined on every return path; in particular the
      "path does not exist" case returns success without a new offset from the buddy */
   outNewOffset = offset;

   // get a part of the dir contents from the buddy target
   ListChunkDirIncrementalMsg listMsg(targetID, true, dirPath, offset, CHECK_AT_ONCE, false, true);
   listMsg.setMsgHeaderTargetID(targetID);

   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);

   // send request to node and receive response
   std::unique_ptr<NetMessage> respMsg;

   while ( (!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
   {
      respMsg = MessagingTk::requestResponse(node, listMsg, NETMSGTYPE_ListChunkDirIncrementalResp);

      if (!respMsg)
      {
         LOG_DEBUG(__func__, Log_NOTICE,
            "Unable to communicate, but target is not offline; sleeping "
            + StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
            + StringTk::uintToStr(targetID));

         PThread::sleepMS(msgRetryIntervalMS);

         // if thread shall terminate, break loop here
         if ( getSelfTerminateNotIdle() )
            break;

         // re-read the state, so that the loop ends when the target went offline meanwhile
         getStateRes = Program::getApp()->getTargetStateStore()->getState(targetID, state);
      }
   }

   if (!respMsg)
   { // communication error
      LogContext(__func__).logErr(
         "Communication with storage node failed: " + node.getTypedNodeID());

      retVal = FhgfsOpsErr_COMMUNICATION;
   }
   else
   if(!getStateRes)
   {
      LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );

      retVal = FhgfsOpsErr_INTERNAL;
   }
   else
   {
      // correct response type received
      ListChunkDirIncrementalRespMsg* respMsgCast = (ListChunkDirIncrementalRespMsg*) respMsg.get();

      FhgfsOpsErr listRes = respMsgCast->getResult();

      if (listRes == FhgfsOpsErr_SUCCESS)
      {
         outNewOffset = respMsgCast->getNewOffset();
         respMsgCast->getNames().swap(outNames);
         respMsgCast->getEntryTypes().swap(outEntryTypes);
      }
      else
      if (listRes != FhgfsOpsErr_PATHNOTEXISTS)
      { // not exists is ok, because path might have been deleted
         LogContext(__func__).log(Log_WARNING, "Error listing chunks dir; "
            "dirPath: " + dirPath + "; "
            "targetID: " + StringTk::uintToStr(targetID) + "; "
            "node: " + node.getTypedNodeID() + "; "
            "Error: " + boost::lexical_cast<std::string>(listRes));

         retVal = listRes;
      }
   }

   return retVal;
}
/**
 * Matches a batch of buddy directory entries against the local target.
 *
 * Entries are removed from both lists (which are walked in lockstep) unless they are chunks that
 * do not exist locally. For every chunk that stays in the list, the chunk lock on the given
 * target is intentionally kept; it has to be released by whoever removes the chunk afterwards.
 * Directories are never kept in the list; directories that do not exist locally are added to the
 * sync candidate store instead.
 *
 * @param targetID local target to look the entries up on
 * @param dirPath directory that contains the entries (may be empty)
 * @param inOutNames in: entry names from the buddy; out: names of chunks missing locally
 * @param inOutEntryTypes entry types belonging to inOutNames, element by element
 * @return always FhgfsOpsErr_SUCCESS
 */
FhgfsOpsErr BuddyResyncerDirSyncSlave::findChunks(uint16_t targetID, const std::string& dirPath,
   StringList& inOutNames, IntList& inOutEntryTypes)
{
   App* app = Program::getApp();
   ChunkLockStore* lockStore = app->getChunkLockStore();

   const auto& target = app->getStorageTargets()->getTargets().at(targetID);
   const int mirrorFD = *target->getMirrorFD();

   StringListIter nameIt = inOutNames.begin();
   IntListIter typeIt = inOutEntryTypes.begin();

   while (nameIt != inOutNames.end())
   {
      // copied on purpose: the list element might be erased before the ID is used the last time
      std::string entryID = *nameIt;
      DirEntryType entryType = (DirEntryType)*typeIt;

      std::string entryPath;

      if (likely(!dirPath.empty()))
         entryPath = dirPath + "/" + entryID;
      else
         entryPath = entryID;

      if (DirEntryType_ISDIR(entryType))
      {
         if (!StorageTk::pathExists(mirrorFD, entryPath))
         {
            /* unknown directory => queue it as a sync candidate, so that it gets checked and its
               contents get listed as well */
            ChunkSyncCandidateDir syncCandidate(entryPath, targetID);
            syncCandidates->add(syncCandidate, this);
            numAdditionalDirsMatched.increase();
         }

         // found or not: directories are never explicitly deleted on the buddy => drop from list
         nameIt = inOutNames.erase(nameIt);
         typeIt = inOutEntryTypes.erase(typeIt);

         continue;
      }

      // regular chunk: it needs to be locked while we check for its existence
      lockStore->lockChunk(targetID, entryID);

      if (!StorageTk::pathExists(mirrorFD, entryPath))
      {
         // chunk is missing locally => keep it in the list and keep the lock (released after
         // the removal on the buddy)
         nameIt++;
         typeIt++;

         continue;
      }

      // chunk exists locally => nothing to remove; drop it from the list and release the lock
      nameIt = inOutNames.erase(nameIt);
      typeIt = inOutEntryTypes.erase(typeIt);

      lockStore->unlockChunk(targetID, entryID);
   }

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Asks the buddy to remove the given chunk paths and releases the corresponding local chunk
 * locks.
 *
 * The request is retried every 5 seconds as long as no response arrives, the buddy target state
 * can be read and the buddy target is not marked offline; the retry loop also ends when the
 * thread is asked to terminate.
 *
 * For every entry in paths, the chunk lock of the path's basename on localTargetID is released,
 * no matter whether the removal succeeded.
 *
 * @param node node that the request is sent to
 * @param localTargetID target on which the chunk locks are held
 * @param buddyTargetID target on which the paths shall be removed
 * @param paths chunk paths to remove
 * @return FhgfsOpsErr_COMMUNICATION if no response was received, otherwise FhgfsOpsErr_SUCCESS
 *    (paths that the buddy failed to remove are only logged)
 */
FhgfsOpsErr BuddyResyncerDirSyncSlave::removeBuddyChunkPaths(Node& node, uint16_t localTargetID,
   uint16_t buddyTargetID, StringList& paths)
{
   unsigned msgRetryIntervalMS = 5000;

   ChunkLockStore* chunkLockStore = Program::getApp()->getChunkLockStore();

   RmChunkPathsMsg rmMsg(buddyTargetID, &paths);
   rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
   rmMsg.setMsgHeaderTargetID(buddyTargetID);

   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);

   // send request to node and receive response
   std::unique_ptr<NetMessage> respMsg;

   while ((!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE))
   {
      respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);

      if (!respMsg)
      {
         // note: it is the buddy target whose state is polled here, so that is the ID to report
         LOG_DEBUG(__func__, Log_NOTICE,
            "Unable to communicate, but target is not offline; sleeping "
            + StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
            + StringTk::uintToStr(buddyTargetID));

         PThread::sleepMS(msgRetryIntervalMS);

         // if thread shall terminate, break loop here
         if ( getSelfTerminateNotIdle() )
            break;

         getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
      }
   }

   // no matter if that succeeded or not we unlock all chunks here first
   for (StringListIter iter = paths.begin(); iter != paths.end(); iter++)
   {
      std::string entryID = StorageTk::getPathBasename(*iter);
      chunkLockStore->unlockChunk(localTargetID, entryID);
   }

   if (!respMsg)
   { // communication error
      LogContext(__func__).logErr(
         "Communication with storage node failed: " + node.getTypedNodeID());

      return FhgfsOpsErr_COMMUNICATION;
   }
   else
   if(!getStateRes)
   {
      LogContext(__func__).logErr("No valid state for node ID: " + node.getTypedNodeID() );

      return FhgfsOpsErr_INTERNAL;
   }
   else
   {
      // correct response type received
      RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
      StringList& failedPaths = respMsgCast->getFailedPaths();

      for(StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
      {
         LogContext(__func__).logErr("Chunk path could not be deleted; "
            "path: " + *iter + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "node: " + node.getTypedNodeID());
      }
   }

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,106 @@
#pragma once
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <components/buddyresyncer/SyncCandidate.h>
/**
 * Thread that compares chunk directories of a local target with its buddy target (see doSync()
 * and its helpers) and keeps counters about the work done.
 */
class BuddyResyncerDirSyncSlave : public PThread
{
   friend class BuddyResyncer; // (to grant access to internal mutex)
   friend class BuddyResyncJob; // (to grant access to internal mutex)

   public:
      BuddyResyncerDirSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
         uint8_t slaveID);
      virtual ~BuddyResyncerDirSyncSlave();

   private:
      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond; // broadcast on every setIsRunning() call

      AtomicSizeT onlyTerminateIfIdle; // used as a bool: zero means false, non-zero means true

      AtomicUInt64 numDirsSynced;
      AtomicUInt64 numAdditionalDirsMatched;
      AtomicUInt64 errorCount;

      bool isRunning; // true if an instance of this component is currently running

      uint16_t targetID;
      ChunkSyncCandidateStore* syncCandidates;

      virtual void run();
      void syncLoop();

      FhgfsOpsErr doSync(const std::string& dirPath, uint16_t localTargetID,
         uint16_t buddyTargetID);
      FhgfsOpsErr getBuddyDirContents(Node& node, const std::string& dirPath, uint16_t targetID,
         int64_t offset, StringList& outNames, IntList& outEntryTypes, int64_t& outNewOffset);
      FhgfsOpsErr findChunks(uint16_t targetID, const std::string& dirPath, StringList& inOutNames,
         IntList& inOutEntryTypes);
      FhgfsOpsErr removeBuddyChunkPaths(Node& node, uint16_t localTargetID, uint16_t buddyTargetID,
         StringList& paths);

   public:
      // getters & setters

      /**
       * @return current value of the isRunning flag (read under statusMutex)
       */
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> guard(statusMutex);

         return isRunning;
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         if (!value)
         {
            onlyTerminateIfIdle.setZero();
            return;
         }

         onlyTerminateIfIdle.set(1);
      }

      bool getOnlyTerminateIfIdle()
      {
         return onlyTerminateIfIdle.read() != 0;
      }

      uint64_t getNumDirsSynced()
      {
         return numDirsSynced.read();
      }

      uint64_t getNumAdditionalDirsMatched()
      {
         return numAdditionalDirsMatched.read();
      }

      uint64_t getErrorCount()
      {
         return errorCount.read();
      }

   private:
      // getters & setters

      /**
       * Updates the isRunning flag under statusMutex and wakes up all waiters on
       * isRunningChangeCond.
       */
      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> guard(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      /**
       * @return true if self-termination was requested and the "only terminate if idle" flag
       *    is not set
       */
      bool getSelfTerminateNotIdle()
      {
         if (!getSelfTerminate())
            return false;

         return !getOnlyTerminateIfIdle();
      }
};
typedef std::list<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveList;
typedef BuddyResyncerDirSyncSlaveList::iterator BuddyResyncerDirSyncSlaveListIter;
typedef std::vector<BuddyResyncerDirSyncSlave*> BuddyResyncerDirSyncSlaveVec;
typedef BuddyResyncerDirSyncSlaveVec::iterator BuddyResyncerDirSyncSlaveVecIter;

View File

@@ -0,0 +1,471 @@
#include <app/App.h>
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "BuddyResyncerFileSyncSlave.h"
#include <boost/lexical_cast.hpp>
// NOTE(review): PROCESS_AT_ONCE is not referenced by any code visible in this file - confirm
// whether it is still needed
#define PROCESS_AT_ONCE 1
// size (in bytes) of the read buffer in doResync(), i.e. the max amount of chunk data that is
// read and sent to the buddy per loop iteration
#define SYNC_BLOCK_SIZE (1024*1024) // 1M
/**
 * @param targetID stored in the targetID member and used as part of the thread name
 * @param syncCandidates store that sync candidates are fetched from (pointer is only stored)
 * @param slaveID used as part of the thread name
 */
BuddyResyncerFileSyncSlave::BuddyResyncerFileSyncSlave(uint16_t targetID,
   ChunkSyncCandidateStore* syncCandidates, uint8_t slaveID) :
   PThread("BuddyResyncerFileSyncSlave_" + StringTk::uintToStr(targetID) + "-"
      + StringTk::uintToStr(slaveID)),
   isRunning(false),
   targetID(targetID),
   syncCandidates(syncCandidates)
{
}
// nothing to release explicitly
BuddyResyncerFileSyncSlave::~BuddyResyncerFileSyncSlave() = default;
/**
* This is a component, which is started through its control frontend on-demand at
* runtime and terminates when it's done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerFileSyncSlave::run()
{
setIsRunning(true);
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
numChunksSynced.setZero();
errorCount.setZero();
syncLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
void BuddyResyncerFileSyncSlave::syncLoop()
{
App* app = Program::getApp();
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
while (! getSelfTerminateNotIdle())
{
if((syncCandidates->isFilesEmpty()) && (getSelfTerminate()))
break;
ChunkSyncCandidateFile candidate;
syncCandidates->fetch(candidate, this);
if (unlikely(candidate.getTargetID() == 0)) // ignore targetID 0
continue;
std::string relativePath = candidate.getRelativePath();
uint16_t localTargetID = candidate.getTargetID();
// get buddy targetID
uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(localTargetID);
// perform sync
FhgfsOpsErr resyncRes = doResync(relativePath, localTargetID, buddyTargetID);
if (resyncRes == FhgfsOpsErr_SUCCESS)
numChunksSynced.increase();
else
if (resyncRes != FhgfsOpsErr_INTERRUPTED)
errorCount.increase();
}
}
/**
 * Transfers one chunk file from the local target to the buddy target.
 *
 * The chunk is read in blocks of SYNC_BLOCK_SIZE bytes. For every block, the chunk is locked,
 * opened, read at the current offset and sent to the buddy in a ResyncLocalFileMsg; afterwards
 * the file is closed and the chunk is unlocked again. With the last block (short or empty read),
 * the chunk attributes are sent as well. Blocks that consist only of zeros are not transferred,
 * except for the block at offset 0 and the last block.
 *
 * If the chunk does not exist locally (anymore), it is removed on the buddy instead.
 *
 * @param chunkPathStr path of the chunk, relative to the mirror directory of the target
 * @param localTargetID target to read the chunk from
 * @param buddyTargetID target to send the chunk to
 * @return FhgfsOpsErr_SUCCESS if the chunk was transferred (or did not exist locally and was
 *    removed on the buddy), FhgfsOpsErr_UNKNOWNNODE if the buddy's node could not be
 *    referenced, FhgfsOpsErr_INTERRUPTED if the thread was asked to terminate,
 *    FhgfsOpsErr_COMMUNICATION if the buddy did not respond, FhgfsOpsErr_INTERNAL on local
 *    open/seek/read errors, otherwise the error reported by the buddy
 */
FhgfsOpsErr BuddyResyncerFileSyncSlave::doResync(std::string& chunkPathStr, uint16_t localTargetID,
   uint16_t buddyTargetID)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   unsigned msgRetryIntervalMS = 5000;

   App* app = Program::getApp();
   TargetMapper* targetMapper = app->getTargetMapper();
   NodeStoreServers* storageNodes = app->getStorageNodes();
   ChunkLockStore* chunkLockStore = app->getChunkLockStore();

   std::string entryID = StorageTk::getPathBasename(chunkPathStr);

   // try to find the node with the buddyTargetID
   NumNodeID buddyNodeID = targetMapper->getNodeID(buddyTargetID);

   auto node = storageNodes->referenceNode(buddyNodeID);

   if(!node)
   {
      LogContext(__func__).log(Log_WARNING,
         "Storage node does not exist; nodeID " + buddyNodeID.str());

      return FhgfsOpsErr_UNKNOWNNODE;
   }

   int64_t offset = 0;
   ssize_t readRes = 0;
   unsigned resyncMsgFlags = 0;
   resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR;

   LogContext(__func__).log(Log_DEBUG,
      "File sync started. chunkPath: " + chunkPathStr + "; localTargetID: "
      + StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
      + StringTk::uintToStr(buddyTargetID));

   do
   {
      boost::scoped_array<char> data(new char[SYNC_BLOCK_SIZE]);

      const auto& target = app->getStorageTargets()->getTargets().at(localTargetID);

      // lock the chunk
      chunkLockStore->lockChunk(localTargetID, entryID);

      const int fd = openat(*target->getMirrorFD(), chunkPathStr.c_str(), O_RDONLY | O_NOATIME);

      if (fd == -1)
      {
         int errCode = errno;

         if(errCode == ENOENT)
         { // chunk was deleted => no error
            // delete the mirror chunk and return
            bool rmRes = removeBuddyChunkUnlocked(*node, buddyTargetID, chunkPathStr);

            if (!rmRes) // rm failed; stop resync
            {
               LogContext(__func__).log(Log_WARNING,
                  "File sync not started. chunkPath: " + chunkPathStr + "; localTargetID: "
                  + StringTk::uintToStr(localTargetID) + "; buddyTargetID: "
                  + StringTk::uintToStr(buddyTargetID));

               retVal = FhgfsOpsErr_INTERNAL;
            }
         }
         else // error => log and return
         {
            LogContext(__func__).logErr(
               "Open of chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
               + StringTk::uintToStr(localTargetID) + "; Error: "
               + System::getErrString(errCode));

            retVal = FhgfsOpsErr_INTERNAL;
         }

         chunkLockStore->unlockChunk(localTargetID, entryID);

         goto cleanup;
      }

      // (off_t instead of int, so that large resulting offsets are not truncated)
      const off_t seekRes = lseek(fd, offset, SEEK_SET);

      if (seekRes == static_cast<off_t>(-1) )
      {
         const int seekErrCode = errno;

         LogContext(__func__).logErr(
            "Seeking in chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
            + StringTk::uintToStr(localTargetID) + "; offset: " + StringTk::int64ToStr(offset)
            + "; Error: " + System::getErrString(seekErrCode));

         // the chunk was not transferred completely, so this must not be reported as success
         retVal = FhgfsOpsErr_INTERNAL;

         /* we leave through "cleanup" (not "end_of_loop", because readRes could still make the
            loop continue), so the descriptor has to be closed here */
         close(fd);

         chunkLockStore->unlockChunk(localTargetID, entryID);

         goto cleanup;
      }

      readRes = read(fd, data.get(), SYNC_BLOCK_SIZE);

      if( readRes == -1)
      {
         LogContext(__func__).logErr("Error during read; "
            "chunkPath: " + chunkPathStr + "; "
            "targetID: " + StringTk::uintToStr(localTargetID) + "; "
            "BuddyNode: " + node->getTypedNodeID() + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "Error: " + System::getErrString(errno));

         retVal = FhgfsOpsErr_INTERNAL;

         goto end_of_loop;
      }

      if(readRes > 0)
      {
         const char zeroBuf[RESYNCER_SPARSE_BLOCK_SIZE] = { 0 };

         // check if sparse blocks are in the buffer
         ssize_t bufPos = 0;
         bool dataFound = false;
         while (bufPos < readRes)
         {
            size_t cmpLen = BEEGFS_MIN(readRes-bufPos, RESYNCER_SPARSE_BLOCK_SIZE);

            int cmpRes = memcmp(data.get() + bufPos, zeroBuf, cmpLen);
            if(cmpRes != 0)
               dataFound = true;
            else // sparse area detected
            {
               if(dataFound) // had data before
               {
                  resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE; // let the receiver do a check
                  break; // and stop checking here
               }
            }

            bufPos += cmpLen;
         }

         // this inner loop is over and there are only sparse areas

         /* make sure we always send a msg at offset==0 to truncate the file and allow concurrent
            writers in a big initial sparse area */
         if(offset && (readRes > 0) && (readRes == SYNC_BLOCK_SIZE) && !dataFound)
         {
            goto end_of_loop;
            // => no transfer needed
         }

         /* let the receiver do a check, because we might be sending a sparse block at beginning or
            end of file */
         if(!dataFound)
            resyncMsgFlags |= RESYNCLOCALFILEMSG_CHECK_SPARSE;
      }

      {
         ResyncLocalFileMsg resyncMsg(data.get(), chunkPathStr, buddyTargetID, offset, readRes);

         if (!readRes || (readRes < SYNC_BLOCK_SIZE) ) // last iteration, set attribs and trunc buddy chunk
         {
            struct stat statBuf;
            int statRes = fstat(fd, &statBuf);

            if (statRes == 0)
            {
               if(statBuf.st_size < offset)
               { // in case someone truncated the file while we're reading at a high offset
                  offset = statBuf.st_size;
                  resyncMsg.setOffset(offset);
               }
               else
               if(offset && !readRes)
                  resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_TRUNC;

               int mode = statBuf.st_mode;
               unsigned userID = statBuf.st_uid;
               unsigned groupID = statBuf.st_gid;
               int64_t mtimeSecs = statBuf.st_mtim.tv_sec;
               int64_t atimeSecs = statBuf.st_atim.tv_sec;
               SettableFileAttribs chunkAttribs = {mode, userID,groupID, mtimeSecs, atimeSecs};
               resyncMsg.setChunkAttribs(chunkAttribs);
               resyncMsgFlags |= RESYNCLOCALFILEMSG_FLAG_SETATTRIBS;
            }
            else
            {
               LogContext(__func__).logErr("Error getting chunk attributes; "
                  "chunkPath: " + chunkPathStr + "; "
                  "targetID: " + StringTk::uintToStr(localTargetID) + "; "
                  "BuddyNode: " + node->getTypedNodeID() + "; "
                  "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
                  "Error: " + System::getErrString(errno));
            }
         }

         resyncMsg.setMsgHeaderFeatureFlags(resyncMsgFlags);
         resyncMsg.setMsgHeaderTargetID(buddyTargetID);

         CombinedTargetState state;
         bool getStateRes =
            Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);

         // send request to node and receive response
         std::unique_ptr<NetMessage> respMsg;

         while ( (!respMsg) && (getStateRes)
            && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
         {
            respMsg = MessagingTk::requestResponse(*node, resyncMsg,
               NETMSGTYPE_ResyncLocalFileResp);

            if (!respMsg)
            {
               // note: it is the buddy target whose state is polled here, so that is the ID to
               // report
               LOG_DEBUG(__func__, Log_NOTICE,
                  "Unable to communicate, but target is not offline; sleeping "
                  + StringTk::uintToStr(msgRetryIntervalMS) + "ms before retry. targetID: "
                  + StringTk::uintToStr(buddyTargetID));

               PThread::sleepMS(msgRetryIntervalMS);

               // if thread shall terminate, break loop here
               if ( getSelfTerminateNotIdle() )
                  break;

               getStateRes =
                  Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
            }
         }

         if (!respMsg)
         { // communication error
            LogContext(__func__).log(Log_WARNING,
               "Communication with storage node failed: " + node->getTypedNodeID());

            retVal = FhgfsOpsErr_COMMUNICATION;

            // set readRes to non-zero to force exiting loop
            readRes = -2;
         }
         else
         if(!getStateRes)
         {
            LogContext(__func__).log(Log_WARNING,
               "No valid state for node ID: " + node->getTypedNodeID());

            retVal = FhgfsOpsErr_INTERNAL;

            // set readRes to non-zero to force exiting loop
            readRes = -2;
         }
         else
         {
            // correct response type received
            ResyncLocalFileRespMsg* respMsgCast = (ResyncLocalFileRespMsg*) respMsg.get();

            FhgfsOpsErr syncRes = respMsgCast->getResult();

            if(syncRes != FhgfsOpsErr_SUCCESS)
            {
               LogContext(__func__).log(Log_WARNING, "Error during resync; "
                  "chunkPath: " + chunkPathStr + "; "
                  "targetID: " + StringTk::uintToStr(localTargetID) + "; "
                  "BuddyNode: " + node->getTypedNodeID() + "; "
                  "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
                  "Error: " + boost::lexical_cast<std::string>(syncRes));

               retVal = syncRes;

               // set readRes to non-zero to force exiting loop
               readRes = -2;
            }
         }
      }

   end_of_loop:
      int closeRes = close(fd);
      if (closeRes == -1)
      {
         LogContext(__func__).log(Log_WARNING, "Error closing file descriptor; "
            "chunkPath: " + chunkPathStr + "; "
            "targetID: " + StringTk::uintToStr(localTargetID) + "; "
            "BuddyNode: " + node->getTypedNodeID() + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "Error: " + System::getErrString(errno));
      }
      // unlock the chunk
      chunkLockStore->unlockChunk(localTargetID, entryID);

      // increment offset for next iteration
      offset += readRes;

      if ( getSelfTerminateNotIdle() )
      {
         retVal = FhgfsOpsErr_INTERRUPTED;
         break;
      }

   } while (readRes == SYNC_BLOCK_SIZE); // a short or failed read ends the transfer

cleanup:
   LogContext(__func__).log(Log_DEBUG, "File sync finished. chunkPath: " + chunkPathStr);

   return retVal;
}
/**
 * Asks the buddy to remove a single chunk path.
 *
 * The request is retried every 5 seconds as long as no response arrives, the buddy target state
 * can be read and the buddy target is not marked offline; the retry loop also ends when the
 * thread is asked to terminate.
 *
 * Note: Chunk has to be locked by caller.
 *
 * @param node node that the request is sent to
 * @param buddyTargetID target on which the chunk shall be removed
 * @param pathStr chunk path to remove
 * @return true if a response was received and the buddy reported no failed paths, false
 *    otherwise
 */
bool BuddyResyncerFileSyncSlave::removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID,
   std::string& pathStr)
{
   bool retVal = true;
   unsigned msgRetryIntervalMS = 5000;

   StringList rmPaths;
   rmPaths.push_back(pathStr);

   RmChunkPathsMsg rmMsg(buddyTargetID, &rmPaths);
   rmMsg.addMsgHeaderFeatureFlag(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR);
   rmMsg.setMsgHeaderTargetID(buddyTargetID);

   CombinedTargetState state;
   bool getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);

   // send request to node and receive response
   std::unique_ptr<NetMessage> respMsg;

   while ( (!respMsg) && (getStateRes)
      && (state.reachabilityState != TargetReachabilityState_OFFLINE) )
   {
      respMsg = MessagingTk::requestResponse(node, rmMsg, NETMSGTYPE_RmChunkPathsResp);

      if (!respMsg)
      {
         // note: it is the buddy target whose state is polled here, so that is the ID to report
         LOG_DEBUG(__func__, Log_NOTICE,
            "Unable to communicate, but target is not offline; "
            "sleeping " + StringTk::uintToStr(msgRetryIntervalMS) + " ms before retry. "
            "targetID: " + StringTk::uintToStr(buddyTargetID) );

         PThread::sleepMS(msgRetryIntervalMS);

         // if thread shall terminate, break loop here
         if ( getSelfTerminateNotIdle() )
            break;

         getStateRes = Program::getApp()->getTargetStateStore()->getState(buddyTargetID, state);
      }
   }

   if (!respMsg)
   { // communication error
      LogContext(__func__).logErr(
         "Communication with storage node failed: " + node.getTypedNodeID() );

      return false;
   }
   else
   if(!getStateRes)
   {
      LogContext(__func__).log(Log_WARNING,
         "No valid state for node ID: " + node.getTypedNodeID() );

      return false;
   }
   else
   {
      // correct response type received
      RmChunkPathsRespMsg* respMsgCast = (RmChunkPathsRespMsg*) respMsg.get();
      StringList& failedPaths = respMsgCast->getFailedPaths();

      for (StringListIter iter = failedPaths.begin(); iter != failedPaths.end(); iter++)
      {
         // the removal was requested on the buddy target, so that is the ID to report
         LogContext(__func__).logErr("Chunk path could not be deleted; "
            "path: " + *iter + "; "
            "buddyTargetID: " + StringTk::uintToStr(buddyTargetID) + "; "
            "node: " + node.getTypedNodeID());

         retVal = false;
      }
   }

   return retVal;
}

View File

@@ -0,0 +1,98 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/nodes/Node.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <mutex>
/**
 * Thread that fetches chunk file candidates from a sync candidate store and transfers them to
 * the buddy target (see doResync()); keeps counters about synced chunks and errors.
 */
class BuddyResyncerFileSyncSlave : public PThread
{
   friend class BuddyResyncer; // (to grant access to internal mutex)
   friend class BuddyResyncJob; // (to grant access to internal mutex)

   public:
      BuddyResyncerFileSyncSlave(uint16_t targetID, ChunkSyncCandidateStore* syncCandidates,
         uint8_t slaveID);
      virtual ~BuddyResyncerFileSyncSlave();

   private:
      AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool

      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond; // broadcast on every setIsRunning() call

      AtomicUInt64 numChunksSynced;
      AtomicUInt64 errorCount;

      bool isRunning; // true if an instance of this component is currently running

      uint16_t targetID;
      ChunkSyncCandidateStore* syncCandidates;

      virtual void run();
      void syncLoop();

      FhgfsOpsErr doResync(std::string& chunkPathStr, uint16_t localTargetID,
         uint16_t buddyTargetID);
      bool removeBuddyChunkUnlocked(Node& node, uint16_t buddyTargetID, std::string& pathStr);

   public:
      // getters & setters

      /**
       * @return current value of the isRunning flag (read under statusMutex)
       */
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> guard(statusMutex);

         return isRunning;
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         if (!value)
         {
            onlyTerminateIfIdle.setZero();
            return;
         }

         onlyTerminateIfIdle.set(1);
      }

      bool getOnlyTerminateIfIdle()
      {
         return onlyTerminateIfIdle.read() != 0;
      }

      uint64_t getNumChunksSynced()
      {
         return numChunksSynced.read();
      }

      uint64_t getErrorCount()
      {
         return errorCount.read();
      }

   private:
      // getters & setters

      /**
       * Updates the isRunning flag under statusMutex and wakes up all waiters on
       * isRunningChangeCond.
       */
      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> guard(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      /**
       * @return true if self-termination was requested and the "only terminate if idle" flag
       *    is not set
       */
      bool getSelfTerminateNotIdle()
      {
         if (!getSelfTerminate())
            return false;

         return !getOnlyTerminateIfIdle();
      }
};
typedef std::list<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveList;
typedef BuddyResyncerFileSyncSlaveList::iterator BuddyResyncerFileSyncSlaveListIter;
typedef std::vector<BuddyResyncerFileSyncSlave*> BuddyResyncerFileSyncSlaveVec;
typedef BuddyResyncerFileSyncSlaveVec::iterator BuddyResyncerFileSyncSlaveVecIter;

View File

@@ -0,0 +1,162 @@
#include <app/App.h>
#include <toolkit/StorageTkEx.h>
#include <storage/StorageTargets.h>
#include <program/Program.h>
#include <mutex>
#include "BuddyResyncerGatherSlave.h"
// protects staticGatherSlaves
Mutex BuddyResyncerGatherSlave::staticGatherSlavesMutex;
// maps thread name => slave instance; filled by the constructor and used by the static nftw()
// callback handleDiscoveredEntry() to find the slave that belongs to the current thread
std::map<std::string, BuddyResyncerGatherSlave*> BuddyResyncerGatherSlave::staticGatherSlaves;
/**
 * Creates the slave and registers it under its thread name in the static slaves map, which the
 * nftw() callback uses to find the instance that belongs to the calling thread.
 *
 * @param target target that this slave works on (only a reference is stored)
 * @param syncCandidates store that discovered sync candidates are added to
 * @param workQueue queue that the directories to scan are fetched from
 * @param slaveID used as part of the thread name
 */
BuddyResyncerGatherSlave::BuddyResyncerGatherSlave(const StorageTarget& target,
   ChunkSyncCandidateStore* syncCandidates, BuddyResyncerGatherSlaveWorkQueue* workQueue,
   uint8_t slaveID) :
   PThread("BuddyResyncerGatherSlave_" + StringTk::uintToStr(target.getID()) + "-" +
      StringTk::uintToStr(slaveID)),
   target(target),
   isRunning(false),
   syncCandidates(syncCandidates),
   workQueue(workQueue)
{
   const std::lock_guard<Mutex> registryGuard(staticGatherSlavesMutex);

   staticGatherSlaves[this->getName()] = this;
}
/**
 * Removes this instance from the static slaves map again, so that the map does not keep a
 * dangling pointer to the destroyed object.
 */
BuddyResyncerGatherSlave::~BuddyResyncerGatherSlave()
{
   const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);

   auto iter = staticGatherSlaves.find(this->getName() );

   /* only erase our own registration: a newer slave with the same thread name might have
      replaced the entry in the meantime */
   if ( (iter != staticGatherSlaves.end() ) && (iter->second == this) )
      staticGatherSlaves.erase(iter);
}
/**
* This is a component, which is started through its control frontend on-demand at
* runtime and terminates when it's done.
* We have to ensure (in cooperation with the control frontend) that we don't get multiple instances
* of this thread running at the same time.
*/
void BuddyResyncerGatherSlave::run()
{
setIsRunning(true);
numChunksDiscovered.setZero();
numChunksMatched.setZero();
numDirsDiscovered.setZero();
numDirsMatched.setZero();
try
{
LogContext(__func__).log(Log_DEBUG, "Component started.");
registerSignalHandler();
workLoop();
LogContext(__func__).log(Log_DEBUG, "Component stopped.");
}
catch(std::exception& e)
{
PThread::getCurrentThreadApp()->handleComponentException(e);
}
setIsRunning(false);
}
/**
 * Fetches directories from the work queue and walks each of them with nftw(), which calls
 * handleDiscoveredEntry() for the entries found. Ends when termination is requested (see
 * getSelfTerminateNotIdle()) or when self-termination is requested and the queue is empty.
 */
void BuddyResyncerGatherSlave::workLoop()
{
   const unsigned maxOpenFDsNum = 20; // max open FDs => max path sub-depth for efficient traversal

   for ( ; ; )
   {
      if (getSelfTerminateNotIdle())
         break;

      if (workQueue->queueEmpty() && getSelfTerminate())
         break;

      // get a directory to scan
      std::string scanPath = workQueue->fetch(this);

      if(unlikely(scanPath.empty()))
         continue;

      if(nftw(scanPath.c_str(), handleDiscoveredEntry, maxOpenFDsNum, FTW_ACTIONRETVAL) == -1)
      { // error occurred
         LogContext(__func__).logErr("Error during chunks walk. SysErr: " + System::getErrString());
      }
   }
}
int BuddyResyncerGatherSlave::handleDiscoveredEntry(const char* path,
const struct stat* statBuf, int ftwEntryType, struct FTW* ftwBuf)
{
std::string chunksPath;
BuddyResyncerGatherSlave* thisStatic = nullptr;
{
const std::lock_guard<Mutex> lock(staticGatherSlavesMutex);
thisStatic = staticGatherSlaves[PThread::getCurrentThreadName()];
}
App* app = Program::getApp();
Config* cfg = app->getConfig();
const auto& targetPath = thisStatic->target.getPath().str();
chunksPath = targetPath + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
if (strlen(path) <= chunksPath.length())
return FTW_CONTINUE;
std::string relPathStr = path + chunksPath.size() + 1;
if ( relPathStr.empty() )
return FTW_CONTINUE;
const auto lastBuddyComm = thisStatic->target.getLastBuddyComm();
const bool buddyCommIsOverride = lastBuddyComm.first;
int64_t lastBuddyCommTimeSecs = std::chrono::system_clock::to_time_t(lastBuddyComm.second);
int64_t lastBuddyCommSafetyThresholdSecs = cfg->getSysResyncSafetyThresholdMins()*60;
if ( (lastBuddyCommSafetyThresholdSecs == 0) && (!buddyCommIsOverride) ) // ignore timestamp file
lastBuddyCommTimeSecs = 0;
else
if (lastBuddyCommTimeSecs > lastBuddyCommSafetyThresholdSecs)
lastBuddyCommTimeSecs -= lastBuddyCommSafetyThresholdSecs;
if(ftwEntryType == FTW_D) // directory
{
thisStatic->numDirsDiscovered.increase();
int64_t dirModificationTime = (int64_t)statBuf->st_mtim.tv_sec;
if(dirModificationTime > lastBuddyCommTimeSecs)
{ // sync candidate
ChunkSyncCandidateDir candidate(relPathStr, thisStatic->target.getID());
thisStatic->syncCandidates->add(candidate, thisStatic);
thisStatic->numDirsMatched.increase();
}
}
else
if(ftwEntryType == FTW_F) // file
{
// we found a chunk
thisStatic->numChunksDiscovered.increase();
// we need to use ctime here, because mtime can be set manually (even to the future)
time_t chunkChangeTime = statBuf->st_ctim.tv_sec;
if(chunkChangeTime > lastBuddyCommTimeSecs)
{ // sync candidate
std::string relPathStr = path + chunksPath.size() + 1;
ChunkSyncCandidateFile candidate(relPathStr, thisStatic->target.getID());
thisStatic->syncCandidates->add(candidate, thisStatic);
thisStatic->numChunksMatched.increase();
}
}
return FTW_CONTINUE;
}

View File

@@ -0,0 +1,182 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/components/ComponentInitException.h>
#include <common/threading/PThread.h>
#include <ftw.h>
// forward declaration is sufficient here, because this header only uses references to it
class StorageTarget;
// BuddyResyncerGatherSlaveWorkQueue::add() waits while the queue holds more entries than this
#define GATHERSLAVEQUEUE_MAXSIZE 5000
/**
 * Mutex-protected FIFO queue of directory paths.
 *
 * This is more or less just a small class for convenience, that is tightly coupled to
 * BuddyResyncerGatherSlave and BuddyResyncerJob.
 */
class BuddyResyncerGatherSlaveWorkQueue
{
   public:
      BuddyResyncerGatherSlaveWorkQueue(): gatherSlavesWorkQueueLen(0) { }

   private:
      StringList paths;
      size_t gatherSlavesWorkQueueLen; // used to avoid constant calling of size() method of list
      Mutex mutex;
      Condition pathAddedCond; // signaled by add()
      Condition pathFetchedCond; // signaled by fetch()

   public:
      /**
       * Appends a path to the queue. While the queue holds more than GATHERSLAVEQUEUE_MAXSIZE
       * entries, this waits for entries to be fetched; the wait is given up if the caller was
       * asked to terminate (the path is still appended in that case).
       *
       * @param path path to append (copied into the queue)
       * @param caller calling thread (checked for self-termination while waiting); may be NULL
       */
      void add(std::string& path, PThread* caller)
      {
         unsigned waitTimeoutMS = 3000;

         const std::lock_guard<Mutex> guard(mutex);

         for ( ; ; )
         {
            if (gatherSlavesWorkQueueLen <= GATHERSLAVEQUEUE_MAXSIZE)
               break;

            if(caller && unlikely(caller->getSelfTerminate()))
               break;

            // (timed wait, so that the termination flag is re-checked regularly)
            pathFetchedCond.timedwait(&mutex, waitTimeoutMS);
         }

         paths.push_back(path);
         gatherSlavesWorkQueueLen++;

         pathAddedCond.signal();
      }

      /**
       * Removes and returns the first path of the queue; waits while the queue is empty.
       *
       * @param caller calling thread (checked for self-termination while waiting); may be NULL
       * @return the fetched path, or an empty string if the queue was empty and the caller was
       *    asked to terminate
       */
      std::string fetch(PThread* caller)
      {
         unsigned waitTimeoutMS = 3000;

         const std::lock_guard<Mutex> guard(mutex);

         while (paths.empty())
         {
            if(caller && unlikely(caller->getSelfTerminate()))
               return "";

            // (timed wait, so that the termination flag is re-checked regularly)
            pathAddedCond.timedwait(&mutex, waitTimeoutMS);
         }

         std::string firstPath = paths.front();

         paths.pop_front();
         gatherSlavesWorkQueueLen--;

         pathFetchedCond.signal();

         return firstPath;
      }

      bool queueEmpty()
      {
         const std::lock_guard<Mutex> guard(mutex);

         return gatherSlavesWorkQueueLen == 0;
      }

      void clear()
      {
         const std::lock_guard<Mutex> guard(mutex);

         paths.clear();
         gatherSlavesWorkQueueLen = 0;
      }
};
/**
 * Thread that fetches directories from a work queue, walks them with nftw() and adds the
 * discovered sync candidates (dirs and chunk files) to the sync candidate store.
 */
class BuddyResyncerGatherSlave : public PThread
{
   friend class BuddyResyncer; // (to grant access to internal mutex)
   friend class BuddyResyncJob; // (to grant access to internal mutex)

   public:
      BuddyResyncerGatherSlave(const StorageTarget& target, ChunkSyncCandidateStore* syncCandidates,
         BuddyResyncerGatherSlaveWorkQueue* workQueue, uint8_t slaveID);
      virtual ~BuddyResyncerGatherSlave();

      void workLoop();

   private:
      AtomicSizeT onlyTerminateIfIdle; // atomic quasi-bool

      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond; // broadcast on every setIsRunning() call

      const StorageTarget& target;

      AtomicUInt64 numChunksDiscovered;
      AtomicUInt64 numChunksMatched;
      AtomicUInt64 numDirsDiscovered;
      AtomicUInt64 numDirsMatched;

      bool isRunning; // true if an instance of this component is currently running

      ChunkSyncCandidateStore* syncCandidates;
      BuddyResyncerGatherSlaveWorkQueue* workQueue;

      // nftw() callback needs access the slave threads
      static Mutex staticGatherSlavesMutex;
      static std::map<std::string, BuddyResyncerGatherSlave*> staticGatherSlaves;

      virtual void run();

      static int handleDiscoveredEntry(const char* path, const struct stat* statBuf,
         int ftwEntryType, struct FTW* ftwBuf);

   public:
      // getters & setters

      /**
       * @return current value of the isRunning flag (read under statusMutex)
       */
      bool getIsRunning()
      {
         const std::lock_guard<Mutex> guard(statusMutex);

         return isRunning;
      }

      /**
       * Reads all four counters (each counter is read on its own, not as one consistent
       * snapshot).
       */
      void getCounters(uint64_t& outNumChunksDiscovered, uint64_t& outNumChunksMatched,
         uint64_t& outNumDirsDiscovered, uint64_t& outNumDirsMatched)
      {
         outNumChunksDiscovered = numChunksDiscovered.read();
         outNumChunksMatched = numChunksMatched.read();
         outNumDirsDiscovered = numDirsDiscovered.read();
         outNumDirsMatched = numDirsMatched.read();
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         if (!value)
         {
            onlyTerminateIfIdle.setZero();
            return;
         }

         onlyTerminateIfIdle.set(1);
      }

      bool getOnlyTerminateIfIdle()
      {
         return onlyTerminateIfIdle.read() != 0;
      }

   private:
      // getters & setters

      /**
       * Updates the isRunning flag under statusMutex and wakes up all waiters on
       * isRunningChangeCond.
       */
      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> guard(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      /**
       * @return true if self-termination was requested and the "only terminate if idle" flag
       *    is not set
       */
      bool getSelfTerminateNotIdle()
      {
         if (!getSelfTerminate())
            return false;

         return !getOnlyTerminateIfIdle();
      }
};
typedef std::vector<BuddyResyncerGatherSlave*> BuddyResyncerGatherSlaveVec;
typedef BuddyResyncerGatherSlaveVec::iterator BuddyResyncerGatherSlaveVecIter;

View File

@@ -0,0 +1,44 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <string>
/**
 * A storage sync candidate: a path (relative) together with the ID of the target it belongs to.
 * A default-constructed candidate has an empty path and target ID 0.
 */
class ChunkSyncCandidateDir
{
   public:
      ChunkSyncCandidateDir() = default;

      ChunkSyncCandidateDir(const std::string& path, const uint16_t id)
         : relativePath(path), targetID(id)
      { }

   private:
      std::string relativePath;
      uint16_t targetID = 0;

   public:
      const std::string& getRelativePath() const
      {
         return relativePath;
      }

      uint16_t getTargetID() const
      {
         return targetID;
      }
};
/**
 * A storage sync candidate for a file. It currently declares no state of its own; everything
 * (relative path and target ID) comes from ChunkSyncCandidateDir.
 */
class ChunkSyncCandidateFile : public ChunkSyncCandidateDir
{
   public:
      ChunkSyncCandidateFile() = default;

      ChunkSyncCandidateFile(const std::string& relativePath, uint16_t targetID)
         : ChunkSyncCandidateDir(relativePath, targetID)
      { }
};
// candidate store specialised for chunk directories and chunk files
using ChunkSyncCandidateStore =
   SyncCandidateStore<ChunkSyncCandidateDir, ChunkSyncCandidateFile>;

View File

@@ -0,0 +1,88 @@
#include "ChunkFetcher.h"
#include <program/Program.h>
#include <common/Common.h>
/**
 * Creates one fetcher slave per storage target known to the app. The slaves are only
 * constructed here; their threads are started by startFetching().
 */
ChunkFetcher::ChunkFetcher()
   : log("ChunkFetcher"),
     isBad(false) // getIsBad() must not read an indeterminate value before startFetching()
{
   // for each targetID, put one fetcher thread into list
   for (const auto& mapping : Program::getApp()->getStorageTargets()->getTargets())
      this->slaves.emplace_back(mapping.first);
}
// nothing to release explicitly; members clean up after themselves
ChunkFetcher::~ChunkFetcher() = default;
/**
* Start fetcher slaves if they are not running already.
*
* @return true if successfully started or already running, false if startup problem occurred.
*/
bool ChunkFetcher::startFetching()
{
const char* logContext = "ChunkFetcher (start)";
bool retVal = true; // false if error occurred
{
const std::lock_guard<Mutex> lock(chunksListMutex);
isBad = false;
}
for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
if(!iter->isRunning)
{
// slave thread not running yet => start it
iter->resetSelfTerminate();
try
{
iter->start();
iter->isRunning = true;
}
catch (PThreadCreateException& e)
{
LogContext(logContext).logErr(std::string("Unable to start thread: ") + e.what());
retVal = false;
}
}
}
return retVal;
}
void ChunkFetcher::stopFetching()
{
for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
if(iter->isRunning)
{
iter->selfTerminate();
}
}
}
void ChunkFetcher::waitForStopFetching()
{
for(ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
chunksListFetchedCondition.broadcast();
while (iter->isRunning)
{
iter->isRunningChangeCond.wait(&(iter->statusMutex));
}
chunksList.clear();
}
}

View File

@@ -0,0 +1,101 @@
#pragma once
#include <components/chunkfetcher/ChunkFetcherSlave.h>
#include <common/toolkit/ListTk.h>
#include <mutex>
// ChunkFetcher::addChunk() waits on chunksListFetchedCondition when the chunk queue holds more
// than this many entries
#define MAX_CHUNKLIST_SIZE 5000
// forward declaration
class ChunkFetcher;
// list of fetcher slaves (ChunkFetcher's constructor adds one per storage target) + iterator
typedef std::list<ChunkFetcherSlave> ChunkFetcherSlaveList;
typedef ChunkFetcherSlaveList::iterator ChunkFetcherSlaveListIter;
/**
* This is not a component that represents a separate thread. Instead, it contains and controls
* slave threads, which are started and stopped on request (i.e. they are not automatically started
* when the app is started).
* The slave threads will run over all chunks on all targets and read them in a format suitable for
* fsck
*/
class ChunkFetcher
{
public:
ChunkFetcher();
virtual ~ChunkFetcher();
bool startFetching();
void stopFetching();
void waitForStopFetching();
private:
LogContext log;
ChunkFetcherSlaveList slaves;
FsckChunkList chunksList;
Mutex chunksListMutex;
Condition chunksListFetchedCondition;
bool isBad;
public:
bool getIsBad()
{
const std::lock_guard<Mutex> lock(chunksListMutex);
return isBad;
}
void setBad()
{
const std::lock_guard<Mutex> lock(chunksListMutex);
isBad = true;
}
void addChunk(FsckChunk& chunk)
{
const std::lock_guard<Mutex> lock(chunksListMutex);
if (chunksList.size() > MAX_CHUNKLIST_SIZE)
chunksListFetchedCondition.wait(&chunksListMutex);
chunksList.push_back(chunk);
}
bool isQueueEmpty()
{
std::lock_guard<Mutex> lock(chunksListMutex);
return chunksList.empty();
}
void getAndDeleteChunks(FsckChunkList& outList, unsigned numChunks)
{
const std::lock_guard<Mutex> lock(chunksListMutex);
FsckChunkListIter iterEnd = this->chunksList.begin();
ListTk::advance(this->chunksList, iterEnd, numChunks);
outList.splice(outList.end(), this->chunksList, this->chunksList.begin(), iterEnd);
chunksListFetchedCondition.signal();
}
unsigned getNumRunning()
{
unsigned retVal = 0;
for (ChunkFetcherSlaveListIter iter = slaves.begin(); iter != slaves.end(); iter++)
{
const std::lock_guard<Mutex> lock(iter->statusMutex);
if ( iter->isRunning )
retVal++;
}
return retVal;
}
};

View File

@@ -0,0 +1,165 @@
#include "ChunkFetcherSlave.h"
#include <program/Program.h>
#include <boost/static_assert.hpp>
#include <libgen.h>
/**
 * @param targetID ID of the storage target handled by this slave; it is also appended to the
 * "ChunkFetcherSlave-" prefix handed to the PThread base and to the log context.
 */
ChunkFetcherSlave::ChunkFetcherSlave(uint16_t targetID)
   : PThread("ChunkFetcherSlave-" + StringTk::uintToStr(targetID) ),
     log("ChunkFetcherSlave-" + StringTk::uintToStr(targetID) ),
     isRunning(false),
     targetID(targetID)
{ }
// nothing to release explicitly
ChunkFetcherSlave::~ChunkFetcherSlave() = default;
/**
 * Thread body: marks the slave as running, walks all chunks of its target and marks the slave
 * as stopped again. Exceptions derived from std::exception are handed to the app's component
 * exception handler instead of escaping.
 */
void ChunkFetcherSlave::run()
{
   setIsRunning(true);

   try
   {
      registerSignalHandler();

      walkAllChunks();

      log.log(4, "Component stopped.");
   }
   catch(std::exception& componentError)
   {
      PThread::getCurrentThreadApp()->handleComponentException(componentError);
   }

   // reached on normal completion and after a handled exception alike
   setIsRunning(false);
}
/*
 * Walk over all chunks of this slave's target: first the regular chunks directory and then, if
 * the target is the primary of a buddy mirror group, the buddy mirror directory.
 *
 * Returns early (without the final debug message) as soon as one of the walks fails.
 */
void ChunkFetcherSlave::walkAllChunks()
{
   App* app = Program::getApp();

   log.log(Log_DEBUG, "Starting chunks walk...");

   const auto& target = *app->getStorageTargets()->getTargets().at(targetID);

   const auto& targetPath = target.getPath().str();

   // walk over "normal" chunks (i.e. no mirrors)
   std::string walkPath = targetPath + "/" + CONFIG_CHUNK_SUBDIR_NAME;
   if(!walkChunkPath(walkPath, 0, walkPath.size() ) )
      return;

   // let's find out if this target is part of a buddy mirror group and if it is the primary
   // target; if it is, walk over buddy mirror directory
   // (initialized defensively so the branch below never reads an indeterminate value, in case
   // the mapper leaves the out-parameter untouched for targets without a group)
   bool isPrimaryTarget = false;
   uint16_t buddyGroupID = app->getMirrorBuddyGroupMapper()->getBuddyGroupID(this->targetID,
      &isPrimaryTarget);

   if (isPrimaryTarget)
   {
      walkPath = targetPath + "/" CONFIG_BUDDYMIRROR_SUBDIR_NAME;
      if(!walkChunkPath(walkPath, buddyGroupID, walkPath.size() ) )
         return;
   }

   log.log(Log_DEBUG, "End of chunks walk.");
}
/**
 * Recursively walks the directory at path. Every entry that is not a directory is turned into
 * an FsckChunk and handed to the app's ChunkFetcher via addChunk(); directories are descended
 * into.
 *
 * @param path directory to walk.
 * @param buddyGroupID value stored in every FsckChunk created during this walk.
 * @param basePathLen length of the walk's root path; used to cut the root prefix (plus one
 * separator character) off an entry's path to obtain its relative chunk path.
 * @return true if the whole subtree was walked; false if opendir/readdir/stat failed, a nested
 * walk failed or self-termination was requested. On false, the ChunkFetcher is marked bad.
 */
bool ChunkFetcherSlave::walkChunkPath(const std::string& path, uint16_t buddyGroupID,
unsigned basePathLen)
{
DIR* dir = ::opendir(path.c_str() );
if(!dir)
{
LOG(GENERAL, WARNING, "Could not open directory.", path, targetID, sysErr);
Program::getApp()->getChunkFetcher()->setBad();
return false;
}
int readRes;
bool result = true;
// pathBuf always starts with "<path>/"; the current entry name is appended per iteration
std::string pathBuf = path;
pathBuf.push_back('/');
while(!getSelfTerminate())
{
::dirent* item;
// we really want struct dirent to contain a reasonably sized array for the filename
BOOST_STATIC_ASSERT(sizeof(item->d_name) >= NAME_MAX + 1);
#if USE_READDIR_R
::dirent entry;
readRes = ::readdir_r(dir, &entry, &item);
#else
// readdir() returns NULL both at end of directory and on error; clearing errno first lets
// the two cases be told apart (errno stays 0 at end of directory)
errno = 0;
item = readdir(dir);
readRes = item ? 0 : errno;
#endif
if(readRes != 0)
{
LOG(GENERAL, WARNING, "readdir failed.", path, targetID, sysErr(readRes));
result = false;
break;
}
// no error and no entry => end of directory
if(!item)
break;
if(::strcmp(item->d_name, ".") == 0 || ::strcmp(item->d_name, "..") == 0)
continue;
// cut pathBuf back to "<path>/" before appending the next entry name
pathBuf.resize(path.size() + 1);
pathBuf += item->d_name;
struct stat statBuf;
int statRes = ::stat(pathBuf.c_str(), &statBuf);
if(statRes)
{
LOG(GENERAL, WARNING, "Could not stat directory.", ("path", pathBuf), targetID, sysErr);
result = false;
break;
}
if(S_ISDIR(statBuf.st_mode) )
{
// descend; basePathLen is passed on unchanged so relative paths stay relative to the root
result = walkChunkPath(pathBuf, buddyGroupID, basePathLen);
if(!result)
break;
}
else
{
// skip the root path and the separator that follows it
const char* relativeChunkPath = pathBuf.c_str() + basePathLen + 1;
// get only the dirname part of the path
// (dirname() may modify the string it is given, hence the temporary copy)
char* tmpPathCopy = strdup(relativeChunkPath);
Path savedPath(dirname(tmpPathCopy) );
free(tmpPathCopy);
FsckChunk fsckChunk(item->d_name, targetID, savedPath, statBuf.st_size, statBuf.st_blocks,
statBuf.st_ctime, statBuf.st_mtime, statBuf.st_atime, statBuf.st_uid, statBuf.st_gid,
buddyGroupID);
Program::getApp()->getChunkFetcher()->addChunk(fsckChunk);
}
}
::closedir(dir);
// a walk that was cut short by a termination request counts as failed
if (getSelfTerminate())
result = false;
if(!result)
Program::getApp()->getChunkFetcher()->setBad();
return result;
}

View File

@@ -0,0 +1,62 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/components/ComponentInitException.h>
#include <common/fsck/FsckChunk.h>
#include <common/threading/PThread.h>
#include <mutex>
class ChunkFetcher; //forward decl.
/**
 * This component runs over all chunks of one target and gathers information suitable for fsck
 *
 * This component is not auto-started when the app starts. It is started and stopped by the
 * ChunkFetcher.
 */
class ChunkFetcherSlave : public PThread
{
   friend class ChunkFetcher; // (to grant access to internal mutex)

   public:
      ChunkFetcherSlave(uint16_t targetID);
      virtual ~ChunkFetcherSlave();

   private:
      LogContext log;

      Mutex statusMutex; // protects isRunning
      Condition isRunningChangeCond; // broadcast on every change of isRunning

      bool isRunning; // true if an instance of this component is currently running

      uint16_t targetID;

      virtual void run();

   public:
      // getters & setters

      /**
       * The parameter is ignored. It is unnamed and defaulted so that callers can simply
       * write getIsRunning(), while existing calls that pass an argument still compile.
       *
       * @return current value of the isRunning flag, read while holding statusMutex.
       */
      bool getIsRunning(bool /*unused*/ = false)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         return this->isRunning;
      }

   private:
      void walkAllChunks();

      bool walkChunkPath(const std::string& path, uint16_t buddyGroupID, unsigned basePathLen);

      // getters & setters

      /**
       * Updates the isRunning flag under statusMutex and broadcasts the change.
       */
      void setIsRunning(bool isRunning)
      {
         const std::lock_guard<Mutex> lock(statusMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }
};

View File

@@ -0,0 +1,32 @@
#pragma once
#include <app/App.h>
#include <common/components/streamlistenerv2/StreamListenerV2.h>
#include <program/Program.h>
/**
 * Unlike the common StreamListenerV2, this class can handle multiple work queues through an
 * overridden getWorkQueue() method.
 */
class StorageStreamListenerV2 : public StreamListenerV2
{
   public:
      StorageStreamListenerV2(std::string listenerID, AbstractApp* app)
         : StreamListenerV2(listenerID, app, NULL)
      { }

      virtual ~StorageStreamListenerV2() = default;

   protected:
      // getters & setters

      /**
       * @return the work queue that the storage app provides for the given target ID.
       */
      virtual MultiWorkQueue* getWorkQueue(uint16_t targetID) const
      {
         App* storageApp = Program::getApp();

         return storageApp->getWorkQueue(targetID);
      }
};

View File

@@ -0,0 +1,84 @@
#include <common/app/log/LogContext.h>
#include <common/benchmark/StorageBench.h>
#include <common/toolkit/StringTk.h>
#include <program/Program.h>
#include "StorageBenchWork.h"
/**
 * Executes one benchmark work package: reads or writes bufLen bytes between buf and
 * fileDescriptor in pieces of at most the configured read/write tune size, updates the node op
 * stats and reports the outcome through the operator pipe.
 *
 * On success this work's threadID is written to the pipe; on an unknown benchmark type or a
 * failed read/write (return value -1), STORAGEBENCH_ERROR_WORKER_ERROR is written instead.
 *
 * The four buffer parameters are not used by this implementation.
 */
void StorageBenchWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
   unsigned bufOutLen)
{
   const char* logContext = "Storage Benchmark (run)";

   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   int workRes = 0; // return value for benchmark operator
   ssize_t ioRes = 0; // read/write result

   if (this->type == StorageBenchType_READ)
   {
      size_t readSize = cfg->getTuneFileReadSize();
      size_t toBeRead = this->bufLen;
      size_t bufOffset = 0;

      while(toBeRead)
      {
         size_t currentReadSize = BEEGFS_MIN(readSize, toBeRead);

         ioRes = read(this->fileDescriptor, &this->buf[bufOffset], currentReadSize);
         if (ioRes <= 0)
            break; // error or end of file

         // read() may return fewer bytes than requested, so advance by what was actually
         // transferred instead of by the requested size
         toBeRead -= static_cast<size_t>(ioRes);
         bufOffset += static_cast<size_t>(ioRes);
      }

      app->getNodeOpStats()->updateNodeOp(0, StorageOpCounter_READOPS,
         this->bufLen, NETMSG_DEFAULT_USERID);
   }
   else
   if (this->type == StorageBenchType_WRITE)
   {
      size_t writeSize = cfg->getTuneFileWriteSize();
      size_t toBeWritten = this->bufLen;
      size_t bufOffset = 0;

      while(toBeWritten)
      {
         size_t currentWriteSize = BEEGFS_MIN(writeSize, toBeWritten);

         ioRes = write(this->fileDescriptor, &this->buf[bufOffset], currentWriteSize);
         if (ioRes <= 0)
            break; // error or nothing written

         // write() may accept fewer bytes than requested; continue right after the part
         // that was actually written so no part of the buffer is skipped
         toBeWritten -= static_cast<size_t>(ioRes);
         bufOffset += static_cast<size_t>(ioRes);
      }

      app->getNodeOpStats()->updateNodeOp(0, StorageOpCounter_WRITEOPS,
         this->bufLen, NETMSG_DEFAULT_USERID);
   }
   else
   { // unknown benchmark type
      workRes = STORAGEBENCH_ERROR_WORKER_ERROR;
      LogContext(logContext).logErr("Error: unknown benchmark type");
   }

   if(unlikely(workRes < 0) || unlikely(ioRes == -1) )
   { // error occurred
      if (ioRes == -1)
      { // read or write operation failed
         LogContext(logContext).logErr(std::string("Error: I/O failure. SysErr: ") +
            System::getErrString() );
      }

      workRes = STORAGEBENCH_ERROR_WORKER_ERROR;

      this->operatorCommunication->getWriteFD()->write(&workRes, sizeof(int) );
   }
   else
   { // success
      this->operatorCommunication->getWriteFD()->write(&this->threadID, sizeof(int) );
   }
}

View File

@@ -0,0 +1,43 @@
#pragma once
#include <common/benchmark/StorageBench.h>
#include <common/components/worker/Work.h>
#include <common/toolkit/Pipe.h>
#include <common/Common.h>
class StorageBenchWork: public Work
{
public:
StorageBenchWork(uint16_t targetID, int threadID, int fileDescriptor,
StorageBenchType type, int64_t bufLen, Pipe* operatorCommunication, char* buf)
{
this->targetID = targetID;
this->threadID = threadID;
this->fileDescriptor = fileDescriptor;
this->type = type;
this->bufLen = bufLen;
this->operatorCommunication = operatorCommunication;
this->buf = buf;
}
virtual ~StorageBenchWork()
{
}
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
protected:
private:
uint16_t targetID;
int threadID; // virtual threadID
int fileDescriptor;
StorageBenchType type;
int64_t bufLen;
char* buf;
Pipe* operatorCommunication;
};