New upstream version 8.1.0
This commit is contained in:
203
fsck/source/components/DataFetcher.cpp
Normal file
203
fsck/source/components/DataFetcher.cpp
Normal file
@@ -0,0 +1,203 @@
|
||||
#include "DataFetcher.h"
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/storage/Storagedata.h>
|
||||
#include <components/worker/RetrieveChunksWork.h>
|
||||
#include <components/worker/RetrieveDirEntriesWork.h>
|
||||
#include <components/worker/RetrieveFsIDsWork.h>
|
||||
#include <components/worker/RetrieveInodesWork.h>
|
||||
#include <program/Program.h>
|
||||
#include <toolkit/FsckTkEx.h>
|
||||
#include <toolkit/FsckException.h>
|
||||
|
||||
DataFetcher::DataFetcher(FsckDB& db, bool forceRestart)
|
||||
: database(&db),
|
||||
workQueue(Program::getApp()->getWorkQueue() ),
|
||||
generatedPackages(0),
|
||||
forceRestart(forceRestart)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Queue the work packages that fetch dentries, fsIDs, inodes and chunks, then block until all
 * generated packages have finished (printing a status line periodically while waiting).
 *
 * @return FhgfsOpsErr_SUCCESS if everything was fetched; FhgfsOpsErr_INUSE if retrieveChunks()
 *    reported failure; FhgfsOpsErr_INTERRUPTED if an abort was requested while waiting;
 *    FhgfsOpsErr_INTERNAL if the fatal error counter is non-zero after all packages finished.
 */
FhgfsOpsErr DataFetcher::execute()
{
   FhgfsOpsErr result = FhgfsOpsErr_SUCCESS;

   NodeStore* metaNodes = Program::getApp()->getMetaNodes();
   auto metaNodeList = metaNodes->referenceAllNodes();

   printStatus();

   retrieveDirEntries(metaNodeList);
   retrieveInodes(metaNodeList);

   if (!retrieveChunks() )
   {
      result = FhgfsOpsErr_INUSE;
      Program::getApp()->abort();
   }

   // We cannot proceed before all packages are finished, because not all data would have been
   // fetched. While waiting, refresh the status output every DATAFETCHER_OUTPUT_INTERVAL_MS ms.
   while (!finishedPackages.timedWaitForCount(generatedPackages, DATAFETCHER_OUTPUT_INTERVAL_MS) )
   {
      printStatus();

      // An abort request only changes the result (an earlier INUSE result is kept). We still
      // have to keep waiting for the workers to terminate, because of the SynchronizedCounter
      // (this object cannot be destroyed before all workers terminate).
      const bool alreadyInUse = (result == FhgfsOpsErr_INUSE);

      if (!alreadyInUse && Program::getApp()->getShallAbort() )
         result = FhgfsOpsErr_INTERRUPTED;
   }

   if ( (result == FhgfsOpsErr_SUCCESS) && (fatalErrorsFound.read() > 0) )
      result = FhgfsOpsErr_INTERNAL;

   if (result == FhgfsOpsErr_SUCCESS)
   {
      // merge the per-package target sets into one duplicate-free set and drop the originals
      std::set<FsckTargetID> mergedTargets;

      for (const auto& packageTargets : usedTargets)
         mergedTargets.insert(packageTargets.begin(), packageTargets.end() );

      usedTargets.clear();

      std::list<FsckTargetID> usedTargetsList(mergedTargets.begin(), mergedTargets.end() );

      this->database->getUsedTargetIDsTable()->insert(usedTargetsList,
         this->database->getUsedTargetIDsTable()->newBulkHandle() );
   }

   printStatus(true);

   FsckTkEx::fsckOutput(""); // just a new line

   return result;
}
|
||||
|
||||
void DataFetcher::retrieveDirEntries(const std::vector<NodeHandle>& nodes)
|
||||
{
|
||||
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
|
||||
{
|
||||
Node& node = **nodeIter;
|
||||
|
||||
int requestsPerNode = 2;
|
||||
|
||||
unsigned hashDirsPerRequest = (unsigned)(META_DENTRIES_LEVEL1_SUBDIR_NUM/requestsPerNode);
|
||||
|
||||
unsigned hashDirStart = 0;
|
||||
unsigned hashDirEnd = 0;
|
||||
|
||||
do
|
||||
{
|
||||
hashDirEnd = hashDirStart + hashDirsPerRequest;
|
||||
|
||||
// fetch DirEntries
|
||||
|
||||
// before we create a package we increment the generated packages counter
|
||||
this->generatedPackages++;
|
||||
this->usedTargets.insert(this->usedTargets.end(), std::set<FsckTargetID>() );
|
||||
|
||||
this->workQueue->addIndirectWork(
|
||||
new RetrieveDirEntriesWork(database, node, &finishedPackages, fatalErrorsFound,
|
||||
hashDirStart, BEEGFS_MIN(hashDirEnd, META_DENTRIES_LEVEL1_SUBDIR_NUM - 1),
|
||||
&numDentriesFound, &numFileInodesFound, usedTargets.back()));
|
||||
|
||||
// fetch fsIDs
|
||||
|
||||
// before we create a package we increment the generated packages counter
|
||||
this->generatedPackages++;
|
||||
|
||||
this->workQueue->addIndirectWork(
|
||||
new RetrieveFsIDsWork(database, node, &finishedPackages, fatalErrorsFound, hashDirStart,
|
||||
BEEGFS_MIN(hashDirEnd, META_DENTRIES_LEVEL1_SUBDIR_NUM - 1)));
|
||||
|
||||
hashDirStart = hashDirEnd + 1;
|
||||
} while (hashDirEnd < META_DENTRIES_LEVEL1_SUBDIR_NUM);
|
||||
}
|
||||
}
|
||||
|
||||
void DataFetcher::retrieveInodes(const std::vector<NodeHandle>& nodes)
|
||||
{
|
||||
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
|
||||
{
|
||||
Node& node = **nodeIter;
|
||||
|
||||
int requestsPerNode = 2;
|
||||
|
||||
unsigned hashDirsPerRequest = (unsigned)(META_INODES_LEVEL1_SUBDIR_NUM/ requestsPerNode);
|
||||
|
||||
unsigned hashDirStart = 0;
|
||||
unsigned hashDirEnd = 0;
|
||||
|
||||
do
|
||||
{
|
||||
// before we create a package we increment the generated packages counter
|
||||
this->generatedPackages++;
|
||||
this->usedTargets.insert(this->usedTargets.end(), std::set<FsckTargetID>() );
|
||||
|
||||
hashDirEnd = hashDirStart + hashDirsPerRequest;
|
||||
|
||||
this->workQueue->addIndirectWork(
|
||||
new RetrieveInodesWork(database, node, &finishedPackages, fatalErrorsFound,
|
||||
hashDirStart, BEEGFS_MIN(hashDirEnd, META_INODES_LEVEL1_SUBDIR_NUM - 1),
|
||||
&numFileInodesFound, &numDirInodesFound, usedTargets.back()));
|
||||
|
||||
hashDirStart = hashDirEnd + 1;
|
||||
} while (hashDirEnd < META_INODES_LEVEL1_SUBDIR_NUM);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the RetrieveChunksWork for each storage node to the work queue.
|
||||
* @returns true if successful, false if work can't be started because another fsck is already
|
||||
* running or was aborted prematurely
|
||||
*/
|
||||
bool DataFetcher::retrieveChunks()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
|
||||
NodeStore* storageNodes = app->getStorageNodes();
|
||||
|
||||
// for each server create a work package to retrieve chunks
|
||||
for (const auto& node : storageNodes->referenceAllNodes())
|
||||
{
|
||||
// before we create a package we increment the generated packages counter
|
||||
this->generatedPackages++;
|
||||
|
||||
RetrieveChunksWork* retrieveWork = new RetrieveChunksWork(database, node, &finishedPackages,
|
||||
&numChunksFound, forceRestart);
|
||||
|
||||
// node will be released inside of work package
|
||||
workQueue->addIndirectWork(retrieveWork);
|
||||
|
||||
bool started;
|
||||
retrieveWork->waitForStarted(&started);
|
||||
if (!started)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void DataFetcher::printStatus(bool toLogFile)
|
||||
{
|
||||
uint64_t dentryCount = numDentriesFound.read();
|
||||
uint64_t fileInodeCount = numFileInodesFound.read();
|
||||
uint64_t dirInodeCount = numDirInodesFound.read();
|
||||
uint64_t chunkCount = numChunksFound.read();
|
||||
|
||||
std::string outputStr = "Fetched data > Directory entries: " + StringTk::uint64ToStr(dentryCount)
|
||||
+ " | Inodes: " + StringTk::uint64ToStr(fileInodeCount+dirInodeCount) + " | Chunks: " +
|
||||
StringTk::uint64ToStr(chunkCount);
|
||||
|
||||
int outputFlags = OutputOptions_LINEDELETE;
|
||||
|
||||
if (!toLogFile)
|
||||
outputFlags = outputFlags | OutputOptions_NOLOG;
|
||||
|
||||
FsckTkEx::fsckOutput(outputStr, outputFlags);
|
||||
}
|
||||
43
fsck/source/components/DataFetcher.h
Normal file
43
fsck/source/components/DataFetcher.h
Normal file
@@ -0,0 +1,43 @@
|
||||
#ifndef DATAFETCHER_H_
|
||||
#define DATAFETCHER_H_
|
||||
|
||||
#include <common/components/worker/queue/MultiWorkQueue.h>
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/toolkit/SynchronizedCounter.h>
|
||||
#include <database/FsckDB.h>
|
||||
|
||||
#define DATAFETCHER_OUTPUT_INTERVAL_MS 2000
|
||||
|
||||
class DataFetcher
|
||||
{
|
||||
private:
|
||||
FsckDB* database;
|
||||
|
||||
MultiWorkQueue* workQueue;
|
||||
|
||||
AtomicUInt64 numDentriesFound;
|
||||
AtomicUInt64 numFileInodesFound;
|
||||
AtomicUInt64 numDirInodesFound;
|
||||
AtomicUInt64 numChunksFound;
|
||||
|
||||
public:
|
||||
DataFetcher(FsckDB& db, bool forceRestart);
|
||||
|
||||
FhgfsOpsErr execute();
|
||||
|
||||
private:
|
||||
SynchronizedCounter finishedPackages;
|
||||
AtomicUInt64 fatalErrorsFound;
|
||||
unsigned generatedPackages;
|
||||
bool forceRestart;
|
||||
|
||||
std::list<std::set<FsckTargetID> > usedTargets;
|
||||
|
||||
void retrieveDirEntries(const std::vector<NodeHandle>& nodes);
|
||||
void retrieveInodes(const std::vector<NodeHandle>& nodes);
|
||||
bool retrieveChunks();
|
||||
|
||||
void printStatus(bool toLogFile = false);
|
||||
};
|
||||
|
||||
#endif /* DATAFETCHER_H_ */
|
||||
53
fsck/source/components/DatagramListener.cpp
Normal file
53
fsck/source/components/DatagramListener.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
#include "DatagramListener.h"
|
||||
|
||||
#include <common/net/message/NetMessageLogHelper.h>
|
||||
|
||||
/**
 * Forwards all arguments to AbstractDatagramListener, using "DGramLis" as the first argument.
 */
DatagramListener::DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
   AcknowledgmentStore* ackStore, unsigned short udpPort, bool restrictOutboundInterfaces)
   : AbstractDatagramListener("DGramLis", netFilter, localNicList, ackStore, udpPort,
        restrictOutboundInterfaces)
{
   // no state of its own
}
|
||||
|
||||
// nothing to release here
DatagramListener::~DatagramListener() = default;
|
||||
|
||||
void DatagramListener::handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg)
|
||||
{
|
||||
HighResolutionStats stats; // currently ignored
|
||||
std::shared_ptr<StandardSocket> sock = findSenderSock(fromAddr->sin_addr);
|
||||
if (sock == nullptr)
|
||||
{
|
||||
log.log(Log_WARNING, "Could not handle incoming message: no socket");
|
||||
return;
|
||||
}
|
||||
|
||||
NetMessage::ResponseContext rctx(fromAddr, sock.get(), sendBuf, DGRAMMGR_SENDBUF_SIZE, &stats);
|
||||
|
||||
const auto messageType = netMessageTypeToStr(msg->getMsgType());
|
||||
|
||||
switch(msg->getMsgType() )
|
||||
{
|
||||
// valid messages within this context
|
||||
case NETMSGTYPE_Heartbeat:
|
||||
case NETMSGTYPE_FsckModificationEvent:
|
||||
{
|
||||
if(!msg->processIncoming(rctx) )
|
||||
{
|
||||
LOG(GENERAL, WARNING,
|
||||
"Problem encountered during handling of incoming message.", messageType);
|
||||
}
|
||||
} break;
|
||||
|
||||
default:
|
||||
{ // valid, but not within this context
|
||||
log.logErr(
|
||||
"Received a message that is invalid within the current context "
|
||||
"from: " + Socket::ipaddrToStr(fromAddr->sin_addr) + "; "
|
||||
"type: " + messageType );
|
||||
} break;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
22
fsck/source/components/DatagramListener.h
Normal file
22
fsck/source/components/DatagramListener.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef DATAGRAMLISTENER_H_
|
||||
#define DATAGRAMLISTENER_H_
|
||||
|
||||
#include <common/components/AbstractDatagramListener.h>
|
||||
|
||||
class DatagramListener : public AbstractDatagramListener
|
||||
{
|
||||
public:
|
||||
DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
|
||||
AcknowledgmentStore* ackStore, unsigned short udpPort,
|
||||
bool restrictOutboundInterfaces);
|
||||
virtual ~DatagramListener();
|
||||
|
||||
|
||||
protected:
|
||||
virtual void handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg);
|
||||
|
||||
private:
|
||||
|
||||
};
|
||||
|
||||
#endif /*DATAGRAMLISTENER_H_*/
|
||||
491
fsck/source/components/InternodeSyncer.cpp
Normal file
491
fsck/source/components/InternodeSyncer.cpp
Normal file
@@ -0,0 +1,491 @@
|
||||
#include <app/config/Config.h>
|
||||
#include <app/App.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
#include <common/nodes/NodeStore.h>
|
||||
#include <common/nodes/TargetCapacityPools.h>
|
||||
#include <program/Program.h>
|
||||
#include "InternodeSyncer.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
/**
 * Sets up the thread and log context under the name "XNodeSync" and clears all flags.
 *
 * The two "force" flags are explicitly initialized to false: they are plain bools that are read
 * by their getAndReset...() accessors (getAndResetForceCheckNetwork() is called on every
 * syncLoop() iteration), so leaving them uninitialized would mean reading an indeterminate
 * value. Initializers are listed in member declaration order.
 */
InternodeSyncer::InternodeSyncer() :
   PThread("XNodeSync"),
   log("XNodeSync"),
   forceNodesAndTargetStatesUpdate(false),
   forceCheckNetwork(false),
   serversDownloaded(false)
{
}
|
||||
|
||||
// nothing to release here
InternodeSyncer::~InternodeSyncer() = default;
|
||||
|
||||
|
||||
void InternodeSyncer::run()
|
||||
{
|
||||
try
|
||||
{
|
||||
registerSignalHandler();
|
||||
|
||||
// download all nodes,mappings,states and buddy groups
|
||||
NumNodeIDList addedStorageNodes;
|
||||
NumNodeIDList removedStorageNodes;
|
||||
NumNodeIDList addedMetaNodes;
|
||||
NumNodeIDList removedMetaNodes;
|
||||
bool syncRes = downloadAndSyncNodes(addedStorageNodes, removedStorageNodes, addedMetaNodes,
|
||||
removedMetaNodes);
|
||||
if (!syncRes)
|
||||
{
|
||||
log.logErr("Error downloading nodes from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
return;
|
||||
}
|
||||
|
||||
syncRes = downloadAndSyncTargetMappings();
|
||||
if (!syncRes)
|
||||
{
|
||||
log.logErr("Error downloading target mappings from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
return;
|
||||
}
|
||||
|
||||
originalTargetMap = Program::getApp()->getTargetMapper()->getMapping();
|
||||
|
||||
syncRes = downloadAndSyncTargetStates();
|
||||
if (!syncRes)
|
||||
{
|
||||
log.logErr("Error downloading target states from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
return;
|
||||
}
|
||||
|
||||
syncRes = downloadAndSyncMirrorBuddyGroups();
|
||||
if ( !syncRes )
|
||||
{
|
||||
log.logErr("Error downloading mirror buddy groups from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
return;
|
||||
}
|
||||
|
||||
Program::getApp()->getMirrorBuddyGroupMapper()->getMirrorBuddyGroups(
|
||||
originalMirrorBuddyGroupMap);
|
||||
|
||||
syncRes = downloadAndSyncMetaMirrorBuddyGroups();
|
||||
if ( !syncRes )
|
||||
{
|
||||
log.logErr("Error downloading metadata mirror buddy groups from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
return;
|
||||
}
|
||||
|
||||
Program::getApp()->getMetaMirrorBuddyGroupMapper()->getMirrorBuddyGroups(
|
||||
originalMetaMirrorBuddyGroupMap);
|
||||
|
||||
{
|
||||
std::lock_guard<Mutex> lock(serversDownloadedMutex);
|
||||
serversDownloaded = true;
|
||||
serversDownloadedCondition.signal();
|
||||
}
|
||||
|
||||
syncLoop();
|
||||
|
||||
log.log(Log_DEBUG, "Component stopped.");
|
||||
}
|
||||
catch(std::exception& e)
|
||||
{
|
||||
PThread::getCurrentThreadApp()->handleComponentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
void InternodeSyncer::syncLoop()
|
||||
{
|
||||
const int sleepIntervalMS = 3*1000; // 3sec
|
||||
const unsigned downloadNodesAndStatesIntervalMS = 30000; // 30 sec
|
||||
const unsigned checkNetworkIntervalMS = 60*1000; // 1 minute
|
||||
|
||||
Time lastDownloadNodesAndStatesT;
|
||||
Time lastCheckNetworkT;
|
||||
|
||||
while(!waitForSelfTerminateOrder(sleepIntervalMS) )
|
||||
{
|
||||
// download & sync nodes
|
||||
if (lastDownloadNodesAndStatesT.elapsedMS() > downloadNodesAndStatesIntervalMS)
|
||||
{
|
||||
NumNodeIDList addedStorageNodes;
|
||||
NumNodeIDList removedStorageNodes;
|
||||
NumNodeIDList addedMetaNodes;
|
||||
NumNodeIDList removedMetaNodes;
|
||||
bool syncRes = downloadAndSyncNodes(addedStorageNodes, removedStorageNodes, addedMetaNodes,
|
||||
removedMetaNodes);
|
||||
|
||||
if (!syncRes)
|
||||
{
|
||||
log.logErr("Error downloading nodes from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
break;
|
||||
}
|
||||
|
||||
handleNodeChanges(NODETYPE_Meta, addedMetaNodes, removedMetaNodes);
|
||||
|
||||
handleNodeChanges(NODETYPE_Storage, addedStorageNodes, removedStorageNodes);
|
||||
|
||||
syncRes = downloadAndSyncTargetMappings();
|
||||
if (!syncRes)
|
||||
{
|
||||
log.logErr("Error downloading target mappings from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
break;
|
||||
}
|
||||
|
||||
handleTargetMappingChanges();
|
||||
|
||||
syncRes = downloadAndSyncTargetStates();
|
||||
if (!syncRes)
|
||||
{
|
||||
log.logErr("Error downloading target states from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
break;
|
||||
}
|
||||
|
||||
syncRes = downloadAndSyncMirrorBuddyGroups();
|
||||
if ( !syncRes )
|
||||
{
|
||||
log.logErr("Error downloading mirror buddy groups from mgmtd.");
|
||||
Program::getApp()->abort();
|
||||
break;
|
||||
}
|
||||
|
||||
handleBuddyGroupChanges();
|
||||
|
||||
lastDownloadNodesAndStatesT.setToNow();
|
||||
}
|
||||
|
||||
bool checkNetworkForced = getAndResetForceCheckNetwork();
|
||||
|
||||
if( checkNetworkForced ||
|
||||
(lastCheckNetworkT.elapsedMS() > checkNetworkIntervalMS))
|
||||
{
|
||||
checkNetwork();
|
||||
lastCheckNetworkT.setToNow();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return false on error.
|
||||
*/
|
||||
bool InternodeSyncer::downloadAndSyncTargetStates()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
NodeStore* mgmtNodes = app->getMgmtNodes();
|
||||
TargetStateStore* targetStateStore = app->getTargetStateStore();
|
||||
|
||||
auto node = mgmtNodes->referenceFirstNode();
|
||||
if(!node)
|
||||
return false;
|
||||
|
||||
UInt16List targetIDs;
|
||||
UInt8List reachabilityStates;
|
||||
UInt8List consistencyStates;
|
||||
|
||||
bool downloadRes = NodesTk::downloadTargetStates(*node, NODETYPE_Storage,
|
||||
&targetIDs, &reachabilityStates, &consistencyStates, false);
|
||||
|
||||
if(downloadRes)
|
||||
targetStateStore->syncStatesFromLists(targetIDs, reachabilityStates,
|
||||
consistencyStates);
|
||||
|
||||
return downloadRes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return false on error
|
||||
*/
|
||||
bool InternodeSyncer::downloadAndSyncNodes(NumNodeIDList& addedStorageNodes,
|
||||
NumNodeIDList& removedStorageNodes, NumNodeIDList& addedMetaNodes,
|
||||
NumNodeIDList& removedMetaNodes)
|
||||
{
|
||||
const char* logContext = "Nodes sync";
|
||||
|
||||
App* app = Program::getApp();
|
||||
NodeStoreServers* mgmtNodes = app->getMgmtNodes();
|
||||
NodeStoreServers* metaNodes = app->getMetaNodes();
|
||||
NodeStoreServers* storageNodes = app->getStorageNodes();
|
||||
Node& localNode = app->getLocalNode();
|
||||
|
||||
auto mgmtNode = mgmtNodes->referenceFirstNode();
|
||||
if (!mgmtNode)
|
||||
return false;
|
||||
|
||||
{ // storage nodes
|
||||
std::vector<NodeHandle> storageNodesList;
|
||||
|
||||
bool storageRes =
|
||||
NodesTk::downloadNodes(*mgmtNode, NODETYPE_Storage, storageNodesList, false);
|
||||
if(!storageRes)
|
||||
return false;
|
||||
|
||||
storageNodes->syncNodes(storageNodesList, &addedStorageNodes, &removedStorageNodes,
|
||||
&localNode);
|
||||
printSyncNodesResults(NODETYPE_Storage, &addedStorageNodes, &removedStorageNodes);
|
||||
}
|
||||
|
||||
{ // metadata nodes
|
||||
std::vector<NodeHandle> metaNodesList;
|
||||
NumNodeID rootNodeID;
|
||||
bool rootIsBuddyMirrored;
|
||||
|
||||
bool metaRes =
|
||||
NodesTk::downloadNodes(*mgmtNode, NODETYPE_Meta, metaNodesList, false, &rootNodeID,
|
||||
&rootIsBuddyMirrored);
|
||||
if(!metaRes)
|
||||
return false;
|
||||
|
||||
metaNodes->syncNodes(metaNodesList, &addedMetaNodes, &removedMetaNodes, &localNode);
|
||||
|
||||
if (app->getMetaRoot().setIfDefault(rootNodeID, rootIsBuddyMirrored))
|
||||
{
|
||||
LogContext(logContext).log(Log_CRITICAL,
|
||||
"Root NodeID (from sync results): " + rootNodeID.str() );
|
||||
}
|
||||
|
||||
printSyncNodesResults(NODETYPE_Meta, &addedMetaNodes, &removedMetaNodes);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Log how many nodes of the given type were added and removed by a sync. Empty lists are not
 * logged.
 *
 * @param nodeType type of the synced nodes; printed as part of the log message.
 * @param addedNodes IDs of the added nodes.
 * @param removedNodes IDs of the removed nodes.
 */
void InternodeSyncer::printSyncNodesResults(NodeType nodeType, NumNodeIDList* addedNodes,
   NumNodeIDList* removedNodes)
{
   const char* logContext = "Sync results";

   // logs one "<prefix><count> (Type: <type>)" line unless the list is empty
   const auto logIfNotEmpty = [&](const char* prefix, const NumNodeIDList& changedNodes)
   {
      if (changedNodes.empty() )
         return;

      LogContext(logContext).log(Log_WARNING, std::string(prefix) +
         StringTk::uintToStr(changedNodes.size() ) +
         " (Type: " + boost::lexical_cast<std::string>(nodeType) + ")");
   };

   logIfNotEmpty("Nodes added: ", *addedNodes);
   logIfNotEmpty("Nodes removed: ", *removedNodes);
}
|
||||
|
||||
/**
|
||||
* @return false on error
|
||||
*/
|
||||
bool InternodeSyncer::downloadAndSyncTargetMappings()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
NodeStoreServers* mgmtNodes = app->getMgmtNodes();
|
||||
TargetMapper* targetMapper = app->getTargetMapper();
|
||||
|
||||
bool retVal = true;
|
||||
|
||||
auto mgmtNode = mgmtNodes->referenceFirstNode();
|
||||
if(!mgmtNode)
|
||||
return false;
|
||||
|
||||
auto mappings = NodesTk::downloadTargetMappings(*mgmtNode, false);
|
||||
if (mappings.first)
|
||||
targetMapper->syncTargets(std::move(mappings.second));
|
||||
else
|
||||
retVal = false;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return false on error
|
||||
*/
|
||||
bool InternodeSyncer::downloadAndSyncMirrorBuddyGroups()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
NodeStoreServers* mgmtNodes = app->getMgmtNodes();
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();
|
||||
|
||||
bool retVal = true;
|
||||
|
||||
auto mgmtNode = mgmtNodes->referenceFirstNode();
|
||||
if(!mgmtNode)
|
||||
return false;
|
||||
|
||||
UInt16List buddyGroupIDs;
|
||||
UInt16List primaryTargetIDs;
|
||||
UInt16List secondaryTargetIDs;
|
||||
|
||||
bool downloadRes = NodesTk::downloadMirrorBuddyGroups(*mgmtNode, NODETYPE_Storage,
|
||||
&buddyGroupIDs, &primaryTargetIDs, &secondaryTargetIDs, false);
|
||||
|
||||
if(downloadRes)
|
||||
buddyGroupMapper->syncGroupsFromLists(buddyGroupIDs, primaryTargetIDs, secondaryTargetIDs,
|
||||
NumNodeID() );
|
||||
else
|
||||
retVal = false;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return false on error
|
||||
*/
|
||||
bool InternodeSyncer::downloadAndSyncMetaMirrorBuddyGroups()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
NodeStoreServers* mgmtNodes = app->getMgmtNodes();
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper = app->getMetaMirrorBuddyGroupMapper();
|
||||
|
||||
bool retVal = true;
|
||||
|
||||
auto mgmtNode = mgmtNodes->referenceFirstNode();
|
||||
if(!mgmtNode)
|
||||
return false;
|
||||
|
||||
UInt16List buddyGroupIDs;
|
||||
UInt16List primaryTargetIDs;
|
||||
UInt16List secondaryTargetIDs;
|
||||
|
||||
bool downloadRes = NodesTk::downloadMirrorBuddyGroups(*mgmtNode, NODETYPE_Meta, &buddyGroupIDs,
|
||||
&primaryTargetIDs, &secondaryTargetIDs, false);
|
||||
|
||||
if(downloadRes)
|
||||
buddyGroupMapper->syncGroupsFromLists(buddyGroupIDs, primaryTargetIDs, secondaryTargetIDs,
|
||||
NumNodeID() );
|
||||
else
|
||||
retVal = false;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/**
 * React to node list changes that happened while fsck was running. Added nodes are only logged;
 * removed nodes are logged and additionally make the app abort.
 *
 * @param nodeType type of the changed nodes; printed as part of the log message.
 * @param addedNodes IDs of the nodes that were added.
 * @param removedNodes IDs of the nodes that were removed.
 */
void InternodeSyncer::handleNodeChanges(NodeType nodeType, NumNodeIDList& addedNodes,
   NumNodeIDList& removedNodes)
{
   const char* logContext = "handleNodeChanges";

   if (!addedNodes.empty() )
   {
      const std::string addedMsg = std::string("Nodes added while beegfs-fsck was running: ")
         + StringTk::uintToStr(addedNodes.size() )
         + " (Type: " + boost::lexical_cast<std::string>(nodeType) + ")";

      LogContext(logContext).log(Log_WARNING, addedMsg);
   }

   if (removedNodes.empty() )
      return;

   // removed nodes must lead to fsck stoppage
   const std::string removedMsg = std::string("Nodes removed while beegfs-fsck was running: ")
      + StringTk::uintToStr(removedNodes.size() )
      + " (Type: " + boost::lexical_cast<std::string>(nodeType) + ")";

   LogContext(logContext).log(Log_WARNING, removedMsg);

   Program::getApp()->abort();
}
|
||||
|
||||
void InternodeSyncer::handleTargetMappingChanges()
|
||||
{
|
||||
const char* logContext = "handleTargetMappingChanges";
|
||||
|
||||
TargetMap newTargetMap = Program::getApp()->getTargetMapper()->getMapping();
|
||||
|
||||
for ( TargetMapIter originalMapIter = originalTargetMap.begin();
|
||||
originalMapIter != originalTargetMap.end(); originalMapIter++ )
|
||||
{
|
||||
uint16_t targetID = originalMapIter->first;
|
||||
NumNodeID oldNodeID = originalMapIter->second;
|
||||
|
||||
TargetMapIter newMapIter = newTargetMap.find(targetID);
|
||||
|
||||
if ( newMapIter == newTargetMap.end() )
|
||||
{
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Target removed while beegfs-fsck was running; beegfs-fsck can't continue; targetID: "
|
||||
+ StringTk::uintToStr(targetID));
|
||||
|
||||
Program::getApp()->abort();
|
||||
}
|
||||
else
|
||||
{
|
||||
NumNodeID newNodeID = newMapIter->second;
|
||||
if ( oldNodeID != newNodeID )
|
||||
{
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Target re-mapped while beegfs-fsck was running; beegfs-fsck can't continue; "
|
||||
"targetID: " + StringTk::uintToStr(targetID));
|
||||
|
||||
Program::getApp()->abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InternodeSyncer::handleBuddyGroupChanges()
|
||||
{
|
||||
const char* logContext = "handleBuddyGroupChanges";
|
||||
|
||||
MirrorBuddyGroupMap newMirrorBuddyGroupMap;
|
||||
Program::getApp()->getMirrorBuddyGroupMapper()->getMirrorBuddyGroups(newMirrorBuddyGroupMap);
|
||||
|
||||
for ( MirrorBuddyGroupMapIter originalMapIter = originalMirrorBuddyGroupMap.begin();
|
||||
originalMapIter != originalMirrorBuddyGroupMap.end(); originalMapIter++ )
|
||||
{
|
||||
uint16_t buddyGroupID = originalMapIter->first;
|
||||
MirrorBuddyGroup oldBuddyGroup = originalMapIter->second;
|
||||
|
||||
MirrorBuddyGroupMapIter newMapIter = newMirrorBuddyGroupMap.find(buddyGroupID);
|
||||
|
||||
if ( newMapIter == newMirrorBuddyGroupMap.end() )
|
||||
{
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Mirror buddy group removed while beegfs-fsck was running; beegfs-fsck can't continue; "
|
||||
"groupID: " + StringTk::uintToStr(buddyGroupID));
|
||||
|
||||
Program::getApp()->abort();
|
||||
}
|
||||
else
|
||||
{
|
||||
MirrorBuddyGroup newBuddyGroup = newMapIter->second;
|
||||
if ( oldBuddyGroup.firstTargetID != newBuddyGroup.firstTargetID )
|
||||
{
|
||||
LogContext(logContext).log(Log_WARNING,
|
||||
"Primary of mirror buddy group changed while beegfs-fsck was running; beegfs-fsck "
|
||||
"can't continue; groupID: " + StringTk::uintToStr(buddyGroupID));
|
||||
|
||||
Program::getApp()->abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Blocks until list of servers has been downloaded from management node
|
||||
*/
|
||||
void InternodeSyncer::waitForServers()
|
||||
{
|
||||
std::lock_guard<Mutex> lock(serversDownloadedMutex);
|
||||
while (!serversDownloaded)
|
||||
serversDownloadedCondition.wait(&serversDownloadedMutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Inspect the available and allowed network interfaces for any changes.
|
||||
*/
|
||||
bool InternodeSyncer::checkNetwork()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
NicAddressList newLocalNicList;
|
||||
bool res = false;
|
||||
|
||||
app->findAllowedInterfaces(newLocalNicList);
|
||||
app->findAllowedRDMAInterfaces(newLocalNicList);
|
||||
if (!std::equal(newLocalNicList.begin(), newLocalNicList.end(), app->getLocalNicList().begin()))
|
||||
{
|
||||
log.log(Log_NOTICE, "checkNetwork: local interfaces have changed");
|
||||
app->updateLocalNicList(newLocalNicList);
|
||||
res = true;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
95
fsck/source/components/InternodeSyncer.h
Normal file
95
fsck/source/components/InternodeSyncer.h
Normal file
@@ -0,0 +1,95 @@
|
||||
#ifndef INTERNODESYNCER_H_
|
||||
#define INTERNODESYNCER_H_
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/ComponentInitException.h>
|
||||
#include <common/nodes/MirrorBuddyGroupMapper.h>
|
||||
#include <common/threading/PThread.h>
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
class InternodeSyncer : public PThread
|
||||
{
|
||||
public:
|
||||
InternodeSyncer();
|
||||
virtual ~InternodeSyncer();
|
||||
|
||||
bool downloadAndSyncNodes(NumNodeIDList& addedStorageNodes,
|
||||
NumNodeIDList& removedStorageNodes, NumNodeIDList& addedMetaNodes,
|
||||
NumNodeIDList& removedMetaNodes);
|
||||
bool downloadAndSyncTargetMappings();
|
||||
bool downloadAndSyncMirrorBuddyGroups();
|
||||
bool downloadAndSyncMetaMirrorBuddyGroups();
|
||||
bool downloadAndSyncTargetStates();
|
||||
|
||||
private:
|
||||
LogContext log;
|
||||
Mutex forceNodesAndTargetStatesUpdateMutex;
|
||||
bool forceNodesAndTargetStatesUpdate;
|
||||
Mutex forceCheckNetworkMutex;
|
||||
bool forceCheckNetwork; // true to force check of network interfaces
|
||||
|
||||
TargetMap originalTargetMap;
|
||||
MirrorBuddyGroupMap originalMirrorBuddyGroupMap;
|
||||
MirrorBuddyGroupMap originalMetaMirrorBuddyGroupMap;
|
||||
|
||||
virtual void run();
|
||||
void syncLoop();
|
||||
void handleNodeChanges(NodeType nodeType, NumNodeIDList& addedNodes,
|
||||
NumNodeIDList& removedNodes);
|
||||
void handleTargetMappingChanges();
|
||||
void handleBuddyGroupChanges();
|
||||
|
||||
bool getAndResetForceCheckNetwork()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(forceCheckNetworkMutex);
|
||||
|
||||
bool retVal = this->forceCheckNetwork;
|
||||
|
||||
this->forceCheckNetwork = false;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
bool checkNetwork();
|
||||
|
||||
Condition serversDownloadedCondition;
|
||||
Mutex serversDownloadedMutex;
|
||||
bool serversDownloaded;
|
||||
|
||||
public:
|
||||
void waitForServers();
|
||||
|
||||
void setForceCheckNetwork()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(forceCheckNetworkMutex);
|
||||
|
||||
this->forceCheckNetwork = true;
|
||||
}
|
||||
|
||||
private:
|
||||
static void printSyncNodesResults(NodeType nodeType, NumNodeIDList* addedNodes,
|
||||
NumNodeIDList* removedNodes);
|
||||
|
||||
void setForceNodesAndTargetStatesUpdate()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(forceNodesAndTargetStatesUpdateMutex);
|
||||
|
||||
this->forceNodesAndTargetStatesUpdate = true;
|
||||
}
|
||||
|
||||
bool getAndResetForceNodesAndTargetStatesUpdate()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(forceNodesAndTargetStatesUpdateMutex);
|
||||
|
||||
bool retVal = this->forceNodesAndTargetStatesUpdate;
|
||||
|
||||
this->forceNodesAndTargetStatesUpdate = false;
|
||||
|
||||
return retVal;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif /* INTERNODESYNCER_H_ */
|
||||
98
fsck/source/components/ModificationEventHandler.cpp
Normal file
98
fsck/source/components/ModificationEventHandler.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
#include "ModificationEventHandler.h"
|
||||
|
||||
#include <common/toolkit/ZipIterator.h>
|
||||
#include <database/FsckDBException.h>
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
ModificationEventHandler::ModificationEventHandler(FsckDBModificationEventsTable& table)
|
||||
: PThread("ModificationEventHandler"),
|
||||
table(&table)
|
||||
{
|
||||
}
|
||||
|
||||
void ModificationEventHandler::run()
|
||||
{
|
||||
FsckDBModificationEventsTable::BulkHandle bulkHandle(table->newBulkHandle() );
|
||||
while ( !getSelfTerminate() )
|
||||
{
|
||||
std::unique_lock<Mutex> bufferListSafeLock(bufferListMutex); // LOCK BUFFER
|
||||
|
||||
// make sure to group at least MODHANDLER_MINSIZE_FLUSH flush elements (to not bother the DB
|
||||
// with every single event)
|
||||
if (bufferList.size() < MODHANDLER_MINSIZE_FLUSH)
|
||||
{
|
||||
bufferListSafeLock.unlock(); // UNLOCK BUFFER
|
||||
const std::lock_guard<Mutex> lock(eventsAddedMutex);
|
||||
eventsAddedCond.timedwait(&eventsAddedMutex, 2000);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// create a copy of the buffer list and flush this to DB, so that the buffer will become
|
||||
// free immediately and the incoming messages do not have to wait for DB
|
||||
FsckModificationEventList bufferListCopy;
|
||||
|
||||
bufferListCopy.splice(bufferListCopy.begin(), bufferList);
|
||||
|
||||
bufferListSafeLock.unlock(); // UNLOCK BUFFER
|
||||
|
||||
table->insert(bufferListCopy, bulkHandle);
|
||||
}
|
||||
}
|
||||
|
||||
// a last flush after component stopped
|
||||
FsckModificationEventList bufferListCopy;
|
||||
|
||||
{
|
||||
const std::lock_guard<Mutex> bufferListLock(bufferListMutex);
|
||||
bufferListCopy.splice(bufferListCopy.begin(), bufferList);
|
||||
}
|
||||
|
||||
table->insert(bufferListCopy, bulkHandle);
|
||||
}
|
||||
|
||||
/**
 * Appends a batch of modification events to the internal buffer and signals eventsAddedCond.
 *
 * The two lists are consumed pairwise (n-th event type together with n-th entry ID). While the
 * buffer holds MODHANDLER_MAXSIZE_EVENTLIST or more events, the caller is held back and
 * re-checks after each (timed) wait on eventsFlushedCond.
 *
 * @param eventTypeList event types, one per event
 * @param entryIDList entry IDs, one per event; must have the same length as eventTypeList
 * @return false (and nothing is buffered) if the lists differ in length, true otherwise
 */
bool ModificationEventHandler::add(UInt8List& eventTypeList, StringList& entryIDList)
{
   const char* logContext = "ModificationEventHandler (add)";

   if ( unlikely(eventTypeList.size() != entryIDList.size()) )
   {
      LogContext(logContext).logErr("Unable to add events. The lists do not have equal sizes.");
      return false;
   }

   // hold the caller back until the buffer has dropped below its size limit
   for ( ; ; )
   {
      bool hasRoom;

      {
         const std::lock_guard<Mutex> lock(bufferListMutex);
         hasRoom = (bufferList.size() < MODHANDLER_MAXSIZE_EVENTLIST);
      }

      if (hasRoom)
         break;

      const std::lock_guard<Mutex> flushedLock(eventsFlushedMutex);
      eventsFlushedCond.timedwait(&eventsFlushedMutex, 2000);
   }

   ZipIterRange<UInt8List, StringList> zipped(eventTypeList, entryIDList);

   {
      const std::lock_guard<Mutex> bufferListLock(bufferListMutex);

      while ( !zipped.empty() )
      {
         FsckModificationEvent event((ModificationEventType)*(zipped()->first),
            *(zipped()->second) );

         bufferList.push_back(event);

         ++zipped;
      }
   }

   // let the handler thread know that there is new work
   eventsAddedCond.signal();

   return true;
}
|
||||
44
fsck/source/components/ModificationEventHandler.h
Normal file
44
fsck/source/components/ModificationEventHandler.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#ifndef MODIFICATIONEVENTHANDLER_H
|
||||
#define MODIFICATIONEVENTHANDLER_H
|
||||
|
||||
#include <common/fsck/FsckModificationEvent.h>
|
||||
#include <common/threading/Condition.h>
|
||||
#include <database/FsckDBTable.h>
|
||||
|
||||
|
||||
#define MODHANDLER_MAXSIZE_EVENTLIST 50000
|
||||
#define MODHANDLER_MINSIZE_FLUSH 200
|
||||
|
||||
class ModificationEventHandler: public PThread
|
||||
{
|
||||
public:
|
||||
ModificationEventHandler(FsckDBModificationEventsTable& table);
|
||||
|
||||
virtual void run();
|
||||
|
||||
bool add(UInt8List& eventTypeList, StringList& entryIDList);
|
||||
|
||||
private:
|
||||
FsckDBModificationEventsTable* table;
|
||||
|
||||
FsckModificationEventList bufferList;
|
||||
|
||||
Mutex bufferListMutex;
|
||||
Mutex bufferListCopyMutex;
|
||||
Mutex flushMutex;
|
||||
|
||||
Mutex eventsAddedMutex;
|
||||
Condition eventsAddedCond;
|
||||
Mutex eventsFlushedMutex;
|
||||
Condition eventsFlushedCond;
|
||||
|
||||
public:
|
||||
void stop()
|
||||
{
|
||||
selfTerminate();
|
||||
eventsAddedCond.signal();
|
||||
join();
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* MODIFICATIONEVENTHANDLER_H */
|
||||
97
fsck/source/components/worker/AdjustChunkPermissionsWork.cpp
Normal file
97
fsck/source/components/worker/AdjustChunkPermissionsWork.cpp
Normal file
@@ -0,0 +1,97 @@
|
||||
#include "AdjustChunkPermissionsWork.h"
|
||||
#include <common/net/message/fsck/AdjustChunkPermissionsMsg.h>
|
||||
#include <common/net/message/fsck/AdjustChunkPermissionsRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <toolkit/FsckException.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
/**
 * Stores the collaborators needed by process(); no work is done here.
 *
 * @param node node that the adjust requests are sent to
 * @param counter incremented once by process() when the work package is finished
 * @param fileCount increased by the result count of every response
 * @param errorCount increased by the error count of every response that reports errors
 */
AdjustChunkPermissionsWork::AdjustChunkPermissionsWork(Node& node, SynchronizedCounter* counter,
   AtomicUInt64* fileCount, AtomicUInt64* errorCount) :
   log("AdjustChunkPermissionsWork"), node(node), counter(counter), fileCount(fileCount),
   errorCount(errorCount)
{
   // members are fully set up by the initializer list
}
|
||||
|
||||
// nothing to release explicitly
AdjustChunkPermissionsWork::~AdjustChunkPermissionsWork() = default;
|
||||
|
||||
void AdjustChunkPermissionsWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
|
||||
unsigned bufOutLen)
|
||||
{
|
||||
log.log(4, "Processing AdjustChunkPermissionsWork");
|
||||
|
||||
try
|
||||
{
|
||||
doWork(false);
|
||||
doWork(true);
|
||||
// work package finished => increment counter
|
||||
this->counter->incCount();
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
// exception thrown, but work package is finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
// after incrementing counter, re-throw exception
|
||||
throw;
|
||||
}
|
||||
|
||||
log.log(4, "Processed AdjustChunkPermissionsWork");
|
||||
}
|
||||
|
||||
void AdjustChunkPermissionsWork::doWork(bool isBuddyMirrored)
|
||||
{
|
||||
for ( unsigned firstLevelhashDirNum = 0;
|
||||
firstLevelhashDirNum <= META_DENTRIES_LEVEL1_SUBDIR_NUM - 1; firstLevelhashDirNum++ )
|
||||
{
|
||||
for ( unsigned secondLevelhashDirNum = 0;
|
||||
secondLevelhashDirNum < META_DENTRIES_LEVEL2_SUBDIR_NUM; secondLevelhashDirNum++ )
|
||||
{
|
||||
unsigned hashDirNum = StorageTk::mergeHashDirs(firstLevelhashDirNum,
|
||||
secondLevelhashDirNum);
|
||||
|
||||
int64_t hashDirOffset = 0;
|
||||
int64_t contDirOffset = 0;
|
||||
std::string currentContDirID;
|
||||
unsigned resultCount = 0;
|
||||
|
||||
do
|
||||
{
|
||||
AdjustChunkPermissionsMsg adjustChunkPermissionsMsg(hashDirNum, currentContDirID,
|
||||
ADJUST_AT_ONCE, hashDirOffset, contDirOffset, isBuddyMirrored);
|
||||
|
||||
const auto respMsg = MessagingTk::requestResponse(node, adjustChunkPermissionsMsg,
|
||||
NETMSGTYPE_AdjustChunkPermissionsResp);
|
||||
|
||||
if (respMsg)
|
||||
{
|
||||
auto* adjustChunkPermissionsRespMsg = (AdjustChunkPermissionsRespMsg*) respMsg.get();
|
||||
|
||||
// set new parameters
|
||||
currentContDirID = adjustChunkPermissionsRespMsg->getCurrentContDirID();
|
||||
hashDirOffset = adjustChunkPermissionsRespMsg->getNewHashDirOffset();
|
||||
contDirOffset = adjustChunkPermissionsRespMsg->getNewContDirOffset();
|
||||
resultCount = adjustChunkPermissionsRespMsg->getCount();
|
||||
|
||||
this->fileCount->increase(resultCount);
|
||||
|
||||
if (adjustChunkPermissionsRespMsg->getErrorCount() > 0)
|
||||
this->errorCount->increase(adjustChunkPermissionsRespMsg->getErrorCount());
|
||||
}
|
||||
else
|
||||
{
|
||||
throw FsckException("Communication error occured with node " + node.getAlias());
|
||||
}
|
||||
|
||||
// if any of the worker threads threw an exception, we should stop now!
|
||||
if ( Program::getApp()->getSelfTerminate() )
|
||||
return;
|
||||
|
||||
} while ( resultCount > 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
32
fsck/source/components/worker/AdjustChunkPermissionsWork.h
Normal file
32
fsck/source/components/worker/AdjustChunkPermissionsWork.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef ADJUSTCHUNKPERMISSIONSWORK_H
|
||||
#define ADJUSTCHUNKPERMISSIONSWORK_H
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/toolkit/SynchronizedCounter.h>
|
||||
|
||||
#include <database/FsckDB.h>
|
||||
|
||||
// the size of one packet, i.e. how many files are adjusted at once; basically just
|
||||
// limited to have some control and give the user feedback
|
||||
#define ADJUST_AT_ONCE 50
|
||||
|
||||
class AdjustChunkPermissionsWork : public Work
|
||||
{
|
||||
public:
|
||||
AdjustChunkPermissionsWork(Node& node, SynchronizedCounter* counter, AtomicUInt64* fileCount,
|
||||
AtomicUInt64* errorCount);
|
||||
virtual ~AdjustChunkPermissionsWork();
|
||||
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
|
||||
|
||||
private:
|
||||
LogContext log;
|
||||
Node& node;
|
||||
SynchronizedCounter* counter;
|
||||
AtomicUInt64* fileCount;
|
||||
AtomicUInt64* errorCount;
|
||||
|
||||
void doWork(bool isBuddyMirrored);
|
||||
};
|
||||
|
||||
#endif /* ADJUSTCHUNKPERMISSIONSWORK_H */
|
||||
152
fsck/source/components/worker/RetrieveChunksWork.cpp
Normal file
152
fsck/source/components/worker/RetrieveChunksWork.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
#include "RetrieveChunksWork.h"
|
||||
|
||||
#include <common/net/message/fsck/FetchFsckChunkListMsg.h>
|
||||
#include <common/net/message/fsck/FetchFsckChunkListRespMsg.h>
|
||||
#include <common/storage/Storagedata.h>
|
||||
#include <common/toolkit/FsckTk.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/StorageTk.h>
|
||||
|
||||
#include <database/FsckDBException.h>
|
||||
#include <toolkit/FsckException.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
/**
 * Takes over the node handle, looks up the chunks table and the malformed chunks list in the
 * database and obtains a bulk handle for the chunks table. The work starts out as
 * "not started"; the barrier is set up for two parties (see waitForStarted()).
 */
RetrieveChunksWork::RetrieveChunksWork(FsckDB* db, NodeHandle node, SynchronizedCounter* counter,
   AtomicUInt64* numChunksFound, bool forceRestart) :
   log("RetrieveChunksWork"),
   node(std::move(node)),
   counter(counter),
   numChunksFound(numChunksFound),
   chunks(db->getChunksTable()),
   chunksHandle(chunks->newBulkHandle()),
   malformedChunks(db->getMalformedChunksList()),
   forceRestart(forceRestart),
   started(false),
   startedBarrier(2)
{
   // everything is set up by the initializer list
}
|
||||
|
||||
// nothing to release explicitly
RetrieveChunksWork::~RetrieveChunksWork() = default;
|
||||
|
||||
void RetrieveChunksWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
|
||||
{
|
||||
log.log(Log_DEBUG, "Processing RetrieveChunksWork");
|
||||
|
||||
try
|
||||
{
|
||||
doWork();
|
||||
// flush buffers before signaling completion
|
||||
chunks->flush(chunksHandle);
|
||||
// work package finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
// exception thrown, but work package is finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
// after incrementing counter, re-throw exception
|
||||
throw;
|
||||
}
|
||||
|
||||
log.log(Log_DEBUG, "Processed RetrieveChunksWork");
|
||||
}
|
||||
|
||||
void RetrieveChunksWork::doWork()
|
||||
{
|
||||
// take the node associated with the current target and send a RetrieveChunksMsg to
|
||||
// that node; the chunks are retrieved incrementally
|
||||
if ( node )
|
||||
{
|
||||
std::string nodeID = node->getAlias();
|
||||
FetchFsckChunkListStatus status = FetchFsckChunkListStatus_NOTSTARTED;
|
||||
unsigned resultCount = 0;
|
||||
|
||||
do
|
||||
{
|
||||
FetchFsckChunkListMsg fetchFsckChunkListMsg(RETRIEVE_CHUNKS_PACKET_SIZE, status,
|
||||
forceRestart);
|
||||
|
||||
const auto respMsg = MessagingTk::requestResponse(*node, fetchFsckChunkListMsg,
|
||||
NETMSGTYPE_FetchFsckChunkListResp);
|
||||
|
||||
if (respMsg)
|
||||
{
|
||||
auto* fetchFsckChunkListRespMsg = (FetchFsckChunkListRespMsg*) respMsg.get();
|
||||
|
||||
FsckChunkList& chunks = fetchFsckChunkListRespMsg->getChunkList();
|
||||
resultCount = chunks.size();
|
||||
|
||||
status = fetchFsckChunkListRespMsg->getStatus();
|
||||
|
||||
// check entry IDs
|
||||
for (auto it = chunks.begin(); it != chunks.end(); )
|
||||
{
|
||||
if (db::EntryID::tryFromStr(it->getID()).first
|
||||
&& it->getSavedPath()->str().size() <= db::Chunk::SAVED_PATH_SIZE)
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
++it;
|
||||
malformedChunks->append(*std::prev(it));
|
||||
chunks.erase(std::prev(it));
|
||||
}
|
||||
|
||||
if (status == FetchFsckChunkListStatus_NOTSTARTED)
|
||||
{
|
||||
// Another fsck run is still in progress or was aborted, and --forceRestart was not
|
||||
// set - this means we can't start a new chunk fetcher.
|
||||
started = false;
|
||||
startedBarrier.wait();
|
||||
startedBarrier.wait();
|
||||
return;
|
||||
}
|
||||
else if (status == FetchFsckChunkListStatus_READERROR)
|
||||
{
|
||||
throw FsckException("Read error occured while fetching chunks from node; nodeID: "
|
||||
+ nodeID);
|
||||
}
|
||||
|
||||
if (!started)
|
||||
{
|
||||
started = true;
|
||||
startedBarrier.wait();
|
||||
startedBarrier.wait();
|
||||
}
|
||||
|
||||
this->chunks->insert(chunks, this->chunksHandle);
|
||||
|
||||
numChunksFound->increase(resultCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw FsckException("Communication error occured with node; nodeID: " + nodeID);
|
||||
}
|
||||
|
||||
if ( Program::getApp()->getShallAbort() )
|
||||
break;
|
||||
|
||||
} while ( (resultCount > 0) || (status == FetchFsckChunkListStatus_RUNNING) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// basically this should never ever happen
|
||||
log.logErr("Requested node does not exist");
|
||||
throw FsckException("Requested node does not exist");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Waits until started conditin is is signalled and returns the value of stared
|
||||
* @param isStarted ptr to boolean which is set to whether the server replied it actually started
|
||||
* the process. Note: this is a ptr and not a return to ensure the member variable
|
||||
* started is no longer accessed after doWork is finished and the object possibly
|
||||
* already deleted.
|
||||
*/
|
||||
void RetrieveChunksWork::waitForStarted(bool* isStarted)
|
||||
{
|
||||
startedBarrier.wait();
|
||||
*isStarted = started;
|
||||
startedBarrier.wait();
|
||||
}
|
||||
57
fsck/source/components/worker/RetrieveChunksWork.h
Normal file
57
fsck/source/components/worker/RetrieveChunksWork.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef RETRIEVECHUNKSWORK_H
|
||||
#define RETRIEVECHUNKSWORK_H
|
||||
|
||||
/*
|
||||
* retrieve all chunks from one storage server and save them to DB
|
||||
*/
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/threading/Barrier.h>
|
||||
#include <common/toolkit/SynchronizedCounter.h>
|
||||
#include <database/FsckDB.h>
|
||||
#include <database/FsckDBTable.h>
|
||||
|
||||
// the size of one response packet, i.e. how many chunks are asked for at once
|
||||
#define RETRIEVE_CHUNKS_PACKET_SIZE 400
|
||||
|
||||
class RetrieveChunksWork : public Work
|
||||
{
|
||||
public:
|
||||
/*
|
||||
* @param db database instance
|
||||
* @param node pointer to the node to retrieve data from
|
||||
* @param counter a pointer to a Synchronized counter; this is incremented by one at the end
|
||||
* and the calling thread can wait for the counter
|
||||
* @param numChunksFound
|
||||
* @param forceRestart In case the storage servers' chunk fetchers still have data from a
|
||||
* previous run, force a restart instead of aborting with an error
|
||||
*/
|
||||
RetrieveChunksWork(FsckDB* db, NodeHandle node, SynchronizedCounter* counter,
|
||||
AtomicUInt64* numChunksFound, bool forceRestart);
|
||||
virtual ~RetrieveChunksWork();
|
||||
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
|
||||
|
||||
void waitForStarted(bool* isStarted);
|
||||
|
||||
|
||||
private:
|
||||
LogContext log;
|
||||
NodeHandle node;
|
||||
SynchronizedCounter* counter;
|
||||
AtomicUInt64* numChunksFound;
|
||||
|
||||
FsckDBChunksTable* chunks;
|
||||
FsckDBChunksTable::BulkHandle chunksHandle;
|
||||
|
||||
DiskList<FsckChunk>* malformedChunks;
|
||||
|
||||
bool forceRestart;
|
||||
|
||||
void doWork();
|
||||
|
||||
bool started;
|
||||
Barrier startedBarrier;
|
||||
};
|
||||
|
||||
#endif /* RETRIEVECHUNKSWORK_H */
|
||||
222
fsck/source/components/worker/RetrieveDirEntriesWork.cpp
Normal file
222
fsck/source/components/worker/RetrieveDirEntriesWork.cpp
Normal file
@@ -0,0 +1,222 @@
|
||||
#include "RetrieveDirEntriesWork.h"
|
||||
#include <common/net/message/fsck/RetrieveDirEntriesMsg.h>
|
||||
#include <common/net/message/fsck/RetrieveDirEntriesRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/MetaStorageTk.h>
|
||||
#include <database/FsckDBException.h>
|
||||
#include <toolkit/FsckException.h>
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
#include <set>
|
||||
|
||||
/**
 * Stores the collaborators and looks up the dentry, file inode and content directory tables
 * in the database, obtaining one bulk handle for each of them.
 */
RetrieveDirEntriesWork::RetrieveDirEntriesWork(FsckDB* db, Node& node, SynchronizedCounter* counter,
   AtomicUInt64& errors, unsigned hashDirStart, unsigned hashDirEnd,
   AtomicUInt64* numDentriesFound, AtomicUInt64* numFileInodesFound,
   std::set<FsckTargetID>& usedTargets) :
   log("RetrieveDirEntriesWork"),
   node(node),
   counter(counter),
   errors(&errors),
   numDentriesFound(numDentriesFound),
   numFileInodesFound(numFileInodesFound),
   usedTargets(&usedTargets),
   hashDirStart(hashDirStart),
   hashDirEnd(hashDirEnd),
   dentries(db->getDentryTable()),
   dentriesHandle(dentries->newBulkHandle()),
   files(db->getFileInodesTable()),
   filesHandle(files->newBulkHandle()),
   contDirs(db->getContDirsTable()),
   contDirsHandle(contDirs->newBulkHandle())
{
   // everything is set up by the initializer list
}
|
||||
|
||||
void RetrieveDirEntriesWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
|
||||
unsigned bufOutLen)
|
||||
{
|
||||
log.log(4, "Processing RetrieveDirEntriesWork");
|
||||
|
||||
try
|
||||
{
|
||||
doWork(false);
|
||||
doWork(true);
|
||||
// flush buffers before signaling completion
|
||||
dentries->flush(dentriesHandle);
|
||||
files->flush(filesHandle);
|
||||
contDirs->flush(contDirsHandle);
|
||||
// work package finished => increment counter
|
||||
this->counter->incCount();
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
// exception thrown, but work package is finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
// after incrementing counter, re-throw exception
|
||||
throw;
|
||||
}
|
||||
|
||||
log.log(4, "Processed RetrieveDirEntriesWork");
|
||||
}
|
||||
|
||||
void RetrieveDirEntriesWork::doWork(bool isBuddyMirrored)
|
||||
{
|
||||
for ( unsigned firstLevelhashDirNum = hashDirStart; firstLevelhashDirNum <= hashDirEnd;
|
||||
firstLevelhashDirNum++ )
|
||||
{
|
||||
for ( unsigned secondLevelhashDirNum = 0;
|
||||
secondLevelhashDirNum < META_DENTRIES_LEVEL2_SUBDIR_NUM; secondLevelhashDirNum++ )
|
||||
{
|
||||
unsigned hashDirNum = StorageTk::mergeHashDirs(firstLevelhashDirNum,
|
||||
secondLevelhashDirNum);
|
||||
|
||||
int64_t hashDirOffset = 0;
|
||||
int64_t contDirOffset = 0;
|
||||
std::string currentContDirID;
|
||||
int resultCount = 0;
|
||||
|
||||
do
|
||||
{
|
||||
RetrieveDirEntriesMsg retrieveDirEntriesMsg(hashDirNum, currentContDirID,
|
||||
RETRIEVE_DIR_ENTRIES_PACKET_SIZE, hashDirOffset, contDirOffset, isBuddyMirrored);
|
||||
|
||||
const auto respMsg = MessagingTk::requestResponse(node, retrieveDirEntriesMsg,
|
||||
NETMSGTYPE_RetrieveDirEntriesResp);
|
||||
|
||||
if (respMsg)
|
||||
{
|
||||
auto* retrieveDirEntriesRespMsg = (RetrieveDirEntriesRespMsg*) respMsg.get();
|
||||
|
||||
// set new parameters
|
||||
currentContDirID = retrieveDirEntriesRespMsg->getCurrentContDirID();
|
||||
hashDirOffset = retrieveDirEntriesRespMsg->getNewHashDirOffset();
|
||||
contDirOffset = retrieveDirEntriesRespMsg->getNewContDirOffset();
|
||||
|
||||
// parse directory entries
|
||||
FsckDirEntryList& dirEntries = retrieveDirEntriesRespMsg->getDirEntries();
|
||||
// this is the actual result count we are interested in, because if no dirEntries
|
||||
// were read, there is nothing left on the server
|
||||
|
||||
resultCount = dirEntries.size();
|
||||
|
||||
// check dentry entry IDs
|
||||
for (auto it = dirEntries.begin(); it != dirEntries.end(); )
|
||||
{
|
||||
if (db::EntryID::tryFromStr(it->getID()).first
|
||||
&& db::EntryID::tryFromStr(it->getParentDirID()).first)
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Found dentry with invalid entry IDs.",
|
||||
("node", it->getSaveNodeID()),
|
||||
("isBuddyMirrored", it->getIsBuddyMirrored()),
|
||||
("entryID", it->getID()),
|
||||
("parentEntryID", it->getParentDirID()));
|
||||
|
||||
++it;
|
||||
errors->increase();
|
||||
dirEntries.erase(std::prev(it));
|
||||
}
|
||||
|
||||
this->dentries->insert(dirEntries, this->dentriesHandle);
|
||||
|
||||
numDentriesFound->increase(resultCount);
|
||||
|
||||
// parse inlined file inodes
|
||||
FsckFileInodeList& inlinedFileInodes =
|
||||
retrieveDirEntriesRespMsg->getInlinedFileInodes();
|
||||
|
||||
// check inode entry IDs
|
||||
for (auto it = inlinedFileInodes.begin(); it != inlinedFileInodes.end(); )
|
||||
{
|
||||
if (db::EntryID::tryFromStr(it->getID()).first
|
||||
&& db::EntryID::tryFromStr(it->getParentDirID()).first
|
||||
&& (!it->getPathInfo()->hasOrigFeature()
|
||||
|| db::EntryID::tryFromStr(
|
||||
it->getPathInfo()->getOrigParentEntryID()).first))
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Found inode with invalid entry IDs.",
|
||||
("node", it->getSaveNodeID()),
|
||||
("isBuddyMirrored", it->getIsBuddyMirrored()),
|
||||
("entryID", it->getID()),
|
||||
("parentEntryID", it->getParentDirID()),
|
||||
("origParent", it->getPathInfo()->getOrigParentEntryID()));
|
||||
|
||||
++it;
|
||||
errors->increase();
|
||||
inlinedFileInodes.erase(std::prev(it));
|
||||
}
|
||||
|
||||
struct ops
|
||||
{
|
||||
static bool dentryCmp(const FsckDirEntry& a, const FsckDirEntry& b)
|
||||
{
|
||||
return a.getID() < b.getID();
|
||||
}
|
||||
|
||||
static bool inodeCmp(const FsckFileInode& a, const FsckFileInode& b)
|
||||
{
|
||||
return a.getID() < b.getID();
|
||||
}
|
||||
};
|
||||
|
||||
dirEntries.sort(ops::dentryCmp);
|
||||
inlinedFileInodes.sort(ops::inodeCmp);
|
||||
|
||||
this->files->insert(inlinedFileInodes, this->filesHandle);
|
||||
|
||||
numFileInodesFound->increase(inlinedFileInodes.size());
|
||||
|
||||
// add used targetIDs
|
||||
for ( FsckFileInodeListIter iter = inlinedFileInodes.begin();
|
||||
iter != inlinedFileInodes.end(); iter++ )
|
||||
{
|
||||
FsckTargetIDType fsckTargetIDType;
|
||||
|
||||
if (iter->getStripePatternType() == FsckStripePatternType_BUDDYMIRROR)
|
||||
fsckTargetIDType = FsckTargetIDType_BUDDYGROUP;
|
||||
else
|
||||
fsckTargetIDType = FsckTargetIDType_TARGET;
|
||||
|
||||
for (auto targetsIter = iter->getStripeTargets().begin();
|
||||
targetsIter != iter->getStripeTargets().end(); targetsIter++)
|
||||
{
|
||||
this->usedTargets->insert(FsckTargetID(*targetsIter, fsckTargetIDType) );
|
||||
}
|
||||
}
|
||||
|
||||
// parse all new cont. directories
|
||||
FsckContDirList& contDirs = retrieveDirEntriesRespMsg->getContDirs();
|
||||
|
||||
// check entry IDs
|
||||
for (auto it = contDirs.begin(); it != contDirs.end(); )
|
||||
{
|
||||
if (db::EntryID::tryFromStr(it->getID()).first)
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Found content directory with invalid entry ID.",
|
||||
("node", it->getSaveNodeID()),
|
||||
("isBuddyMirrored", it->getIsBuddyMirrored()),
|
||||
("entryID", it->getID()));
|
||||
|
||||
++it;
|
||||
errors->increase();
|
||||
contDirs.erase(std::prev(it));
|
||||
}
|
||||
|
||||
this->contDirs->insert(contDirs, this->contDirsHandle);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw FsckException("Communication error occured with node " + node.getAlias());
|
||||
}
|
||||
|
||||
// if any of the worker threads threw an exception, we should stop now!
|
||||
if ( Program::getApp()->getShallAbort() )
|
||||
return;
|
||||
|
||||
} while ( resultCount > 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
62
fsck/source/components/worker/RetrieveDirEntriesWork.h
Normal file
62
fsck/source/components/worker/RetrieveDirEntriesWork.h
Normal file
@@ -0,0 +1,62 @@
|
||||
#ifndef RETRIEVEDIRENTRIESWORK_H
|
||||
#define RETRIEVEDIRENTRIESWORK_H
|
||||
|
||||
/*
|
||||
* retrieve all dir entries from one node, inside a specified range of hashDirs and save them to DB
|
||||
*/
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/toolkit/SynchronizedCounter.h>
|
||||
|
||||
#include <database/FsckDB.h>
|
||||
#include <database/FsckDBTable.h>
|
||||
|
||||
// the size of one response packet, i.e. how many dentries are asked for at once
|
||||
#define RETRIEVE_DIR_ENTRIES_PACKET_SIZE 500
|
||||
|
||||
class RetrieveDirEntriesWork : public Work
|
||||
{
|
||||
public:
|
||||
/*
|
||||
* @param db database instance
|
||||
* @param node the node to retrieve data from
|
||||
* @param counter a pointer to a Synchronized counter; this is incremented by one at the end
|
||||
* and the calling thread can wait for the counter
|
||||
* @param hashDirStart the first top-level hashDir to open
|
||||
* @param hashDirEnd the last top-level hashDir to open
|
||||
* @param numDentriesFound
|
||||
* @param numFileInodesFound
|
||||
*/
|
||||
RetrieveDirEntriesWork(FsckDB* db, Node& node, SynchronizedCounter* counter,
|
||||
AtomicUInt64& errors, unsigned hashDirStart, unsigned hashDirEnd,
|
||||
AtomicUInt64* numDentriesFound, AtomicUInt64* numFileInodesFound,
|
||||
std::set<FsckTargetID>& usedTargets);
|
||||
|
||||
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
|
||||
|
||||
private:
|
||||
LogContext log;
|
||||
Node& node;
|
||||
SynchronizedCounter* counter;
|
||||
AtomicUInt64* errors;
|
||||
AtomicUInt64* numDentriesFound;
|
||||
AtomicUInt64* numFileInodesFound;
|
||||
std::set<FsckTargetID>* usedTargets;
|
||||
|
||||
unsigned hashDirStart;
|
||||
unsigned hashDirEnd;
|
||||
|
||||
FsckDBDentryTable* dentries;
|
||||
FsckDBDentryTable::BulkHandle dentriesHandle;
|
||||
|
||||
FsckDBFileInodesTable* files;
|
||||
FsckDBFileInodesTable::BulkHandle filesHandle;
|
||||
|
||||
FsckDBContDirsTable* contDirs;
|
||||
FsckDBContDirsTable::BulkHandle contDirsHandle;
|
||||
|
||||
void doWork(bool isBuddyMirrored);
|
||||
};
|
||||
|
||||
#endif /* RETRIEVEDIRENTRIESWORK_H */
|
||||
125
fsck/source/components/worker/RetrieveFsIDsWork.cpp
Normal file
125
fsck/source/components/worker/RetrieveFsIDsWork.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
#include "RetrieveFsIDsWork.h"
|
||||
#include <common/net/message/fsck/RetrieveFsIDsMsg.h>
|
||||
#include <common/net/message/fsck/RetrieveFsIDsRespMsg.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/MetaStorageTk.h>
|
||||
#include <database/FsckDBException.h>
|
||||
#include <toolkit/FsckException.h>
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
|
||||
/**
 * Stores the collaborators, looks up the FS-IDs table in the database and obtains a bulk
 * handle for it.
 */
RetrieveFsIDsWork::RetrieveFsIDsWork(FsckDB* db, Node& node, SynchronizedCounter* counter,
   AtomicUInt64& errors, unsigned hashDirStart, unsigned hashDirEnd) :
   log("RetrieveFsIDsWork"),
   node(node),
   counter(counter),
   errors(&errors),
   hashDirStart(hashDirStart),
   hashDirEnd(hashDirEnd),
   table(db->getFsIDsTable()),
   bulkHandle(table->newBulkHandle())
{
   // everything is set up by the initializer list
}
|
||||
|
||||
// nothing to release explicitly
RetrieveFsIDsWork::~RetrieveFsIDsWork() = default;
|
||||
|
||||
void RetrieveFsIDsWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
|
||||
unsigned bufOutLen)
|
||||
{
|
||||
log.log(4, "Processing RetrieveFsIDsWork");
|
||||
|
||||
try
|
||||
{
|
||||
doWork(false);
|
||||
doWork(true);
|
||||
table->flush(bulkHandle);
|
||||
// work package finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
// exception thrown, but work package is finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
// after incrementing counter, re-throw exception
|
||||
throw;
|
||||
}
|
||||
|
||||
log.log(4, "Processed RetrieveFsIDsWork");
|
||||
}
|
||||
|
||||
void RetrieveFsIDsWork::doWork(bool isBuddyMirrored)
|
||||
{
|
||||
for ( unsigned firstLevelhashDirNum = hashDirStart; firstLevelhashDirNum <= hashDirEnd;
|
||||
firstLevelhashDirNum++ )
|
||||
{
|
||||
for ( unsigned secondLevelhashDirNum = 0;
|
||||
secondLevelhashDirNum < META_DENTRIES_LEVEL2_SUBDIR_NUM; secondLevelhashDirNum++ )
|
||||
{
|
||||
unsigned hashDirNum = StorageTk::mergeHashDirs(firstLevelhashDirNum,
|
||||
secondLevelhashDirNum);
|
||||
|
||||
int64_t hashDirOffset = 0;
|
||||
int64_t contDirOffset = 0;
|
||||
std::string currentContDirID;
|
||||
int resultCount = 0;
|
||||
|
||||
do
|
||||
{
|
||||
RetrieveFsIDsMsg retrieveFsIDsMsg(hashDirNum, isBuddyMirrored, currentContDirID,
|
||||
RETRIEVE_FSIDS_PACKET_SIZE, hashDirOffset, contDirOffset);
|
||||
|
||||
const auto respMsg = MessagingTk::requestResponse(node, retrieveFsIDsMsg,
|
||||
NETMSGTYPE_RetrieveFsIDsResp);
|
||||
|
||||
if (respMsg)
|
||||
{
|
||||
auto* retrieveFsIDsRespMsg = (RetrieveFsIDsRespMsg*) respMsg.get();
|
||||
|
||||
// set new parameters
|
||||
currentContDirID = retrieveFsIDsRespMsg->getCurrentContDirID();
|
||||
hashDirOffset = retrieveFsIDsRespMsg->getNewHashDirOffset();
|
||||
contDirOffset = retrieveFsIDsRespMsg->getNewContDirOffset();
|
||||
|
||||
// parse FS-IDs
|
||||
FsckFsIDList& fsIDs = retrieveFsIDsRespMsg->getFsIDs();
|
||||
|
||||
// this is the actual result count we are interested in, because if no fsIDs
|
||||
// were read, there is nothing left on the server
|
||||
resultCount = fsIDs.size();
|
||||
|
||||
// check entry IDs
|
||||
for (auto it = fsIDs.begin(); it != fsIDs.end(); )
|
||||
{
|
||||
if (db::EntryID::tryFromStr(it->getID()).first
|
||||
&& db::EntryID::tryFromStr(it->getParentDirID()).first)
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Found fsid file with invalid entry IDs.",
|
||||
("node", it->getSaveNodeID()),
|
||||
("isBuddyMirrored", it->getIsBuddyMirrored()),
|
||||
("entryID", it->getID()),
|
||||
("parentEntryID", it->getParentDirID()));
|
||||
|
||||
++it;
|
||||
errors->increase();
|
||||
fsIDs.erase(std::prev(it));
|
||||
}
|
||||
|
||||
this->table->insert(fsIDs, this->bulkHandle);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw FsckException("Communication error occured with node " + node.getAlias());
|
||||
}
|
||||
|
||||
// if any of the worker threads threw an exception, we should stop now!
|
||||
if ( Program::getApp()->getShallAbort() )
|
||||
return;
|
||||
|
||||
} while ( resultCount > 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
49
fsck/source/components/worker/RetrieveFsIDsWork.h
Normal file
49
fsck/source/components/worker/RetrieveFsIDsWork.h
Normal file
@@ -0,0 +1,49 @@
|
||||
#ifndef RETRIEVEFSIDSWORK_H
|
||||
#define RETRIEVEFSIDSWORK_H
|
||||
|
||||
/*
|
||||
* retrieve all FS-IDs from one node, inside a specified range of hashDirs and save them to DB
|
||||
*/
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/toolkit/SynchronizedCounter.h>
|
||||
|
||||
#include <database/FsckDB.h>
|
||||
#include <database/FsckDBTable.h>
|
||||
|
||||
// the size of one response packet, i.e. how many fsids are asked for at once
|
||||
#define RETRIEVE_FSIDS_PACKET_SIZE 1000
|
||||
|
||||
class RetrieveFsIDsWork : public Work
|
||||
{
|
||||
public:
|
||||
/*
|
||||
* @param db database instance
|
||||
* @param node the node to retrieve data from
|
||||
* @param counter a pointer to a Synchronized counter; this is incremented by one at the end
|
||||
* and the calling thread can wait for the counter
|
||||
* @param hashDirStart the first top-level hashDir to open
|
||||
* @param hashDirEnd the last top-level hashDir to open
|
||||
*/
|
||||
RetrieveFsIDsWork(FsckDB* db, Node& node, SynchronizedCounter* counter, AtomicUInt64& errors,
|
||||
unsigned hashDirStart, unsigned hashDirEnd);
|
||||
virtual ~RetrieveFsIDsWork();
|
||||
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
|
||||
|
||||
private:
|
||||
LogContext log;
|
||||
Node& node;
|
||||
SynchronizedCounter* counter;
|
||||
AtomicUInt64* errors;
|
||||
|
||||
unsigned hashDirStart;
|
||||
unsigned hashDirEnd;
|
||||
|
||||
FsckDBFsIDsTable* table;
|
||||
FsckDBFsIDsTable::BulkHandle bulkHandle;
|
||||
|
||||
void doWork(bool isBuddyMirrored);
|
||||
};
|
||||
|
||||
#endif /* RETRIEVEFSIDSWORK_H */
|
||||
187
fsck/source/components/worker/RetrieveInodesWork.cpp
Normal file
187
fsck/source/components/worker/RetrieveInodesWork.cpp
Normal file
@@ -0,0 +1,187 @@
|
||||
#include "RetrieveInodesWork.h"
|
||||
#include <common/net/message/fsck/RetrieveInodesMsg.h>
|
||||
#include <common/net/message/fsck/RetrieveInodesRespMsg.h>
|
||||
#include <common/toolkit/MetaStorageTk.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <database/FsckDBException.h>
|
||||
#include <toolkit/FsckException.h>
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
#include <set>
|
||||
|
||||
/*
 * Stores the caller-provided node, counters and target set, and opens one bulk handle on the
 * file inodes table and one on the dir inodes table of the given database.
 *
 * The initializers are listed in member declaration order; each bulk handle is created from
 * the table pointer that is initialized directly before it.
 */
RetrieveInodesWork::RetrieveInodesWork(FsckDB* db, Node& node, SynchronizedCounter* counter,
   AtomicUInt64& errors, unsigned hashDirStart, unsigned hashDirEnd,
   AtomicUInt64* numFileInodesFound, AtomicUInt64* numDirInodesFound,
   std::set<FsckTargetID>& usedTargets)
   : log("RetrieveInodesWork"),
     node(node),
     counter(counter),
     errors(&errors),
     usedTargets(&usedTargets),
     hashDirStart(hashDirStart),
     hashDirEnd(hashDirEnd),
     numFileInodesFound(numFileInodesFound),
     numDirInodesFound(numDirInodesFound),
     files(db->getFileInodesTable()),
     filesHandle(files->newBulkHandle()),
     dirs(db->getDirInodesTable()),
     dirsHandle(dirs->newBulkHandle())
{
   // nothing to do: all state is set up by the initializer list
}
|
||||
|
||||
void RetrieveInodesWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
|
||||
unsigned bufOutLen)
|
||||
{
|
||||
log.log(4, "Processing RetrieveInodesWork");
|
||||
|
||||
try
|
||||
{
|
||||
doWork(false);
|
||||
doWork(true);
|
||||
// flush buffers before signaling completion
|
||||
files->flush(filesHandle);
|
||||
dirs->flush(dirsHandle);
|
||||
// work package finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
}
|
||||
catch (std::exception &e)
|
||||
{
|
||||
// exception thrown, but work package is finished => increment counter
|
||||
this->counter->incCount();
|
||||
|
||||
// after incrementing counter, re-throw exception
|
||||
throw;
|
||||
}
|
||||
|
||||
log.log(4, "Processed RetrieveInodesWork");
|
||||
}
|
||||
|
||||
void RetrieveInodesWork::doWork(bool isBuddyMirrored)
|
||||
{
|
||||
const NumNodeID& metaRootID = Program::getApp()->getMetaRoot().getID();
|
||||
const NumNodeID& nodeID = node.getNumID();
|
||||
const NumNodeID nodeBuddyGroupID = NumNodeID(Program::getApp()->getMetaMirrorBuddyGroupMapper()
|
||||
->getBuddyGroupID(node.getNumID().val()));
|
||||
|
||||
for ( unsigned firstLevelhashDirNum = hashDirStart; firstLevelhashDirNum <= hashDirEnd;
|
||||
firstLevelhashDirNum++ )
|
||||
{
|
||||
for ( unsigned secondLevelhashDirNum = 0;
|
||||
secondLevelhashDirNum < META_DENTRIES_LEVEL2_SUBDIR_NUM; secondLevelhashDirNum++ )
|
||||
{
|
||||
unsigned hashDirNum = StorageTk::mergeHashDirs(firstLevelhashDirNum,
|
||||
secondLevelhashDirNum);
|
||||
|
||||
int64_t lastOffset = 0;
|
||||
size_t fileInodeCount;
|
||||
size_t dirInodeCount;
|
||||
|
||||
do
|
||||
{
|
||||
RetrieveInodesMsg retrieveInodesMsg(hashDirNum, lastOffset,
|
||||
RETRIEVE_INODES_PACKET_SIZE, isBuddyMirrored);
|
||||
|
||||
const auto respMsg = MessagingTk::requestResponse(node, retrieveInodesMsg,
|
||||
NETMSGTYPE_RetrieveInodesResp);
|
||||
if (respMsg)
|
||||
{
|
||||
auto* retrieveInodesRespMsg = (RetrieveInodesRespMsg*) respMsg.get();
|
||||
|
||||
// set new parameters
|
||||
lastOffset = retrieveInodesRespMsg->getLastOffset();
|
||||
|
||||
// parse all file inodes
|
||||
FsckFileInodeList& fileInodes = retrieveInodesRespMsg->getFileInodes();
|
||||
|
||||
// check inode entry IDs
|
||||
for (auto it = fileInodes.begin(); it != fileInodes.end(); )
|
||||
{
|
||||
if (db::EntryID::tryFromStr(it->getID()).first
|
||||
&& db::EntryID::tryFromStr(it->getParentDirID()).first
|
||||
&& (!it->getPathInfo()->hasOrigFeature()
|
||||
|| db::EntryID::tryFromStr(
|
||||
it->getPathInfo()->getOrigParentEntryID()).first))
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG(GENERAL, ERR, "Found inode with invalid entry IDs.",
|
||||
("node", it->getSaveNodeID()),
|
||||
("isBuddyMirrored", it->getIsBuddyMirrored()),
|
||||
("entryID", it->getID()),
|
||||
("parentEntryID", it->getParentDirID()),
|
||||
("origParent", it->getPathInfo()->getOrigParentEntryID()));
|
||||
|
||||
++it;
|
||||
errors->increase();
|
||||
fileInodes.erase(std::prev(it));
|
||||
}
|
||||
|
||||
// add targetIDs
|
||||
for (auto iter = fileInodes.begin(); iter != fileInodes.end(); iter++)
|
||||
{
|
||||
FsckTargetIDType fsckTargetIDType;
|
||||
|
||||
if (iter->getStripePatternType() == FsckStripePatternType_BUDDYMIRROR)
|
||||
fsckTargetIDType = FsckTargetIDType_BUDDYGROUP;
|
||||
else
|
||||
fsckTargetIDType = FsckTargetIDType_TARGET;
|
||||
|
||||
for (auto targetsIter = iter->getStripeTargets().begin();
|
||||
targetsIter != iter->getStripeTargets().end(); targetsIter++)
|
||||
{
|
||||
this->usedTargets->insert(FsckTargetID(*targetsIter, fsckTargetIDType) );
|
||||
}
|
||||
}
|
||||
|
||||
// parse all directory inodes
|
||||
FsckDirInodeList& dirInodes = retrieveInodesRespMsg->getDirInodes();
|
||||
|
||||
// check inode entry IDs
|
||||
for (auto it = dirInodes.begin(); it != dirInodes.end(); )
|
||||
{
|
||||
auto entryIDPair = db::EntryID::tryFromStr(it->getID());
|
||||
if (!entryIDPair.first ||
|
||||
!db::EntryID::tryFromStr(it->getParentDirID()).first)
|
||||
{
|
||||
LOG(GENERAL, ERR, "Found inode with invalid entry IDs.",
|
||||
("node", it->getSaveNodeID()),
|
||||
("isBuddyMirrored", it->getIsBuddyMirrored()),
|
||||
("entryID", it->getID()),
|
||||
("parentEntryID", it->getParentDirID()));
|
||||
|
||||
++it;
|
||||
errors->increase();
|
||||
dirInodes.erase(std::prev(it));
|
||||
continue;
|
||||
}
|
||||
|
||||
// remove root inodes from non root metas
|
||||
if (entryIDPair.second.isRootDir() &&
|
||||
((it->getIsBuddyMirrored() && nodeBuddyGroupID != metaRootID)
|
||||
|| (!it->getIsBuddyMirrored() && nodeID != metaRootID)))
|
||||
{
|
||||
++it;
|
||||
dirInodes.erase(std::prev(it));
|
||||
continue;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
|
||||
fileInodeCount = fileInodes.size();
|
||||
dirInodeCount = dirInodes.size();
|
||||
|
||||
this->files->insert(fileInodes, this->filesHandle);
|
||||
this->dirs->insert(dirInodes, this->dirsHandle);
|
||||
|
||||
numFileInodesFound->increase(fileInodeCount);
|
||||
numDirInodesFound->increase(dirInodeCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw FsckException("Communication error occured with node " + node.getAlias());
|
||||
}
|
||||
|
||||
// if any of the worker threads threw an exception, we should stop now!
|
||||
if ( Program::getApp()->getShallAbort() )
|
||||
return;
|
||||
|
||||
} while ( (fileInodeCount + dirInodeCount) > 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
58
fsck/source/components/worker/RetrieveInodesWork.h
Normal file
58
fsck/source/components/worker/RetrieveInodesWork.h
Normal file
@@ -0,0 +1,58 @@
|
||||
#ifndef RETRIEVEINODESWORK_H
|
||||
#define RETRIEVEINODESWORK_H
|
||||
|
||||
/*
|
||||
* retrieve all inodes from one node, inside a specified range of hashDirs and save them to DB
|
||||
|
||||
*/
|
||||
|
||||
#include <common/app/log/LogContext.h>
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/toolkit/SynchronizedCounter.h>
|
||||
#include <database/FsckDB.h>
|
||||
#include <database/FsckDBTable.h>
|
||||
|
||||
// the size of one response packet, i.e. how many inodes are asked for at once
|
||||
#define RETRIEVE_INODES_PACKET_SIZE 500
|
||||
|
||||
class RetrieveInodesWork : public Work
|
||||
{
|
||||
public:
|
||||
/*
|
||||
* @param db database instance
|
||||
* @param node the node to retrieve data from
|
||||
* @param counter a pointer to a Synchronized counter; this is incremented by one at the end
|
||||
* and the calling thread can wait for the counter
|
||||
* @param hashDirStart the first top-level hashDir to open
|
||||
* @param hashDirEnd the last top-level hashDir to open
|
||||
*/
|
||||
RetrieveInodesWork(FsckDB* db, Node& node, SynchronizedCounter* counter,
|
||||
AtomicUInt64& errors, unsigned hashDirStart, unsigned hashDirEnd,
|
||||
AtomicUInt64* numFileInodesFound, AtomicUInt64* numDirInodesFound,
|
||||
std::set<FsckTargetID>& usedTargets);
|
||||
|
||||
void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);
|
||||
|
||||
private:
|
||||
LogContext log;
|
||||
Node& node;
|
||||
SynchronizedCounter* counter;
|
||||
AtomicUInt64* errors;
|
||||
std::set<FsckTargetID>* usedTargets;
|
||||
|
||||
unsigned hashDirStart;
|
||||
unsigned hashDirEnd;
|
||||
|
||||
AtomicUInt64* numFileInodesFound;
|
||||
AtomicUInt64* numDirInodesFound;
|
||||
|
||||
FsckDBFileInodesTable* files;
|
||||
FsckDBFileInodesTable::BulkHandle filesHandle;
|
||||
|
||||
FsckDBDirInodesTable* dirs;
|
||||
FsckDBDirInodesTable::BulkHandle dirsHandle;
|
||||
|
||||
void doWork(bool isBuddyMirrored);
|
||||
};
|
||||
|
||||
#endif /* RETRIEVEINODESWORK_H */
|
||||
Reference in New Issue
Block a user