New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,57 @@
#include "DatagramListener.h"
#include <common/net/message/NetMessageTypes.h>
/**
 * Constructs the listener by forwarding every argument, together with the fixed label
 * "DGramLis", to the AbstractDatagramListener base class. No further setup happens here.
 */
DatagramListener::DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
   AcknowledgmentStore* ackStore, unsigned short udpPort, bool restrictOutboundInterfaces)
   : AbstractDatagramListener("DGramLis", netFilter, localNicList, ackStore, udpPort,
        restrictOutboundInterfaces)
{
   // intentionally empty: the base class constructor does all the work
}
void DatagramListener::handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg)
{
HighResolutionStats stats; // currently ignored
std::shared_ptr<StandardSocket> sock = findSenderSock(fromAddr->sin_addr);
if (sock == nullptr)
{
log.log(Log_WARNING, "Could not handle incoming message: no socket");
return;
}
NetMessage::ResponseContext rctx(fromAddr, sock.get(), sendBuf, DGRAMMGR_SENDBUF_SIZE, &stats);
const auto messageType = netMessageTypeToStr(msg->getMsgType());
switch(msg->getMsgType() )
{
// valid messages within this context
case NETMSGTYPE_Ack:
case NETMSGTYPE_Dummy:
case NETMSGTYPE_HeartbeatRequest:
case NETMSGTYPE_Heartbeat:
case NETMSGTYPE_MapTargets:
case NETMSGTYPE_PublishCapacities:
case NETMSGTYPE_RemoveNode:
case NETMSGTYPE_RefreshCapacityPools:
case NETMSGTYPE_RefreshStoragePools:
case NETMSGTYPE_RefreshTargetStates:
case NETMSGTYPE_SetMirrorBuddyGroup:
{
if(!msg->processIncoming(rctx) )
{
LOG(GENERAL, WARNING,
"Problem encountered during handling of incoming message.", messageType);
}
} break;
default:
{ // valid, but not within this context
log.logErr(
"Received a message that is invalid within the current context "
"from: " + Socket::ipaddrToStr(fromAddr->sin_addr) + "; "
"type: " + messageType );
} break;
};
}

View File

@@ -0,0 +1,18 @@
#pragma once
#include <common/components/AbstractDatagramListener.h>
/**
 * Datagram listener of this service. Derives from AbstractDatagramListener and provides the
 * handler for received messages.
 */
class DatagramListener : public AbstractDatagramListener
{
   public:
      DatagramListener(NetFilter* netFilter, NicAddressList& localNicList,
         AcknowledgmentStore* ackStore, unsigned short udpPort,
         bool restrictOutboundInterfaces);

   protected:
      // handler for a single received message
      virtual void handleIncomingMsg(struct sockaddr_in* fromAddr, NetMessage* msg);
};

View File

@@ -0,0 +1,51 @@
#include "DisposalGarbageCollector.h"
#include "app/App.h"
#include "program/Program.h"
#include <common/toolkit/DisposalCleaner.h>
/**
 * Tries to unlink one disposal entry via DisposalCleaner::unlinkFile().
 *
 * @param unlinked counter that is incremented when the unlink succeeded
 * @param owner node passed through to the unlink call
 * @param entryID ID of the entry to unlink
 * @param isMirrored passed through to the unlink call
 * @return always FhgfsOpsErr_SUCCESS; unlink failures are only logged
 */
FhgfsOpsErr deleteFile(unsigned& unlinked, Node& owner, const std::string& entryID, const bool isMirrored) {
   const auto err = DisposalCleaner::unlinkFile(owner, entryID, isMirrored);

   switch (err) {
      case FhgfsOpsErr_SUCCESS:
         ++unlinked;
         break;

      case FhgfsOpsErr_COMMUNICATION:
         LOG(GENERAL, ERR, "Communication error", entryID, isMirrored);
         break;

      case FhgfsOpsErr_INUSE:
         LOG(GENERAL, ERR, "File in use", entryID, isMirrored);
         break;

      default:
         LOG(GENERAL, ERR, "Error", entryID, isMirrored, err);
         break;
   }

   // a failure for one entry is reported as success to the caller on purpose of the original code
   return FhgfsOpsErr_SUCCESS;
}
/**
 * Error callback handed to DisposalCleaner::run(): logs the error code. The node argument is
 * not used.
 */
void handleError(Node&, FhgfsOpsErr err)
{
   LOG(GENERAL, ERR, "Disposal garbage collection run failed", err);
}
/**
 * Runs one disposal garbage collection pass for the local node's entry in the meta node
 * store and logs how many entries were unlinked. Afterwards, if a nonzero GC period is
 * configured and a GC queue exists, the function re-enqueues itself to run again after that
 * period (in seconds).
 */
void disposalGarbageCollector() {
   LOG(GENERAL, NOTICE, "Disposal garbage collection started");

   auto app = Program::getApp();

   unsigned unlinked = 0;

   const std::vector<NodeHandle> nodes = {app->getMetaNodes()->referenceNode(app->getLocalNode().getNumID())};

   // per-entry callback: unlink the entry and count successes
   auto onEntry = [&unlinked] (auto&& owner, auto&& entryID, auto&& isMirrored) {
      return deleteFile(unlinked, owner, entryID, isMirrored);
   };

   // abort callback: stop as soon as the GC queue is asked to terminate
   auto shouldAbort = [&app] () { return app->getGcQueue()->getSelfTerminate(); };

   DisposalCleaner dc(*app->getMetaBuddyGroupMapper(), true);
   dc.run(nodes, onEntry, handleError, shouldAbort);

   LOG(GENERAL, NOTICE, "Disposal garbage collection finished", unlinked);

   // schedule the next pass only when a period is configured and a queue is available
   const auto periodSecs = app->getConfig()->getTuneDisposalGCPeriod();
   if (!periodSecs)
      return;

   auto* queue = app->getGcQueue();
   if (!queue)
      return;

   queue->enqueue(std::chrono::seconds(periodSecs), disposalGarbageCollector);
}

View File

@@ -0,0 +1,4 @@
#pragma once
// Runs one disposal garbage collection pass; the implementation re-enqueues itself on the
// GC queue when a nonzero disposal GC period is configured.
void disposalGarbageCollector();

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
#pragma once
#include <common/storage/FileEvent.h>
#include <common/storage/EntryInfo.h>
#include <string>
// Extra information that accompanies a FileEvent when it is logged via logEvent().
// NOTE(review): the exact meaning of the individual ID fields is defined by the code that
// fills this struct (makeEventContext), which is not visible here.
struct EventContext
{
// Flag bits for eventFlags.
static constexpr uint32_t EVENTFLAG_NONE = 0;
static constexpr uint32_t EVENTFLAG_MIRRORED = (1 << 0); // Event is for a mirrored entry
static constexpr uint32_t EVENTFLAG_SECONDARY = (1 << 1); // Event generated by secondary node
std::string entryId;
std::string parentId;
unsigned msgUserId;
std::string targetParentId;
unsigned linkCount;
int64_t timestamp; // presumably the time of the event; unit not visible here
uint32_t eventFlags; // bitwise OR of EVENTFLAG_ values above.
};
// Builds an EventContext from the given values (defined elsewhere).
// NOTE(review): presumably entryInfo supplies entryId and the mirrored flag, and isSecondary
// maps to EVENTFLAG_SECONDARY - confirm against the definition.
EventContext makeEventContext(EntryInfo* entryInfo, std::string parentId, unsigned msgUserId,
std::string targetParentId, unsigned linkCount, bool isSecondary);
// Numeric IDs handed to the file event logger as part of FileEventLoggerParams.
struct FileEventLoggerIds
{
uint32_t nodeId;
uint16_t buddyGroupId;
};
// Parameters for createFileEventLogger().
struct FileEventLoggerParams
{
std::string address; // NOTE(review): format of the address is not visible here
FileEventLoggerIds ids;
};
// Opaque logger type; it is only handled through the three functions below.
struct FileEventLogger;
// Creates a logger from the given parameters. The returned pointer is presumably to be
// released with destroyFileEventLogger() - confirm against the implementation.
FileEventLogger *createFileEventLogger(FileEventLoggerParams const& params);
void destroyFileEventLogger(FileEventLogger *logger);
// Logs one event together with its context through the given logger.
void logEvent(FileEventLogger *logger, FileEvent const& event, EventContext const& eventCtx);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,140 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/components/ComponentInitException.h>
#include <common/net/message/nodes/GetNodeCapacityPoolsMsg.h>
#include <common/net/message/nodes/GetNodeCapacityPoolsRespMsg.h>
#include <common/nodes/NodeStoreServers.h>
#include <common/storage/quota/QuotaData.h>
#include <common/threading/PThread.h>
#include <common/Common.h>
#include <storage/NodeOfflineWait.h>
#include <atomic>
#include <mutex>
class AbstractDatagramListener;
/**
 * PThread-based component that keeps this node in sync with the other nodes.
 *
 * The class offers a set of static download/sync helpers (implemented elsewhere) and a group
 * of "force" flags that other threads can set through the inline setters below.
 * NOTE(review): the flags are presumably consumed by the sync loop (syncLoop), which is not
 * visible in this header.
 */
class InternodeSyncer : public PThread
{
   public:
      InternodeSyncer(TargetConsistencyState initialConsistencyState);
      virtual ~InternodeSyncer() { }

      static bool registerNode(AbstractDatagramListener* dgramLis);

      static bool updateMetaStatesAndBuddyGroups(TargetConsistencyState& outConsistencyState,
         bool publish);
      static void syncClients(const std::vector<NodeHandle>& clientsList, bool allowRemoteComm);
      static bool downloadAndSyncNodes();
      static bool downloadAndSyncTargetMappings();
      static bool downloadAndSyncStoragePools();
      static bool downloadAndSyncTargetStatesAndBuddyGroups();

      static void downloadAndSyncClients(bool requeue);

      static bool updateMetaCapacityPools();
      static bool updateMetaBuddyCapacityPools();

      static bool downloadAllExceededQuotaLists(const StoragePoolPtrVec& storagePools);
      static bool downloadExceededQuotaList(StoragePoolId storagePoolId, QuotaDataType idType,
         QuotaLimitType exType, UIntList* outIDList, FhgfsOpsErr& error);

      static void printSyncResults(NodeType nodeType, NumNodeIDList* addedNodes,
         NumNodeIDList* removedNodes);

   private:
      LogContext log;

      // "#warning" is the directive that emits a diagnostic; a misspelled directive would turn
      // this check into a hard "invalid preprocessing directive" error whenever it triggers.
#if ATOMIC_BOOL_LOCK_FREE != 2
# warning atomic<bool> is not always lock-free
#endif
      std::atomic<bool> forcePoolsUpdate; // true to force update of capacity pools
      std::atomic<bool> forceTargetStatesUpdate; // true to force update of node state
      std::atomic<bool> forcePublishCapacities; // true to force publishing free capacity
      std::atomic<bool> forceStoragePoolsUpdate; // true to force update of storage pools
      std::atomic<bool> forceCheckNetwork; // true to force checking of network changes

      // Keeps track of the timeout during which the node may not send state reports because it is
      // waiting to be offlined by the mgmtd.
      NodeOfflineWait offlineWait;

      Mutex nodeConsistencyStateMutex; // guards nodeConsistencyState
      TargetConsistencyState nodeConsistencyState; // Node's own consistency state.
      // Note: This is initialized when updateMetaStates... is called from App::downloadMgmtInfo.
      AtomicUInt32 buddyResyncInProgress;

      virtual void run();
      void syncLoop();

      static bool updateStorageCapacityPools();
      static bool updateTargetBuddyCapacityPools();
      static std::pair<bool, GetNodeCapacityPoolsRespMsg::PoolsMap> downloadCapacityPools(
         CapacityPoolQueryType poolType);
      void publishNodeCapacity();

      void forceMgmtdPoolsRefresh();

      // returns true if the local interfaces have changed
      bool checkNetwork();
      void dropIdleConns();
      unsigned dropIdleConnsByStore(NodeStoreServers* nodes);

      void getStatInfo(int64_t* outSizeTotal, int64_t* outSizeFree, int64_t* outInodesTotal,
         int64_t* outInodesFree);

      static TargetConsistencyState decideResync(const CombinedTargetState newState);
      static bool publishNodeStateChange(const TargetConsistencyState oldState,
         const TargetConsistencyState newState);

      static bool downloadAllExceededQuotaLists(const StoragePoolPtr storagePool);

   public:
      // inliners

      void setForcePoolsUpdate()
      {
         forcePoolsUpdate = true;
      }

      void setForceTargetStatesUpdate()
      {
         forceTargetStatesUpdate = true;
      }

      void setForcePublishCapacities()
      {
         forcePublishCapacities = true;
      }

      void setForceStoragePoolsUpdate()
      {
         forceStoragePoolsUpdate = true;
      }

      void setForceCheckNetwork()
      {
         forceCheckNetwork = true;
      }

      // returns the node's own consistency state (read under nodeConsistencyStateMutex)
      TargetConsistencyState getNodeConsistencyState()
      {
         std::lock_guard<Mutex> lock(nodeConsistencyStateMutex);
         return nodeConsistencyState;
      }

      // replaces the node's own consistency state (written under nodeConsistencyStateMutex)
      void setNodeConsistencyState(TargetConsistencyState newState)
      {
         std::lock_guard<Mutex> lock(nodeConsistencyStateMutex);
         nodeConsistencyState = newState;
      }

      void setResyncInProgress(bool resyncInProgress)
      {
         this->buddyResyncInProgress.set(resyncInProgress);
      }

      bool getResyncInProgress()
      {
         return this->buddyResyncInProgress.read();
      }
};

View File

@@ -0,0 +1,145 @@
#include "ModificationEventFlusher.h"
#include <common/toolkit/SynchronizedCounter.h>
#include <common/toolkit/ListTk.h>
#include <common/net/message/fsck/FsckModificationEventMsg.h>
#include <program/Program.h>
#include <mutex>
/**
 * Sets up the flusher thread object: looks up the datagram listener and worker list from the
 * app, creates the "fsck" node object with an empty NIC list and gives that node's connection
 * pool the local NIC list together with its supported capabilities.
 */
ModificationEventFlusher::ModificationEventFlusher()
   : PThread("ModificationEventFlusher"),
     log("ModificationEventFlusher"),
     dGramLis(Program::getApp()->getDatagramListener() ),
     workerList(Program::getApp()->getWorkers() ),
     fsckMissedEvent(false)
{
   // the fsck node starts without any interfaces; they are set when logging gets enabled
   NicAddressList noNics;
   this->fsckNode = std::make_shared<Node>(NODETYPE_Invalid, "fsck", NumNodeID(), 0, 0, noNics);

   NicAddressList ownNics = Program::getApp()->getLocalNicList();
   NicListCapabilities ownNicCaps;
   NetworkInterfaceCard::supportedCapabilities(&ownNics, &ownNicCaps);

   this->fsckNode->getConnPool()->setLocalNicList(ownNics, ownNicCaps);
}
/**
 * Thread main loop. Until self-termination is requested, it waits (in slices of up to 2000,
 * as passed to timedwait) for events to show up in the buffer and sends buffered events to
 * fsck. Exceptions are handed to the app's component exception handler.
 */
void ModificationEventFlusher::run()
{
   try
   {
      registerSignalHandler();

      while (!this->getSelfTerminate())
      {
         if (this->eventTypeBufferList.empty())
         {
            // nothing buffered: sleep until an event is added or the timeout expires, then
            // re-check the termination flag before looking at the buffer again
            const std::lock_guard<Mutex> lock(eventsAddedMutex);
            this->eventsAddedCond.timedwait(&eventsAddedMutex, 2000);
            continue;
         }

         // buffer list not empty... go ahead and send it
         this->sendToFsck();
      }

      log.log(Log_DEBUG, "Component stopped.");
   }
   catch (std::exception& e)
   {
      PThread::getCurrentThreadApp()->handleComponentException(e);
   }
}
/**
 * Appends one modification event to the buffer and wakes up waiters on eventsAddedCond.
 *
 * While the buffer holds MODFLUSHER_MAXSIZE_EVENTLIST or more entries, the caller blocks and
 * re-checks after each wait on eventsFlushedCond (timeout value 5000).
 *
 * @param eventType type of the event; stored as uint8_t
 * @param entryID ID of the affected entry
 * @return always true
 */
bool ModificationEventFlusher::add(ModificationEventType eventType, const std::string& entryID)
{
   // checks the fill level under the buffer mutex
   const auto bufferHasRoom = [this]() -> bool
   {
      const std::lock_guard<Mutex> lock(mutex);
      return this->eventTypeBufferList.size() < MODFLUSHER_MAXSIZE_EVENTLIST;
   };

   while (!bufferHasRoom())
   {
      // queue too long
      // wait if something is flushed
      const std::lock_guard<Mutex> lock(eventsFlushedMutex);
      this->eventsFlushedCond.timedwait(&eventsFlushedMutex, 5000);
   }

   {
      const std::lock_guard<Mutex> lock(mutex);
      this->eventTypeBufferList.push_back((uint8_t)eventType);
      this->entryIDBufferList.push_back(entryID);
   }

   {
      const std::lock_guard<Mutex> lock(eventsAddedMutex);
      this->eventsAddedCond.broadcast();
   }

   return true;
}
void ModificationEventFlusher::sendToFsck()
{
if (!fsckNode)
{
log.logErr("Fsck modification events are present, but fsck node is not set.");
this->fsckMissedEvent = true;
// stop logging
this->disableLoggingLocally(false);
return;
}
// get the first MODFLUSHER_SEND_AT_ONCE entries from each list and send them to fsck
// only have the mutex on the lists as long as we really need it
UInt8List eventTypeListCopy;
StringList entryIDListCopy;
{
const std::lock_guard<Mutex> lock(mutex);
UInt8ListIter eventTypeStart = this->eventTypeBufferList.begin();
UInt8ListIter eventTypeEnd = this->eventTypeBufferList.begin();
ListTk::advance(eventTypeBufferList, eventTypeEnd, MODFLUSHER_SEND_AT_ONCE);
StringListIter entryIDStart = this->entryIDBufferList.begin();
StringListIter entryIDEnd = this->entryIDBufferList.begin();
ListTk::advance(entryIDBufferList, entryIDEnd, MODFLUSHER_SEND_AT_ONCE);
eventTypeListCopy.splice(eventTypeListCopy.begin(), this->eventTypeBufferList, eventTypeStart,
eventTypeEnd);
entryIDListCopy.splice(entryIDListCopy.begin(), this->entryIDBufferList, entryIDStart,
entryIDEnd);
}
FsckModificationEventMsg fsckModificationEventMsg(&eventTypeListCopy, &entryIDListCopy,
this->fsckMissedEvent);
bool ackReceived = this->dGramLis->sendToNodeUDPwithAck(fsckNode, &fsckModificationEventMsg,
MODFLUSHER_WAIT_FOR_ACK_MS, MODFLUSHER_WAIT_FOR_ACK_RETRIES);
if (!ackReceived)
{
log.log(Log_CRITICAL,
"Did not receive an ack from fsck for a FsckModificationEventMsg");
this->fsckMissedEvent = true;
// stop logging
this->disableLoggingLocally(false);
}
const std::lock_guard<Mutex> lock(eventsFlushedMutex);
eventsFlushedCond.broadcast();
}

View File

@@ -0,0 +1,184 @@
#pragma once
#include <app/App.h>
#include <common/components/worker/Worker.h>
#include <common/components/worker/DecAtomicWork.h>
#include <common/components/worker/IncAtomicWork.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/threading/Condition.h>
#include <common/threading/Barrier.h>
#include <common/toolkit/MetadataTk.h>
#include <components/worker/BarrierWork.h>
#include <components/DatagramListener.h>
#include <program/Program.h>
#include <mutex>
// Tuning constants of the ModificationEventFlusher.
// add() blocks while the event buffer holds this many entries or more.
#define MODFLUSHER_MAXSIZE_EVENTLIST 10000
#define MODFLUSHER_SEND_AT_ONCE 10 // only very few events, because msg is UDP
// NOTE(review): not referenced in the visible flusher code - confirm whether still needed.
#define MODFLUSHER_FLUSH_MAX_INTERVAL_MS 5000
// Ack timeout and retry count passed to sendToNodeUDPwithAck() when sending events to fsck.
#define MODFLUSHER_WAIT_FOR_ACK_MS 1000
#define MODFLUSHER_WAIT_FOR_ACK_RETRIES 100
/*
* Note: this class is only used by fsck at the moment; therefore it is designed for fsck
*/
// Thread component that buffers modification events (event type + entry ID) added by other
// threads and sends them in small batches to an fsck node.
class ModificationEventFlusher: public PThread
{
public:
ModificationEventFlusher();
virtual void run();
bool add(ModificationEventType eventType, const std::string& entryID);
private:
LogContext log;
DatagramListener* dGramLis;
std::list<Worker*>* workerList;
// the two buffer lists are filled and drained in lockstep (one entry each per event)
UInt8List eventTypeBufferList;
StringList entryIDBufferList;
// general mutex used to lock the buffer and the notification enabling and disabling
Mutex mutex;
// signalled after a send attempt or a buffer flush; add() waits on it when the buffer is full
Mutex eventsFlushedMutex;
Condition eventsFlushedCond;
// signalled by add(); run() waits on it while the buffer is empty
Mutex eventsAddedMutex;
Condition eventsAddedCond;
AtomicSizeT loggingEnabled; // 1 if enabled
Mutex fsckMutex;
NodeHandle fsckNode;
bool fsckMissedEvent;
void sendToFsck();
public:
// inliners
/**
* Clears the buffers, enables logging and sets the fsck node's UDP port and NIC list.
*
* @param forceRestart if true, logging is (re-)enabled even if it was already enabled
* @returns true if logging was enabled, false if it was already running
*/
bool enableLogging(unsigned fsckPortUDP, NicAddressList& fsckNicList, bool forceRestart)
{
std::unique_lock<Mutex> lock(mutex);
if (!forceRestart && loggingEnabled.read() > 0)
return false;
eventTypeBufferList.clear();
entryIDBufferList.clear();
this->loggingEnabled.set(1);
// set fsckParameters
setFsckParametersUnlocked(fsckPortUDP, fsckNicList);
// release the buffer mutex before stalling the workers below
lock.unlock();
// make sure all workers have noticed the changed loggingEnabled flag
stallAllWorkers(true, false);
return true;
}
// Disables logging; treated as a call from a worker thread (fromWorker == true).
bool disableLogging()
{
return disableLoggingLocally(true);
}
bool isLoggingEnabled()
{
return (this->loggingEnabled.read() != 0);
}
bool getFsckMissedEvent()
{
const std::lock_guard<Mutex> lock(fsckMutex);
return this->fsckMissedEvent;
}
private:
/*
* Note: if logging is already disabled, this function basically does nothing, but returns
* if the buffer is empty or not
* @param fromWorker set to true if this is called from a worker thread. Otherwise, the worker
* calling this will deadlock
* @return true if buffer is empty, false otherwise
*/
bool disableLoggingLocally(bool fromWorker)
{
loggingEnabled.setZero();
stallAllWorkers(fromWorker, true);
std::lock_guard<Mutex> lock(mutex);
// make sure list is empty and no worker is logging anymore
return this->eventTypeBufferList.empty();
}
// Resets the missed-event flag and updates the fsck node's interfaces. As the name says,
// this takes no lock itself.
void setFsckParametersUnlocked(unsigned portUDP, NicAddressList& nicList)
{
this->fsckMissedEvent = false;
this->fsckNode->updateInterfaces(portUDP, 0, nicList);
}
/**
* @param fromWorker This is called from a worker thread. In that case, this function blocks
* only until n-1 workers have reached the counter work item - because one
* of the workers is already blocked inside this function.
* @param flush Flush the modification event queue. Do this when stopping the modification
* event logger, because otherwise, workers might lock up trying to enqueue items
* which will never be sent to the Fsck.
*/
void stallAllWorkers(bool fromWorker, bool flush)
{
App* app = Program::getApp();
MultiWorkQueue* workQueue = app->getWorkQueue();
pthread_t threadID = PThread::getCurrentThreadID();
SynchronizedCounter notified;
for (auto workerIt = workerList->begin(); workerIt != workerList->end(); ++workerIt)
{
// don't enqueue it in the worker that processes this message (this would deadlock)
if (!PThread::threadIDEquals((*workerIt)->getID(), threadID) || !fromWorker)
{
PersonalWorkQueue* personalQ = (*workerIt)->getPersonalWorkQueue();
workQueue->addPersonalWork(new IncSyncedCounterWork(&notified), personalQ);
}
}
// wait until every targeted worker has run its counter work item; between wait slices,
// optionally empty the buffer so that workers blocked in add() can make progress
while (true)
{
const bool done = notified.timedWaitForCount(workerList->size() - (fromWorker ? 1 : 0),
1000);
if (done)
{
break;
}
else if (flush)
{
{
const std::lock_guard<Mutex> lock(mutex);
this->eventTypeBufferList.clear();
this->entryIDBufferList.clear();
}
{
const std::lock_guard<Mutex> lock(eventsFlushedMutex);
eventsFlushedCond.broadcast();
}
}
}
}
};

View File

@@ -0,0 +1,530 @@
#include <program/Program.h>
#include <common/components/worker/IncSyncedCounterWork.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/threading/Barrier.h>
#include <common/toolkit/DebugVariable.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <app/App.h>
#include <components/buddyresyncer/BuddyResyncerBulkSyncSlave.h>
#include <components/buddyresyncer/BuddyResyncerModSyncSlave.h>
#include <components/worker/BarrierWork.h>
#include <toolkit/BuddyCommTk.h>
#include "BuddyResyncJob.h"
/**
 * Creates the resync job in NOTSTARTED state together with all of its slave objects: one
 * gather slave, at least one bulk sync slave (the count comes from the config, minimum 1),
 * the session store resyncer and one mod sync slave. The buddy node ID is looked up from the
 * meta buddy group mapper using the local node's numeric ID.
 */
BuddyResyncJob::BuddyResyncJob() :
   PThread("BuddyResyncJob"),
   state(BuddyResyncJobState_NOTSTARTED),
   startTime(0), endTime(0),
   gatherSlave(boost::make_unique<BuddyResyncerGatherSlave>(&syncCandidates))
{
   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   buddyNodeID = NumNodeID(
      app->getMetaBuddyGroupMapper()->getBuddyTargetID(app->getLocalNodeNumID().val()));

   // never run with fewer than one bulk sync slave
   const unsigned numSyncSlaves = std::max<unsigned>(cfg->getTuneNumResyncSlaves(), 1);

   for (size_t slaveIdx = 0; slaveIdx < numSyncSlaves; ++slaveIdx)
   {
      bulkSyncSlaves.emplace_back(
         boost::make_unique<BuddyResyncerBulkSyncSlave>(*this, &syncCandidates, slaveIdx,
            buddyNodeID));
   }

   sessionStoreResyncer = boost::make_unique<SessionStoreResyncer>(buddyNodeID);
   modSyncSlave = boost::make_unique<BuddyResyncerModSyncSlave>(*this, &syncCandidates, 1,
      buddyNodeID);
}
// Defaulted here rather than in the header: the header only forward-declares the bulk/mod sync
// slave classes held in unique_ptr members, and their full definitions are included in this file.
BuddyResyncJob::~BuddyResyncJob() = default;
void BuddyResyncJob::run()
{
const char* logContext = "Run resync job";
InternodeSyncer* internodeSyncer = Program::getApp()->getInternodeSyncer();
App* app = Program::getApp();
WorkerList* workers = app->getWorkers();
NodeStore* metaNodes = app->getMetaNodes();
const std::string metaPath = app->getMetaPath();
const std::string metaBuddyMirPath = app->getMetaPath() + "/" + CONFIG_BUDDYMIRROR_SUBDIR_NAME;
Barrier workerBarrier(workers->size() + 1);
bool workersStopped = false;
startTime = time(NULL);
syncCandidates.clear();
auto buddyNode = metaNodes->referenceNode(buddyNodeID);
if (!buddyNode)
{
LOG(MIRRORING, ERR, "Unable to resolve buddy node. Resync will not start.");
setState(BuddyResyncJobState_FAILURE);
goto cleanup;
}
DEBUG_ENV_VAR(unsigned, DIE_AT_RESYNC_N, 0, "BEEGFS_RESYNC_DIE_AT_N");
if (DIE_AT_RESYNC_N) {
static unsigned resyncs = 0;
// for #479: terminating a server at this point caused the workers to terminate before the
// resyncer had communicated with them, causing a deadlock on shutdown
if (++resyncs == DIE_AT_RESYNC_N) {
::kill(0, SIGTERM);
sleep(4);
}
}
stopAllWorkersOn(workerBarrier);
{
// Notify buddy that resync started and wait for confirmation
StorageResyncStartedMsg msg(buddyNodeID.val());
const auto respMsg = MessagingTk::requestResponse(*buddyNode, msg,
NETMSGTYPE_StorageResyncStartedResp);
if (!respMsg)
{
LogContext(logContext).logErr("Unable to notify buddy about resync attempt. "
"Resync will not start.");
setState(BuddyResyncJobState_FAILURE);
workerBarrier.wait();
goto cleanup;
}
// resync could have been aborted before we got here. if so, exit as soon as possible without
// setting the resync job state to something else.
{
std::unique_lock<Mutex> lock(stateMutex);
if (state == BuddyResyncJobState_INTERRUPTED)
{
lock.unlock();
workerBarrier.wait();
goto cleanup;
}
state = BuddyResyncJobState_RUNNING;
}
internodeSyncer->setResyncInProgress(true);
const bool startGatherSlaveRes = startGatherSlaves();
if (!startGatherSlaveRes)
{
setState(BuddyResyncJobState_FAILURE);
workerBarrier.wait();
goto cleanup;
}
const bool startResyncSlaveRes = startSyncSlaves();
if (!startResyncSlaveRes)
{
setState(BuddyResyncJobState_FAILURE);
workerBarrier.wait();
goto cleanup;
}
}
workerBarrier.wait();
LOG_DEBUG(__func__, Log_DEBUG, "Going to join gather slaves.");
joinGatherSlaves();
LOG_DEBUG(__func__, Log_DEBUG, "Joined gather slaves.");
LOG_DEBUG(__func__, Log_DEBUG, "Going to join sync slaves.");
// gather slaves have finished. Tell sync slaves to stop when work packages are empty and wait.
for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
{
(*it)->setOnlyTerminateIfIdle(true);
(*it)->selfTerminate();
}
for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
(*it)->join();
// here we can be in one of two situations:
// 1. bulk resync has succeeded. we then totally stop the workers: the session store must be in
// a quiescent state for resync, so for simplicitly, we suspend all client operations here.
// we do not want to do this any earlier than this point, because bulk syncers may take a
// very long time to complete.
// 2. bulk resync has failed. in this case, the bulk syncers have aborted the currently running
// job, and the mod syncer is either dead or in the process of dying. here we MUST NOT stop
// the workers, because they are very likely blocked on the mod sync queue already and will
// not unblock before the queue is cleared.
if (getState() == BuddyResyncJobState_RUNNING)
{
stopAllWorkersOn(workerBarrier);
workersStopped = true;
}
modSyncSlave->setOnlyTerminateIfIdle(true);
modSyncSlave->selfTerminate();
modSyncSlave->join();
// gatherers are done and the workers have been stopped, we can safely resync the session now.
LOG_DEBUG(__func__, Log_DEBUG, "Joined sync slaves.");
// Perform session store resync
// the job may have been aborted or terminated by errors. in this case, do not resync the session
// store. end the sync as quickly as possible.
if (getState() == BuddyResyncJobState_RUNNING)
sessionStoreResyncer->doSync();
// session store is now synced, and future actions can be forwarded safely. we do not restart
// the workers here because the resync may still enter FAILED state, and we don't want to forward
// to the secondary in this case.
cleanup:
bool syncErrors = false;
{
std::lock_guard<Mutex> lock(gatherSlave->stateMutex);
while (gatherSlave->isRunning)
gatherSlave->isRunningChangeCond.wait(&gatherSlave->stateMutex);
syncErrors |= gatherSlave->getStats().errors != 0;
}
for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
{
BuddyResyncerBulkSyncSlave* slave = it->get();
std::lock_guard<Mutex> lock(slave->stateMutex);
while (slave->isRunning)
slave->isRunningChangeCond.wait(&slave->stateMutex);
syncErrors |= slave->getStats().dirErrors != 0;
syncErrors |= slave->getStats().fileErrors != 0;
}
syncErrors |= sessionStoreResyncer->getStats().errors;
{
while (modSyncSlave->isRunning)
modSyncSlave->isRunningChangeCond.wait(&modSyncSlave->stateMutex);
syncErrors |= modSyncSlave->getStats().errors != 0;
}
if (getState() == BuddyResyncJobState_RUNNING || getState() == BuddyResyncJobState_INTERRUPTED)
{
if (syncErrors)
setState(BuddyResyncJobState_ERRORS);
else if (getState() == BuddyResyncJobState_RUNNING)
setState(BuddyResyncJobState_SUCCESS);
// delete timestamp override file if it exists.
BuddyCommTk::setBuddyNeedsResync(metaPath, false);
const TargetConsistencyState buddyState = newBuddyState();
informBuddy(buddyState);
informMgmtd(buddyState);
const bool interrupted = getState() != BuddyResyncJobState_SUCCESS;
LOG(MIRRORING, WARNING, "Resync finished.", interrupted, syncErrors);
}
internodeSyncer->setResyncInProgress(false);
endTime = time(NULL);
// restart all the worker threads
if (workersStopped)
workerBarrier.wait();
// if the resync was aborted, the mod sync queue may still contain items. additionally, workers
// may be waiting for a changeset slot, or they may have started executing after the resync was
// aborted by the sync slaves, but before the resync was officially set to "not running".
// we cannot set the resync to "not running" in abort() because we have no upper bound for the
// number of worker threads. even if we did set the resync to "not running" in abort() and
// cleared the sync queues at the same time, there may still be an arbitrary number of threads
// waiting for a changeset slot.
// instead, we have to wait for each thread to "see" that the resync is over, and periodically
// clear the sync queue to unblock those workers that are still waiting for slots.
if (syncErrors)
{
SynchronizedCounter counter;
for (auto it = workers->begin(); it != workers->end(); ++it)
{
auto& worker = **it;
worker.getWorkQueue()->addPersonalWork(
new IncSyncedCounterWork(&counter),
worker.getPersonalWorkQueue());
}
while (!counter.timedWaitForCount(workers->size(), 100))
{
while (!syncCandidates.isFilesEmpty())
{
MetaSyncCandidateFile candidate;
syncCandidates.fetch(candidate, this);
candidate.signal();
}
}
}
}
/**
 * Queues a BarrierWork item into every worker's personal work queue and then waits on the
 * barrier itself, so the call returns once all workers are blocked on it.
 *
 * @param barrier barrier shared with the workers; the caller is responsible for the second
 *        wait that lets the workers continue
 */
void BuddyResyncJob::stopAllWorkersOn(Barrier& barrier)
{
   WorkerList* workers = Program::getApp()->getWorkers();

   for (Worker* worker : *workers)
   {
      PersonalWorkQueue* personalQ = worker->getPersonalWorkQueue();
      MultiWorkQueue* workQueue = worker->getWorkQueue();
      workQueue->addPersonalWork(new BarrierWork(&barrier), personalQ);
   }

   barrier.wait(); // Wait until all workers are blocked.
}
void BuddyResyncJob::abort(bool wait_for_completion)
{
setState(BuddyResyncJobState_INTERRUPTED);
gatherSlave->selfTerminate();
// set onlyTerminateIfIdle on the slaves to false - they will be stopped by the main loop then.
for (auto it = bulkSyncSlaves.begin(); it != bulkSyncSlaves.end(); ++it)
{
BuddyResyncerBulkSyncSlave* slave = it->get();
slave->setOnlyTerminateIfIdle(false);
}
modSyncSlave->selfTerminate();
int retry = 600;
/* Wait till all on-going thread events are fetched or max 30mins.
* (fetch waits for 3secs if there are no files to be fetched)
*/
if (wait_for_completion)
{
modSyncSlave->join();
while (threadCount > 0 && retry)
{
LOG(MIRRORING, WARNING, "Wait for pending worker threads to finish");
if (!syncCandidates.isFilesEmpty())
{
MetaSyncCandidateFile candidate;
syncCandidates.fetch(candidate, this);
candidate.signal();
}
retry--;
}
if (threadCount)
LOG(MIRRORING, ERR, "Cleanup of aborted resync failed: I/O worker threads"
" did not finish properly: ",
("threadCount", threadCount.load()));
}
}
/**
 * Starts the gather slave thread: clears its self-terminate flag, starts it and marks it as
 * running.
 *
 * @return false if the thread could not be created (the error is logged), true otherwise
 */
bool BuddyResyncJob::startGatherSlaves()
{
   try
   {
      gatherSlave->resetSelfTerminate();
      gatherSlave->start();
      gatherSlave->setIsRunning(true);

      return true;
   }
   catch (PThreadCreateException& e)
   {
      LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what());
   }

   return false;
}
/**
 * Starts all bulk sync slave threads and afterwards the mod sync slave thread. For each
 * thread the self-terminate flag is cleared, the thread is started and marked as running.
 *
 * If a thread cannot be created, the error is logged and the bulk sync slaves started so far
 * are asked to terminate.
 *
 * @return true if every thread was started, false on the first thread creation failure
 */
bool BuddyResyncJob::startSyncSlaves()
{
   for (size_t i = 0; i < bulkSyncSlaves.size(); i++)
   {
      try
      {
         bulkSyncSlaves[i]->resetSelfTerminate();
         bulkSyncSlaves[i]->start();
         bulkSyncSlaves[i]->setIsRunning(true);
      }
      catch (PThreadCreateException& e)
      {
         LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what() );

         // only the slaves before index i have been started
         for (size_t j = 0; j < i; j++)
            bulkSyncSlaves[j]->selfTerminate();

         return false;
      }
   }

   try
   {
      modSyncSlave->resetSelfTerminate();
      modSyncSlave->start();
      modSyncSlave->setIsRunning(true);
   }
   catch (PThreadCreateException& e)
   {
      LogContext(__func__).logErr(std::string("Unable to start thread: ") + e.what() );

      // all bulk slaves are running at this point, so all of them have to be stopped
      for (size_t j = 0; j < bulkSyncSlaves.size(); j++)
         bulkSyncSlaves[j]->selfTerminate();

      return false;
   }

   return true;
}
/**
 * Blocks until the gather slave thread has been joined.
 */
void BuddyResyncJob::joinGatherSlaves()
{
   gatherSlave->join();
}
/**
 * Collects a snapshot of the job's statistics while holding stateMutex: job state and
 * start/end time, gather slave counters, the sums over all bulk sync slaves, session store
 * resync counters and mod sync slave counters.
 *
 * @return the combined statistics object
 */
MetaBuddyResyncJobStatistics BuddyResyncJob::getJobStats()
{
   std::lock_guard<Mutex> lock(stateMutex);

   // gather slave
   BuddyResyncerGatherSlave::Stats gatherStats = gatherSlave->getStats();
   const uint64_t dirsDiscovered = gatherStats.dirsDiscovered;
   const uint64_t gatherErrors = gatherStats.errors;

   // bulk sync slaves: accumulate the counters of every slave
   uint64_t dirsSynced = 0;
   uint64_t filesSynced = 0;
   uint64_t dirErrors = 0;
   uint64_t fileErrors = 0;

   for (const auto& bulkSyncer : bulkSyncSlaves)
   {
      BuddyResyncerBulkSyncSlave::Stats bulkSyncStats = bulkSyncer->getStats();

      dirsSynced += bulkSyncStats.dirsSynced;
      filesSynced += bulkSyncStats.filesSynced;
      dirErrors += bulkSyncStats.dirErrors;
      fileErrors += bulkSyncStats.fileErrors;
   }

   // session store resyncer
   SessionStoreResyncer::Stats sessionSyncStats = sessionStoreResyncer->getStats();
   const uint64_t sessionsToSync = sessionSyncStats.sessionsToSync;
   const uint64_t sessionsSynced = sessionSyncStats.sessionsSynced;
   const bool sessionSyncErrors = sessionSyncStats.errors;

   // mod sync slave
   BuddyResyncerModSyncSlave::Stats modSyncStats = modSyncSlave->getStats();
   uint64_t modObjectsSynced = modSyncStats.objectsSynced;
   uint64_t modSyncErrors = modSyncStats.errors;

   return MetaBuddyResyncJobStatistics(
         state, startTime, endTime,
         dirsDiscovered, gatherErrors,
         dirsSynced, filesSynced, dirErrors, fileErrors,
         sessionsToSync, sessionsSynced, sessionSyncErrors,
         modObjectsSynced, modSyncErrors);
}
/**
 * Determine the state for the buddy after the end of a resync job.
 *
 * Only a job in SUCCESS state yields GOOD. ERRORS, INTERRUPTED and FAILURE yield BAD; any
 * other job state is additionally logged as an error and also yields BAD.
 *
 * @returns the new state to be set on the buddy according to this job's JobState.
 */
TargetConsistencyState BuddyResyncJob::newBuddyState()
{
   const BuddyResyncJobState currentState = getState();

   if (currentState == BuddyResyncJobState_SUCCESS)
      return TargetConsistencyState_GOOD;

   const bool knownBadState =
      currentState == BuddyResyncJobState_ERRORS ||
      currentState == BuddyResyncJobState_INTERRUPTED ||
      currentState == BuddyResyncJobState_FAILURE;

   if (!knownBadState)
      LOG(MIRRORING, ERR, "Undefined resync state.", state);

   return TargetConsistencyState_BAD;
}
void BuddyResyncJob::informBuddy(const TargetConsistencyState newTargetState)
{
App* app = Program::getApp();
NodeStore* metaNodes = app->getMetaNodes();
MirrorBuddyGroupMapper* buddyGroups = app->getMetaBuddyGroupMapper();
NumNodeID buddyNodeID =
NumNodeID(buddyGroups->getBuddyTargetID(app->getLocalNodeNumID().val()));
auto metaNode = metaNodes->referenceNode(buddyNodeID);
if (!metaNode)
{
LOG(MIRRORING, ERR, "Unable to inform buddy about finished resync", buddyNodeID.str());
return;
}
UInt16List nodeIDs(1, buddyNodeID.val());
UInt8List states(1, newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Meta, &nodeIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*metaNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LogContext(__func__).logErr(
"Unable to inform buddy about finished resync. "
"BuddyNodeID: " + buddyNodeID.str() + "; "
"error: Communication Error");
return;
}
{
auto* respMsgCast = static_cast<SetTargetConsistencyStatesRespMsg*>(respMsg.get());
FhgfsOpsErr result = respMsgCast->getResult();
if (result != FhgfsOpsErr_SUCCESS)
{
LogContext(__func__).logErr(
"Error while informing buddy about finished resync. "
"BuddyNodeID: " + buddyNodeID.str() + "; "
"error: " + boost::lexical_cast<std::string>(result) );
}
}
}
void BuddyResyncJob::informMgmtd(const TargetConsistencyState newTargetState)
{
App* app = Program::getApp();
NodeStore* mgmtNodes = app->getMgmtNodes();
auto mgmtNode = mgmtNodes->referenceFirstNode();
if (!mgmtNode)
{
LOG(MIRRORING, ERR, "Unable to communicate with management node.");
return;
}
UInt16List nodeIDs(1, buddyNodeID.val());
UInt8List states(1, newTargetState);
SetTargetConsistencyStatesMsg msg(NODETYPE_Meta, &nodeIDs, &states, false);
const auto respMsg = MessagingTk::requestResponse(*mgmtNode, msg,
NETMSGTYPE_SetTargetConsistencyStatesResp);
if (!respMsg)
{
LOG(MIRRORING, ERR,
"Unable to inform management node about finished resync: Communication error.");
return;
}
{
auto* respMsgCast = static_cast<SetTargetConsistencyStatesRespMsg*>(respMsg.get());
FhgfsOpsErr result = respMsgCast->getResult();
if (result != FhgfsOpsErr_SUCCESS)
LOG(MIRRORING, ERR, "Error informing management node about finished resync.", result);
}
}

View File

@@ -0,0 +1,92 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncerGatherSlave.h>
#include <components/buddyresyncer/SessionStoreResyncer.h>
#include <common/storage/mirroring/BuddyResyncJobStatistics.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/PThread.h>
#include <common/nodes/NumNodeID.h>
#include <common/nodes/TargetStateInfo.h>
#include <atomic>
#include <mutex>
class BuddyResyncerBulkSyncSlave;
class BuddyResyncerModSyncSlave;
class BuddyResyncJob : public PThread
{
public:
BuddyResyncJob();
~BuddyResyncJob();
virtual void run();
void abort(bool wait_for_completion);
MetaBuddyResyncJobStatistics getJobStats();
std::atomic<unsigned int> threadCount{ 0 };
private:
BuddyResyncJobState state;
Mutex stateMutex;
int64_t startTime;
int64_t endTime;
NumNodeID buddyNodeID;
MetaSyncCandidateStore syncCandidates;
std::unique_ptr<BuddyResyncerGatherSlave> gatherSlave;
std::vector<std::unique_ptr<BuddyResyncerBulkSyncSlave>> bulkSyncSlaves;
std::unique_ptr<BuddyResyncerModSyncSlave> modSyncSlave;
std::unique_ptr<SessionStoreResyncer> sessionStoreResyncer;
bool startGatherSlaves();
bool startSyncSlaves();
void joinGatherSlaves();
public:
BuddyResyncJobState getState()
{
std::lock_guard<Mutex> lock(stateMutex);
return state;
}
bool isRunning()
{
std::lock_guard<Mutex> lock(stateMutex);
return state == BuddyResyncJobState_RUNNING;
}
void enqueue(MetaSyncCandidateFile syncCandidate, PThread* caller)
{
syncCandidates.add(std::move(syncCandidate), caller);
}
void registerOps()
{
this->threadCount += 1;
}
void unregisterOps()
{
this->threadCount -= 1;
}
private:
void setState(const BuddyResyncJobState state)
{
LOG_DEBUG(__func__, Log_DEBUG, "Setting state: "
+ StringTk::uintToStr(static_cast<int>(state) ) );
std::lock_guard<Mutex> lock(stateMutex);
this->state = state;
}
TargetConsistencyState newBuddyState();
void informBuddy(const TargetConsistencyState newTargetState);
void informMgmtd(const TargetConsistencyState newTargetState);
void stopAllWorkersOn(Barrier& barrier);
};

View File

@@ -0,0 +1,89 @@
#include "BuddyResyncer.h"
#include <program/Program.h>
// per-thread change set that is currently being collected; null while none is registered
__thread MetaSyncCandidateFile* BuddyResyncer::currentThreadChangeSet = nullptr;
/**
 * If a job object still exists: abort it (without waiting inside abort), join its thread and
 * delete it.
 */
BuddyResyncer::~BuddyResyncer()
{
   if (!job)
      return;

   job->abort(false);
   job->join();

   SAFE_DELETE(job);
}
/**
 * Start a new resync job unless one is still active or new resyncs have been disabled.
 *
 * @returns FhgfsOpsErr_INTERRUPTED if shutdown() has disabled new resyncs,
 *    FhgfsOpsErr_INUSE if the existing job is not started yet, is running, or its thread could
 *    not be joined within the short timeout, FhgfsOpsErr_SUCCESS if a new job was started.
 */
FhgfsOpsErr BuddyResyncer::startResync()
{
   std::lock_guard<Mutex> lock(jobMutex);

   if (noNewResyncs)
      return FhgfsOpsErr_INTERRUPTED;

   if (job)
   {
      const auto jobState = job->getState();

      if (jobState == BuddyResyncJobState_NOTSTARTED || jobState == BuddyResyncJobState_RUNNING)
         return FhgfsOpsErr_INUSE;

      // a job must never be started more than once. to ensure this, we create a new job for
      // every resync process, because doing so allows us to use NOTSTARTED and RUNNING as
      // "job is currently active" values. otherwise, a second resync may see state SUCCESS and
      // allow duplicate resyncer activity.
      // if the thread of the old job is still active, don't wait for very long - it may take a
      // while to finish. the internode syncer will retry periodically, so this will work fine.
      if (!job->timedjoin(10))
         return FhgfsOpsErr_INUSE;

      delete job;
   }

   // either there never was a job or the previous one has finished and was disposed of
   job = new BuddyResyncJob();
   job->start();

   return FhgfsOpsErr_SUCCESS;
}
void BuddyResyncer::shutdown()
{
std::unique_ptr<BuddyResyncJob> job;
{
std::lock_guard<Mutex> lock(jobMutex);
job.reset(this->job);
this->job = nullptr;
noNewResyncs = true;
}
if (job)
{
job->abort(false);
job->join();
}
}
void BuddyResyncer::commitThreadChangeSet()
{
BEEGFS_BUG_ON(!currentThreadChangeSet, "no change set active");
auto* job = Program::getApp()->getBuddyResyncer()->getResyncJob();
std::unique_ptr<MetaSyncCandidateFile> candidate(currentThreadChangeSet);
currentThreadChangeSet = nullptr;
Barrier syncDone(2);
candidate->prepareSignal(syncDone);
job->enqueue(std::move(*candidate), PThread::getCurrentThread());
syncDone.wait();
}

View File

@@ -0,0 +1,69 @@
#pragma once
#include <components/buddyresyncer/BuddyResyncJob.h>
#include <common/storage/StorageErrors.h>
#include <common/Common.h>
#include <mutex>
/**
* This component does not represent a thread by itself. Instead, it manages a group of "slave
* threads" that are started and stopped when needed.
*
* Other components should only use this component as an interface and not access the slave threads
* directly.
*/
class BuddyResyncer
{
public:
BuddyResyncer()
: job(NULL), noNewResyncs(false)
{ }
~BuddyResyncer();
FhgfsOpsErr startResync();
void shutdown();
static void commitThreadChangeSet();
private:
BuddyResyncJob* job; // Note: In the Storage Server, this is a Map. Here it's just one pointer
// that's set to NULL when no job is present.
Mutex jobMutex;
public:
BuddyResyncJob* getResyncJob()
{
std::lock_guard<Mutex> lock(jobMutex);
return job;
}
static void registerSyncChangeset()
{
BEEGFS_BUG_ON(currentThreadChangeSet, "current changeset not nullptr");
currentThreadChangeSet = new MetaSyncCandidateFile;
}
static void abandonSyncChangeset()
{
delete currentThreadChangeSet;
currentThreadChangeSet = nullptr;
}
static MetaSyncCandidateFile* getSyncChangeset()
{
return currentThreadChangeSet;
}
private:
static __thread MetaSyncCandidateFile* currentThreadChangeSet;
bool noNewResyncs;
// No copy allowed
BuddyResyncer(const BuddyResyncer&);
BuddyResyncer& operator=(const BuddyResyncer&);
};

View File

@@ -0,0 +1,234 @@
#include "BuddyResyncerBulkSyncSlave.h"
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/MessagingTk.h>
#include <common/Common.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
#include <net/msghelpers/MsgHelperXAttr.h>
#include <program/Program.h>
#include <toolkit/XAttrTk.h>
#include <dirent.h>
/**
 * @param parentJob the job this slave belongs to (passed on to SyncSlaveBase).
 * @param syncCandidates store the slave takes its directory candidates from.
 * @param slaveID number appended to the component name "BuddyResyncerBulkSyncSlave_".
 * @param buddyNodeID node ID of the buddy (passed on to SyncSlaveBase).
 */
BuddyResyncerBulkSyncSlave::BuddyResyncerBulkSyncSlave(
   BuddyResyncJob& parentJob,
   MetaSyncCandidateStore* syncCandidates,
   uint8_t slaveID,
   const NumNodeID& buddyNodeID)
   : SyncSlaveBase(
        "BuddyResyncerBulkSyncSlave_" + StringTk::uintToStr(slaveID),
        parentJob,
        buddyNodeID),
     syncCandidates(syncCandidates)
{
   // nothing to do besides member initialization
}
/**
 * Main loop of a bulk sync slave: fetches directory candidates from the candidate store and
 * resyncs each of them via resyncDirectory() until the thread is told to terminate.
 *
 * Hash directories are resynced as a whole; content directories are resynced in two steps
 * (fsids sub-directory first, then the content directory itself). The first failed resync
 * increases numDirErrors, aborts the parent job and ends the loop.
 */
void BuddyResyncerBulkSyncSlave::syncLoop()
{
EntryLockStore* const lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();
while (!getSelfTerminateNotIdle())
{
MetaSyncCandidateDir candidate;
syncCandidates->fetch(candidate, this);
// the sync candidate we have retrieved may be invalid if this thread was ordered to
// terminate and the sync candidate store has no more directories queued for us.
// in this case, we may end the sync because we have no more candidates, and the resync job
// guarantees that all gather threads have completed before the bulk syncers are ordered to
// finish.
if (syncCandidates->isDirsEmpty() && candidate.getRelativePath().empty() &&
getSelfTerminate())
return;
if (candidate.getType() == MetaSyncDirType::InodesHashDir ||
candidate.getType() == MetaSyncDirType::DentriesHashDir)
{
// lock the hash path in accordance with MkLocalDir, RmLocalDir and RmDir.
// the two hash values are parsed as hex numbers from the path components after the first
// and after the second slash of the relative path.
const auto& hashDir = candidate.getRelativePath();
auto slash1 = hashDir.find('/');
auto slash2 = hashDir.find('/', slash1 + 1);
auto hash1 = StringTk::strHexToUInt(hashDir.substr(slash1 + 1, slash2 - slash1 - 1));
auto hash2 = StringTk::strHexToUInt(hashDir.substr(slash2 + 1));
// NOTE(review): presumably a scoped lock that is released when hashLock goes out of scope
// at the end of this block - confirm against HashDirLock.
HashDirLock hashLock = {lockStore, {hash1, hash2}};
const FhgfsOpsErr resyncRes = resyncDirectory(candidate, "");
if (resyncRes == FhgfsOpsErr_SUCCESS)
continue;
// hash dir resync failed: count the error, abort the whole job and stop this slave.
numDirErrors.increase();
parentJob->abort(false);
return;
}
// not a hash dir, so it must be a content directory. sync the #fSiDs# first, then the actual
// content directory. we lock the directory inode the content directory belongs to because we
// must not allow a concurrent meta action to delete the content directory while we are
// resyncing it. concurrent modification of directory contents could be allowed, though.
const std::string dirInodeID = Path(candidate.getRelativePath()).back();
const std::string fullPath = META_BUDDYMIRROR_SUBDIR_NAME "/" + candidate.getRelativePath();
FileIDLock dirLock(lockStore, dirInodeID, false);
// first ensure that the directory still exists - a concurrent modification may have deleted
// it. this would not be an error; bulk resync should not touch it, a modification sync
// would remove it completely.
if (::access(fullPath.c_str(), F_OK) != 0 && errno == ENOENT)
{
numDirsSynced.increase(); // Count it anyway, so the sums match up.
continue;
}
// the fsids sub-directory of the content directory is resynced as an InodesHashDir candidate.
MetaSyncCandidateDir fsIDs(
candidate.getRelativePath() + "/" + META_DIRENTRYID_SUB_STR,
MetaSyncDirType::InodesHashDir);
FhgfsOpsErr resyncRes = resyncDirectory(fsIDs, dirInodeID);
// only resync the content directory itself if the fsids directory went through.
if (resyncRes == FhgfsOpsErr_SUCCESS)
resyncRes = resyncDirectory(candidate, dirInodeID);
if (resyncRes != FhgfsOpsErr_SUCCESS)
{
numDirErrors.increase();
parentJob->abort(false);
return;
}
else
{
numDirsSynced.increase();
}
}
}
/**
 * Resync one whole directory by running resyncAt() with streamCandidateDir as stream function.
 *
 * @param root the directory candidate to resync.
 * @param inodeID forwarded unchanged to streamCandidateDir().
 * @returns the result of resyncAt().
 */
FhgfsOpsErr BuddyResyncerBulkSyncSlave::resyncDirectory(const MetaSyncCandidateDir& root,
   const std::string& inodeID)
{
   // context for the static streamCandidateDir trampoline; only holds references
   StreamCandidateArgs streamArgs(*this, root, inodeID);

   const Path rootPath(root.getRelativePath());

   return resyncAt(rootPath, true, streamCandidateDir, &streamArgs);
}
/**
 * Stream all entries of one candidate directory to the secondary and finish the stream with an
 * empty resync packet.
 *
 * Entries that vanish while the directory is read are skipped silently. Entries that cannot be
 * stat'ed or that are neither regular files nor directories are counted in numFileErrors and
 * skipped. A failure to read the directory is counted in numDirErrors and ends the iteration;
 * the stream is still terminated regularly in that case.
 *
 * @param socket the connection to the secondary.
 * @param candidate the directory to stream; its path is relative to the buddy mirror directory.
 * @param inodeID parent ID used for locking dentries when candidate is a content directory.
 * @returns FhgfsOpsErr_INTERNAL if the directory cannot be opened, the error of the first
 *    failed dentry/inode transfer, otherwise the result of sending the final packet.
 */
FhgfsOpsErr BuddyResyncerBulkSyncSlave::streamCandidateDir(Socket& socket,
   const MetaSyncCandidateDir& candidate, const std::string& inodeID)
{
   EntryLockStore* const lockStore = Program::getApp()->getMirroredSessions()->getEntryLockStore();

   Path candidatePath(META_BUDDYMIRROR_SUBDIR_NAME "/" + candidate.getRelativePath());

   std::unique_ptr<DIR, StorageTk::CloseDirDeleter> dir(opendir(candidatePath.str().c_str()));

   if (!dir)
   {
      LOG(MIRRORING, ERR, "Could not open candidate directory.", candidatePath, sysErr);
      return FhgfsOpsErr_INTERNAL;
   }

   int dirFD = ::dirfd(dir.get());
   if (dirFD < 0)
   {
      LOG(MIRRORING, ERR, "Could not get file descriptor of candidate directory.",
         candidatePath, sysErr);
      return FhgfsOpsErr_INTERNAL;
   }

   while (true)
   {
      struct dirent* entry;

      // same switch as in the gather slave: readdir_r reports the error through its return
      // value, readdir through errno.
#if USE_READDIR_R
      struct dirent entryBuf;
      int err = ::readdir_r(dir.get(), &entryBuf, &entry);
#else
      errno = 0;
      entry = readdir(dir.get());
      int err = entry ? 0 : errno;
#endif
      if (err > 0)
      {
         // log the error code we actually got instead of whatever errno happens to hold
         LOG(MIRRORING, ERR, "Could not read candidate directory.", candidatePath, sysErr(err));
         numDirErrors.increase();
         break;
      }

      if (!entry)
         break;

      if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
         continue;

      struct stat statData;

      if (::fstatat(dirFD, entry->d_name, &statData, AT_SYMLINK_NOFOLLOW) < 0)
      {
         // the file/directory may have gone away. this is not an error, and the secondary will
         // delete the file/directory as well.
         if (errno == ENOENT)
            continue;

         LOG(MIRRORING, ERR, "Could not stat resync candidate.",
               candidatePath, entry->d_name, sysErr);
         numFileErrors.increase();
         continue;
      }

      if (!S_ISDIR(statData.st_mode) && !S_ISREG(statData.st_mode))
      {
         LOG(MIRRORING, ERR, "Resync candidate is neither file nor directory.",
               candidatePath, entry->d_name, statData.st_mode);
         numFileErrors.increase();
         continue;
      }

      if (candidate.getType() == MetaSyncDirType::ContentDir)
      {
         // if it's in a content directory and a directory, it can really only be the fsids dir.
         // locking for this case is already sorted, so we only have to transfer the (empty)
         // inode metadata to tell the secondary that the directory may stay.
         if (S_ISDIR(statData.st_mode))
         {
            const FhgfsOpsErr streamRes = streamInode(socket, Path(entry->d_name), true);
            if (streamRes != FhgfsOpsErr_SUCCESS)
               return streamRes;
         }
         else
         {
            ParentNameLock dentryLock(lockStore, inodeID, entry->d_name);

            const auto streamRes = streamDentry(socket, Path(), entry->d_name);
            if (streamRes != FhgfsOpsErr_SUCCESS)
            {
               numFileErrors.increase();
               return streamRes;
            }
            else
            {
               numFilesSynced.increase();
            }
         }

         continue;
      }

      // we are now either in a fsids (file inode) directory or a second-level inode hash-dir,
      // which may contain either file or directory inodes. taking a lock unnecessarily is still
      // cheaper than reading the inode from disk to determine its type, so just lock the inode id
      // as file
      FileIDLock dirLock(lockStore, entry->d_name, true);

      // access the file once more, because it may have been deleted in the meantime. a new entry
      // with the same name cannot appear in a sane filesystem (that would indicate an ID being
      // reused).
      if (faccessat(dirFD, entry->d_name, F_OK, 0) < 0 && errno == ENOENT)
         continue;

      const FhgfsOpsErr streamRes = streamInode(socket, Path(entry->d_name),
            S_ISDIR(statData.st_mode));
      if (streamRes != FhgfsOpsErr_SUCCESS)
      {
         numFileErrors.increase();
         return streamRes;
      }
      else
      {
         numFilesSynced.increase();
      }
   }

   // an empty packet terminates the stream
   return sendResyncPacket(socket, std::tuple<>());
}

View File

@@ -0,0 +1,67 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/PThread.h>
#include <common/storage/StorageErrors.h>
#include <common/nodes/Node.h>
#include <app/App.h>
#include <components/buddyresyncer/SyncCandidate.h>
#include "SyncSlaveBase.h"
class DirEntry;
/**
 * Sync slave that takes directory candidates from the candidate store and streams their
 * contents to the buddy. Keeps counters for synced directories/files and for errors.
 */
class BuddyResyncerBulkSyncSlave : public SyncSlaveBase
{
   friend class BuddyResyncer;
   friend class BuddyResyncJob;

   public:
      BuddyResyncerBulkSyncSlave(BuddyResyncJob& parentJob, MetaSyncCandidateStore* syncCandidates,
         uint8_t slaveID, const NumNodeID& buddyNodeID);

      // snapshot of the slave's counters
      struct Stats
      {
         uint64_t dirsSynced;
         uint64_t filesSynced;
         uint64_t dirErrors;
         uint64_t fileErrors;
      };

      /**
       * @returns the current values of all four counters.
       */
      Stats getStats()
      {
         Stats snapshot;

         snapshot.dirsSynced = numDirsSynced.read();
         snapshot.filesSynced = numFilesSynced.read();
         snapshot.dirErrors = numDirErrors.read();
         snapshot.fileErrors = numFileErrors.read();

         return snapshot;
      }

   private:
      MetaSyncCandidateStore* syncCandidates;

      AtomicUInt64 numDirsSynced;
      AtomicUInt64 numFilesSynced;
      AtomicUInt64 numDirErrors;
      AtomicUInt64 numFileErrors;

      void syncLoop();

      FhgfsOpsErr resyncDirectory(const MetaSyncCandidateDir& root, const std::string& inodeID);

      FhgfsOpsErr streamCandidateDir(Socket& socket, const MetaSyncCandidateDir& candidate,
         const std::string& inodeID);

   private:
      // context handed to the static stream function: (slave, candidate, inode ID), all by
      // reference
      using StreamCandidateArgs = std::tuple<
         BuddyResyncerBulkSyncSlave&,
         const MetaSyncCandidateDir&,
         const std::string&>;

      /**
       * Stream function with the plain-pointer signature expected by resyncAt(); unpacks the
       * StreamCandidateArgs behind context and forwards to the member function.
       */
      static FhgfsOpsErr streamCandidateDir(Socket* socket, void* context)
      {
         auto& streamArgs = *static_cast<StreamCandidateArgs*>(context);

         BuddyResyncerBulkSyncSlave& slave = std::get<0>(streamArgs);

         return slave.streamCandidateDir(*socket, std::get<1>(streamArgs),
            std::get<2>(streamArgs));
      }
};

View File

@@ -0,0 +1,134 @@
#include <common/storage/Metadata.h>
#include <common/app/log/LogContext.h>
#include <common/toolkit/StringTk.h>
#include <toolkit/BuddyCommTk.h>
#include <program/Program.h>
#include "BuddyResyncerGatherSlave.h"
/**
 * @param syncCandidates store that discovered directories are added to.
 */
BuddyResyncerGatherSlave::BuddyResyncerGatherSlave(MetaSyncCandidateStore* syncCandidates)
   : PThread("BuddyResyncerGatherSlave"),
     isRunning(false),
     syncCandidates(syncCandidates)
{
   // root of the buddy mirror directory below the meta path; all crawling starts here
   const auto& metaRoot = Program::getApp()->getMetaPath();

   metaBuddyPath = metaRoot + "/" CONFIG_BUDDYMIRROR_SUBDIR_NAME;
}
/**
 * Thread entry point: marks the slave as running, executes workLoop() and marks the slave as
 * not running again. Exceptions derived from std::exception are passed to the app's component
 * exception handler.
 */
void BuddyResyncerGatherSlave::run()
{
   setIsRunning(true);

   try
   {
      LOG(MIRRORING, DEBUG, "Component started");

      registerSignalHandler();
      workLoop();

      LOG(MIRRORING, DEBUG, "Component stopped");
   }
   catch (std::exception& componentException)
   {
      PThread::getCurrentThreadApp()->handleComponentException(componentException);
   }

   setIsRunning(false);
}
/**
 * Crawl the inodes tree first and the dentries tree afterwards, both below metaBuddyPath.
 */
void BuddyResyncerGatherSlave::workLoop()
{
   {
      const std::string inodesRoot = metaBuddyPath + "/" META_INODES_SUBDIR_NAME;
      crawlDir(inodesRoot, MetaSyncDirType::InodesHashDir);
   }

   {
      const std::string dentriesRoot = metaBuddyPath + "/" META_DENTRIES_SUBDIR_NAME;
      crawlDir(dentriesRoot, MetaSyncDirType::DentriesHashDir);
   }
}
void BuddyResyncerGatherSlave::crawlDir(const std::string& path, const MetaSyncDirType type,
const unsigned level)
{
LOG_DBG(MIRRORING, DEBUG, "Entering hash dir.", level, path);
std::unique_ptr<DIR, StorageTk::CloseDirDeleter> dirHandle(::opendir(path.c_str()));
if (!dirHandle)
{
LOG(MIRRORING, ERR, "Unable to open path", path, sysErr);
numErrors.increase();
return;
}
while (!getSelfTerminate())
{
struct dirent* entry;
#if USE_READDIR_R
struct dirent buffer;
const int readRes = ::readdir_r(dirHandle.get(), &buffer, &entry);
#else
errno = 0;
entry = ::readdir(dirHandle.get());
const int readRes = entry ? 0 : errno;
#endif
if (readRes != 0)
{
LOG(MIRRORING, ERR, "Could not read dir entry.", path, sysErr(readRes));
numErrors.increase();
return;
}
if (!entry)
break;
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
continue;
const std::string& candidatePath = path + "/" + entry->d_name;
struct stat statBuf;
const int statRes = ::stat(candidatePath.c_str(), &statBuf);
if (statRes)
{
// in a 2nd level dentry hashdir, content directories may disappear - this is not an error,
// it was most likely caused by an rmdir issued by a user.
if (!(errno == ENOENT && type == MetaSyncDirType::DentriesHashDir && level == 2))
{
LOG(MIRRORING, ERR, "Could not stat dir entry.", candidatePath, sysErr);
numErrors.increase();
}
continue;
}
if (!S_ISDIR(statBuf.st_mode))
{
LOG(MIRRORING, ERR, "Found a non-dir where only directories are expected.", candidatePath,
oct(statBuf.st_mode));
numErrors.increase();
continue;
}
// layout is: (dentries|inodes)/l1/l2/...
// -> level 0 correlates with type
// -> level 1 is not very interesting, except for reporting
// -> level 2 must be synced. if it is a dentry hashdir, its contents must also be crawled.
if (level == 0)
{
crawlDir(candidatePath, type, level + 1);
continue;
}
if (level == 1)
{
LOG_DBG(MIRRORING, DEBUG, "Adding hashdir sync candidate.", candidatePath);
addCandidate(candidatePath, type);
if (type == MetaSyncDirType::DentriesHashDir)
crawlDir(candidatePath, type, level + 1);
continue;
}
// so here we read a 2nd level dentry hashdir. crawl that too, add sync candidates for each
// entry we find - non-directories have already been reported, and the bulk resyncer will
// take care of the fsids directories.
numDirsDiscovered.increase();
LOG_DBG(MIRRORING, DEBUG, "Adding contdir sync candidate.", candidatePath);
addCandidate(candidatePath, MetaSyncDirType::ContentDir);
}
}

View File

@@ -0,0 +1,73 @@
#pragma once
#include <common/app/log/LogContext.h>
#include <common/threading/PThread.h>
#include <components/buddyresyncer/SyncCandidate.h>
#include <mutex>
/**
 * Thread that crawls the buddy mirror directory tree and adds the directories it finds to the
 * sync candidate store. Keeps counters for discovered directories and errors.
 */
class BuddyResyncerGatherSlave : public PThread
{
   // Grant access to internal mutex
   friend class BuddyResyncer;
   friend class BuddyResyncJob;

   public:
      BuddyResyncerGatherSlave(MetaSyncCandidateStore* syncCandidates);

      void workLoop();

   private:
      Mutex stateMutex; // guards isRunning
      Condition isRunningChangeCond; // broadcast on every setIsRunning() call

      AtomicUInt64 numDirsDiscovered;
      AtomicUInt64 numErrors;

      std::string metaBuddyPath;

      bool isRunning;

      MetaSyncCandidateStore* syncCandidates;

      virtual void run();

      void crawlDir(const std::string& path, const MetaSyncDirType type, const unsigned level = 0);

   public:
      /**
       * @returns the running flag, read while holding stateMutex.
       */
      bool getIsRunning()
      {
         std::lock_guard<Mutex> stateLock(stateMutex);
         return isRunning;
      }

      // snapshot of the slave's counters
      struct Stats
      {
         uint64_t dirsDiscovered;
         uint64_t errors;
      };

      /**
       * @returns the current values of both counters.
       */
      Stats getStats()
      {
         Stats snapshot;

         snapshot.dirsDiscovered = numDirsDiscovered.read();
         snapshot.errors = numErrors.read();

         return snapshot;
      }

   private:
      /**
       * Set the running flag and wake up everybody waiting on isRunningChangeCond, both while
       * holding stateMutex.
       */
      void setIsRunning(const bool isRunning)
      {
         std::lock_guard<Mutex> stateLock(stateMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      /**
       * Add a directory candidate to the store.
       *
       * @param path path below metaBuddyPath; the prefix and the following separator are
       *    stripped before the candidate is stored.
       * @param type candidate type to store.
       */
      void addCandidate(const std::string& path, const MetaSyncDirType type)
      {
         const std::string relPath = path.substr(metaBuddyPath.size() + 1);

         syncCandidates->add(MetaSyncCandidateDir(relPath, type), this);
      }
};
typedef std::vector<BuddyResyncerGatherSlave*> BuddyResyncerGatherSlaveVec;
typedef BuddyResyncerGatherSlaveVec::iterator BuddyResyncerGatherSlaveVecIter;

View File

@@ -0,0 +1,142 @@
#include "BuddyResyncerModSyncSlave.h"
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/DebugVariable.h>
#include <common/Common.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
#include <net/msghelpers/MsgHelperXAttr.h>
#include <program/Program.h>
#include <toolkit/XAttrTk.h>
/**
 * @param parentJob the job this slave belongs to (passed on to SyncSlaveBase).
 * @param syncCandidates store the slave takes its file candidates (change sets) from.
 * @param slaveID number appended to the component name "BuddyResyncerModSyncSlave_".
 * @param buddyNodeID node ID of the buddy (passed on to SyncSlaveBase).
 */
BuddyResyncerModSyncSlave::BuddyResyncerModSyncSlave(
   BuddyResyncJob& parentJob,
   MetaSyncCandidateStore* syncCandidates,
   uint8_t slaveID,
   const NumNodeID& buddyNodeID)
   : SyncSlaveBase(
        "BuddyResyncerModSyncSlave_" + StringTk::uintToStr(slaveID),
        parentJob,
        buddyNodeID),
     syncCandidates(syncCandidates)
{
   // nothing to do besides member initialization
}
/**
 * Main loop of the mod sync slave: whenever waitForFiles() reports file candidates, a resync
 * stream is run via resyncAt() with streamCandidates as stream function. If waiting yields
 * nothing and getOnlyTerminateIfIdle() is set, the loop ends.
 */
void BuddyResyncerModSyncSlave::syncLoop()
{
   while (!getSelfTerminateNotIdle())
   {
      const bool haveFiles = syncCandidates->waitForFiles(this);

      if (!haveFiles)
      {
         if (getOnlyTerminateIfIdle())
            break;

         continue;
      }

      // the result is intentionally not evaluated here, just as before
      resyncAt(Path(), false, streamCandidates, this);
   }
}
namespace {

/**
 * "Deleter" for a std::unique_ptr that does not free the candidate but calls signal() on it
 * when the pointer goes out of scope.
 */
struct CandidateSignaler
{
   void operator()(MetaSyncCandidateFile* candidate) const
   {
      candidate->signal();
   }
};

/**
 * Ordering predicate for the elements of one change set, for use with std::sort.
 *
 * we must sync deletions before updates and inodes before everything else:
 *
 * deletions may fail on the secondary, so they *can* be synced first to begin with.
 * any item that is deleted and then recreated with an update must be deleted first.
 * we also guarantee that no item is created and deleted in the same changeset.
 *
 * inodes must be synced before dentries because the dentries may link to inodes in the same
 * changeset - and if the secondary does not have the appropriate inode yet, the changeset
 * must create it.
 *
 * std::sort requires a strict weak ordering, so the criteria are applied strictly one after
 * the other: deletion flag first, then "is inode", then type, then path. A criterion only
 * decides when the two elements differ in it; otherwise the next one is consulted. (Checking
 * each criterion in one direction only would let both cmp(a, b) and cmp(b, a) be true, e.g.
 * for an inode update and a dentry deletion.)
 *
 * @returns true if a has to be synced before b.
 */
bool resyncElemCmp(const MetaSyncCandidateFile::Element& a, const MetaSyncCandidateFile::Element& b)
{
   if (a.isDeletion != b.isDeletion)
      return a.isDeletion;

   const bool aIsInode = a.type == MetaSyncFileType::Inode;
   const bool bIsInode = b.type == MetaSyncFileType::Inode;

   if (aIsInode != bIsInode)
      return aIsInode;

   if (a.type != b.type)
      return int(a.type) < int(b.type);

   return a.path < b.path;
}

}
/**
 * Stream function of the mod sync slave: fetches change sets from the candidate store as long
 * as there are any and transfers each contained modification (deletion or update of a dentry,
 * inode or directory) to the secondary.
 *
 * On the first failed modification the error is counted, the parent job is aborted and the
 * stream is terminated; the function still returns FhgfsOpsErr_SUCCESS in that case.
 *
 * @param socket the connection to the secondary.
 * @returns FhgfsOpsErr_INTERNAL if an element has an unknown type, FhgfsOpsErr_SUCCESS
 *    otherwise.
 */
FhgfsOpsErr BuddyResyncerModSyncSlave::streamCandidates(Socket& socket)
{
// debug switch read from the environment variable BEEGFS_DEBUG_FAIL_MODSYNC (default 0); a
// non-zero value makes every modification count as failed, see the check below.
DEBUG_ENV_VAR(unsigned, DEBUG_FAIL_MODSYNC, 0, "BEEGFS_DEBUG_FAIL_MODSYNC");
while (!getSelfTerminateNotIdle())
{
// nothing queued anymore: leave the loop and terminate the stream regularly.
if (syncCandidates->isFilesEmpty())
break;
MetaSyncCandidateFile candidate;
syncCandidates->fetch(candidate, this);
// signal the candidate at the end of this loop iteration.
// do it like this because we have a few exit points and also have exceptions to take into
// account.
// note: the unique_ptr does not own the candidate, CandidateSignaler only calls signal().
std::unique_ptr<MetaSyncCandidateFile, CandidateSignaler> signaler(&candidate);
// take the elements out of the candidate and bring them into the required sync order.
auto resyncElems = candidate.releaseElements();
std::sort(resyncElems.begin(), resyncElems.end(), resyncElemCmp);
for (auto it = resyncElems.begin(); it != resyncElems.end(); ++it)
{
const auto& element = *it;
// element.path is relative to the meta root, so we have to chop off the buddymir/ prefix
const Path itemPath(element.path.substr(strlen(META_BUDDYMIRROR_SUBDIR_NAME) + 1));
FhgfsOpsErr resyncRes;
LOG_DBG(MIRRORING, DEBUG, "Syncing one modification.", element.path, element.isDeletion,
int(element.type));
switch (element.type)
{
// dentries are addressed by their parent directory plus name.
case MetaSyncFileType::Dentry:
resyncRes = element.isDeletion
? deleteDentry(socket, itemPath.dirname(), itemPath.back())
: streamDentry(socket, itemPath.dirname(), itemPath.back());
break;
// directories and inodes share one code path; the last argument tells them apart.
case MetaSyncFileType::Directory:
case MetaSyncFileType::Inode:
resyncRes = element.isDeletion
? deleteInode(socket, itemPath, element.type == MetaSyncFileType::Directory)
: streamInode(socket, itemPath, element.type == MetaSyncFileType::Directory);
break;
default:
LOG(MIRRORING, ERR, "this should never happen");
return FhgfsOpsErr_INTERNAL;
}
if (resyncRes != FhgfsOpsErr_SUCCESS || DEBUG_FAIL_MODSYNC)
{
LOG(MIRRORING, ERR, "Modification resync failed.", element.path, element.isDeletion,
resyncRes);
numErrors.increase();
// Since this error prevents the resync from reaching a GOOD state on the secondary,
// we abort here.
parentJob->abort(true);
// terminate the current stream, start a new one if necessary. we could (in theory)
// reuse the current stream, but terminating a stream that has seen an error is simpler
// to handle than keeping it open. also, bulk resync would like "fail on error"
// semantics very much.
sendResyncPacket(socket, std::tuple<>());
return FhgfsOpsErr_SUCCESS;
}
else
{
numObjectsSynced.increase();
}
}
}
// an empty packet terminates the stream.
sendResyncPacket(socket, std::tuple<>());
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,50 @@
#pragma once
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/PThread.h>
#include <common/storage/StorageErrors.h>
#include <common/nodes/Node.h>
#include <app/App.h>
#include <components/buddyresyncer/SyncCandidate.h>
#include "SyncSlaveBase.h"
class DirEntry;
/**
 * Sync slave that takes file candidates (change sets) from the candidate store and streams the
 * contained modifications to the buddy. Keeps counters for synced objects and errors.
 */
class BuddyResyncerModSyncSlave : public SyncSlaveBase
{
   friend class BuddyResyncer;
   friend class BuddyResyncJob;

   public:
      BuddyResyncerModSyncSlave(BuddyResyncJob& parentJob, MetaSyncCandidateStore* syncCandidates,
         uint8_t slaveID, const NumNodeID& buddyNodeID);

      // snapshot of the slave's counters
      struct Stats
      {
         uint64_t objectsSynced;
         uint64_t errors;
      };

      /**
       * @returns the current values of both counters.
       */
      Stats getStats()
      {
         Stats snapshot;

         snapshot.objectsSynced = numObjectsSynced.read();
         snapshot.errors = numErrors.read();

         return snapshot;
      }

   private:
      MetaSyncCandidateStore* syncCandidates;

      AtomicUInt64 numObjectsSynced;
      AtomicUInt64 numErrors;

      void syncLoop();

      FhgfsOpsErr streamCandidates(Socket& socket);

   private:
      /**
       * Stream function with the plain-pointer signature expected by resyncAt(); context must
       * point to the BuddyResyncerModSyncSlave whose member function is to be called.
       */
      static FhgfsOpsErr streamCandidates(Socket* socket, void* context)
      {
         auto* const slave = static_cast<BuddyResyncerModSyncSlave*>(context);

         return slave->streamCandidates(*socket);
      }
};

View File

@@ -0,0 +1,59 @@
#include "SessionStoreResyncer.h"
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/storage/mirroring/ResyncSessionStoreMsg.h>
#include <common/net/message/storage/mirroring/ResyncSessionStoreRespMsg.h>
#include <common/toolkit/StringTk.h>
#include <program/Program.h>
#include <app/App.h>
#include <boost/scoped_array.hpp>
/**
 * @param buddyNodeID node ID of the buddy the session store will be sent to.
 */
SessionStoreResyncer::SessionStoreResyncer(const NumNodeID& buddyNodeID) :
   buddyNodeID(buddyNodeID)
{
}
void SessionStoreResyncer::doSync()
{
App* app = Program::getApp();
SessionStore* sessions = app->getMirroredSessions();
NodeStoreServers* metaNodes = app->getMetaNodes();
const uint64_t numSessions = sessions->getSize();
numSessionsToSync.set(numSessions);
// Serialize sessions store into buffer
std::pair<std::unique_ptr<char[]>, size_t> sessionStoreSerBuf = sessions->serializeToBuf();
if (sessionStoreSerBuf.second == 0)
{
// Serialization failed.
errors.set(1);
return;
}
LOG(MIRRORING, DEBUG, "Serialized session store", ("size", sessionStoreSerBuf.second));
ResyncSessionStoreMsg msg(sessionStoreSerBuf.first.get(), sessionStoreSerBuf.second);
RequestResponseArgs rrArgs(NULL, &msg, NETMSGTYPE_ResyncSessionStoreResp);
RequestResponseNode rrNode(buddyNodeID, metaNodes);
msg.registerStreamoutHook(rrArgs);
FhgfsOpsErr requestRes = MessagingTk::requestResponseNode(&rrNode, &rrArgs);
if (requestRes != FhgfsOpsErr_SUCCESS)
{
errors.set(1);
LOG(MIRRORING, ERR, "Request failed.", requestRes);
return;
}
ResyncSessionStoreRespMsg* resp = (ResyncSessionStoreRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr retVal = resp->getResult();
LOG(MIRRORING, DEBUG, "ResyncSessionStoreRespMsg", retVal);
if (retVal != FhgfsOpsErr_SUCCESS)
errors.set(1);
else
numSessionsSynced.set(numSessions);
}

View File

@@ -0,0 +1,35 @@
#pragma once
#include <common/nodes/Node.h>
#include <common/threading/PThread.h>
/**
 * Transfers the mirrored session store to the buddy node (see doSync()) and keeps counters
 * about the transfer.
 */
class SessionStoreResyncer
{
   friend class BuddyResyncer;
   friend class BuddyResyncJob;

   public:
      SessionStoreResyncer(const NumNodeID& buddyNodeID);

      // snapshot of the resyncer's counters
      struct Stats
      {
         uint64_t sessionsToSync;
         uint64_t sessionsSynced;
         bool errors;
      };

      /**
       * @returns the current counter values; errors is true if the error counter is non-zero.
       */
      Stats getStats()
      {
         Stats snapshot;

         snapshot.sessionsToSync = numSessionsToSync.read();
         snapshot.sessionsSynced = numSessionsSynced.read();
         snapshot.errors = (errors.read() != 0);

         return snapshot;
      }

   private:
      NumNodeID buddyNodeID;

      AtomicUInt64 numSessionsToSync;
      AtomicUInt64 numSessionsSynced;
      AtomicSizeT errors; // 0 / 1

      void doSync();
};

View File

@@ -0,0 +1,113 @@
#pragma once
#include <common/toolkit/serialization/Serialization.h>
#include <common/storage/mirroring/SyncCandidateStore.h>
#include <common/threading/Barrier.h>
#include <string>
// Kind of directory a MetaSyncCandidateDir refers to.
enum class MetaSyncDirType
{
InodesHashDir, // first enumerator, i.e. the value a zero-initialized variable has
DentriesHashDir,
ContentDir,
};
// NOTE(review): presumably provides operator== / operator!= for the enum class on compilers
// that need it - confirm against the macro definition.
GCC_COMPAT_ENUM_CLASS_OPEQNEQ(MetaSyncDirType)
/**
 * A directory that has to be resynced: its relative path plus the kind of directory it is.
 *
 * A default-constructed candidate has an empty path and the type InodesHashDir (the first
 * enumerator, i.e. the value zero-initialization would yield). The type is given an explicit
 * default because candidates are default-constructed before being filled by the candidate
 * store and may be inspected even if the store did not fill them; without the initializer
 * getType() would read an indeterminate value in that case.
 */
class MetaSyncCandidateDir
{
   public:
      MetaSyncCandidateDir(const std::string& relativePath, MetaSyncDirType type):
         relPath(relativePath), type(type)
      {}

      MetaSyncCandidateDir() = default;

   private:
      std::string relPath;
      MetaSyncDirType type = MetaSyncDirType::InodesHashDir;

   public:
      /** @returns the path of the directory as passed to the constructor (empty by default). */
      const std::string& getRelativePath() const { return relPath; }

      /** @returns the directory kind (InodesHashDir by default). */
      MetaSyncDirType getType() const { return type; }
};
// Kind of object a single element of a MetaSyncCandidateFile refers to.
// The enumerator order matters: the values are put on the wire as uint8_t (see SerializeAs
// below), and Inode being the smallest value makes inodes sort first by type.
enum class MetaSyncFileType
{
Inode,
Dentry,
Directory,
};
// NOTE(review): presumably provides operator== / operator!= for the enum class on compilers
// that need it - confirm against the macro definition.
GCC_COMPAT_ENUM_CLASS_OPEQNEQ(MetaSyncFileType)
// Serialization trait: MetaSyncFileType values are serialized as uint8_t.
template<>
struct SerializeAs<MetaSyncFileType> {
typedef uint8_t type;
};
/**
 * A change set: a list of modified or deleted metadata objects that have to be transferred to
 * the buddy, plus an optional barrier that is waited on via signal().
 *
 * The class is movable (implemented through swap); moving leaves the source with an empty
 * element list and no barrier.
 */
class MetaSyncCandidateFile
{
   public:
      // one modification: the object's path, its kind and whether it was deleted
      struct Element
      {
         std::string path;
         MetaSyncFileType type;
         bool isDeletion;
      };

      MetaSyncCandidateFile(): barrier(nullptr) {}

      MetaSyncCandidateFile(MetaSyncCandidateFile&& src):
         barrier(nullptr)
      {
         // start out empty, then trade contents with the source
         swap(src);
      }

      MetaSyncCandidateFile& operator=(MetaSyncCandidateFile&& other)
      {
         // move into a temporary first so that the previous contents of *this are destroyed
         // together with the temporary
         MetaSyncCandidateFile moved(std::move(other));
         moved.swap(*this);

         return *this;
      }

      void swap(MetaSyncCandidateFile& other)
      {
         paths.swap(other.paths);
         std::swap(barrier, other.barrier);
      }

      /**
       * Wait on the barrier set with prepareSignal(). A barrier must have been set before.
       */
      void signal()
      {
         barrier->wait();
      }

      friend void swap(MetaSyncCandidateFile& a, MetaSyncCandidateFile& b)
      {
         a.swap(b);
      }

   private:
      std::vector<Element> paths;
      Barrier* barrier; // not owned; null until prepareSignal() is called

   public:
      const std::vector<Element>& getElements() const { return paths; }

      /** @returns the element list, moved out of this object. */
      std::vector<Element> releaseElements() { return std::move(paths); }

      /** Append an element for an updated (not deleted) object. */
      void addModification(std::string path, MetaSyncFileType type)
      {
         Element modification{std::move(path), type, false};
         paths.push_back(std::move(modification));
      }

      /** Append an element for a deleted object. */
      void addDeletion(std::string path, MetaSyncFileType type)
      {
         Element deletion{std::move(path), type, true};
         paths.push_back(std::move(deletion));
      }

      /** Remember the barrier signal() will wait on; the barrier must outlive that call. */
      void prepareSignal(Barrier& barrier)
      {
         this->barrier = &barrier;
      }
};
// candidate store specialized for meta resync: directory candidates and file change sets
using MetaSyncCandidateStore = SyncCandidateStore<MetaSyncCandidateDir, MetaSyncCandidateFile>;

View File

@@ -0,0 +1,249 @@
#include "SyncSlaveBase.h"
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
#include <net/message/storage/mirroring/ResyncRawInodesMsgEx.h>
#include <net/msghelpers/MsgHelperXAttr.h>
#include <program/Program.h>
#include <toolkit/XAttrTk.h>
/**
 * Thread entry point: marks the slave as running, executes syncLoop() and marks the slave as
 * not running again. Exceptions derived from std::exception are passed to the app's component
 * exception handler.
 */
void SyncSlaveBase::run()
{
   setIsRunning(true);

   try
   {
      LOG(MIRRORING, DEBUG, "Component started");

      registerSignalHandler();
      syncLoop();

      LOG(MIRRORING, DEBUG, "Component stopped");
   }
   catch (std::exception& componentException)
   {
      PThread::getCurrentThreadApp()->handleComponentException(componentException);
   }

   setIsRunning(false);
}
/**
 * Receive one message from the socket and interpret it as the acknowledgement of the
 * secondary.
 *
 * @returns FhgfsOpsErr_INTERNAL if nothing was received, FhgfsOpsErr_COMMUNICATION if the
 *    message is not a ResyncRawInodesResp, otherwise the result carried by the response.
 */
FhgfsOpsErr SyncSlaveBase::receiveAck(Socket& socket)
{
   auto ackBuf = MessagingTk::recvMsgBuf(socket);
   if (ackBuf.empty())
      return FhgfsOpsErr_INTERNAL;

   auto* const msgFactory = PThread::getCurrentThreadApp()->getNetMessageFactory();
   const auto ackMsg = msgFactory->createFromBuf(std::move(ackBuf));

   if (ackMsg->getMsgType() == NETMSGTYPE_ResyncRawInodesResp)
      return static_cast<ResyncRawInodesRespMsg&>(*ackMsg).getResult();

   return FhgfsOpsErr_COMMUNICATION;
}
/**
 * Run one resync stream towards the buddy node.
 *
 * Stores the base path (prefixed with the buddy mirror directory name) in this object, sends a
 * ResyncRawInodes message and lets streamFn produce the stream contents.
 *
 * @param basePath path the stream refers to.
 * @param wholeDirectory forwarded to the message.
 * @param streamFn function that writes the stream; called with context.
 * @param context opaque pointer handed to streamFn.
 * @returns the communication error if the request failed, otherwise the result reported by
 *    the secondary.
 */
FhgfsOpsErr SyncSlaveBase::resyncAt(const Path& basePath, bool wholeDirectory,
   FhgfsOpsErr (*streamFn)(Socket*, void*), void* context)
{
   App* const app = Program::getApp();

   // remembered for the stream helpers, which resolve their paths against it
   this->basePath = META_BUDDYMIRROR_SUBDIR_NAME / basePath;

   const bool sendXAttrs = app->getConfig()->getStoreClientXAttrs();
   ResyncRawInodesMsgEx msg(basePath, sendXAttrs, wholeDirectory);

   RequestResponseNode rrNode(buddyNodeID, app->getMetaNodes());
   RequestResponseArgs rrArgs(nullptr, &msg, NETMSGTYPE_ResyncRawInodesResp,
      streamFn, context);

   // resync processing may take a very long time for each step, eg if a very large directory
   // must be cleaned out on the secondary. do not use timeouts for resync communication right
   // now.
   rrArgs.minTimeoutMS = -1;

   // the locals "commRes" and "resyncRes" keep their names, they are logged expressions
   const auto commRes = MessagingTk::requestResponseNode(&rrNode, &rrArgs);
   if (commRes != FhgfsOpsErr_SUCCESS)
   {
      LOG(MIRRORING, ERR, "Error during communication with secondary.", commRes);
      return commRes;
   }

   const auto resyncRes = static_cast<ResyncRawInodesRespMsg&>(*rrArgs.outRespMsg).getResult();

   if (resyncRes != FhgfsOpsErr_SUCCESS)
   {
      LOG(MIRRORING, ERR, "Error while resyncing directory.", basePath, resyncRes);
   }

   return resyncRes;
}
/**
 * Send one dentry to the secondary and wait for its acknowledgement.
 *
 * If the dentry has its inode inlined, only a link packet carrying the dentry's ID is sent;
 * otherwise the complete serialized dentry is sent.
 *
 * @param socket the connection to the secondary.
 * @param contDirRelPath path of the containing directory, relative to the current base path.
 * @param name name of the dentry within that directory.
 * @returns FhgfsOpsErr_INTERNAL if the dentry cannot be loaded or serialized, the send error
 *    if sending failed, otherwise the result of receiveAck().
 */
FhgfsOpsErr SyncSlaveBase::streamDentry(Socket& socket, const Path& contDirRelPath,
   const std::string& name)
{
   std::unique_ptr<DirEntry> dentry(
      DirEntry::createFromFile((basePath / contDirRelPath).str(), name));

   if (!dentry)
   {
      LOG(MIRRORING, ERR, "Could not open dentry.", basePath, contDirRelPath, name);
      return FhgfsOpsErr_INTERNAL;
   }

   FhgfsOpsErr sendRes;

   if (dentry->getIsInodeInlined())
   {
      sendRes = sendResyncPacket(socket, LinkDentryInfo(
            MetaSyncFileType::Dentry,
            (contDirRelPath / name).str(),
            true,
            dentry->getID(),
            false));
   }
   else
   {
      std::vector<char> dentryContent;

      // two passes: the first one with an empty serializer to learn the required size, the
      // second one to write into the buffer
      Serializer sizeProbe;
      dentry->serializeDentry(sizeProbe);

      dentryContent.resize(sizeProbe.size());

      Serializer ser(&dentryContent[0], dentryContent.size());
      dentry->serializeDentry(ser);

      if (!ser.good())
      {
         LOG(MIRRORING, ERR, "Could not serialize dentry for secondary.");
         return FhgfsOpsErr_INTERNAL;
      }

      sendRes = sendResyncPacket(socket, FullDentryInfo(
            MetaSyncFileType::Dentry,
            (contDirRelPath / name).str(),
            false,
            dentryContent,
            false));
   }

   if (sendRes != FhgfsOpsErr_SUCCESS)
      return sendRes;

   return receiveAck(socket);
}
/**
 * Sends the raw metadata of one inode to the secondary, optionally followed by its user xattrs,
 * and waits for the acknowledgement.
 *
 * For non-directories the raw content of META_XATTR_NAME plus every xattr of the inode that is
 * listed in METADATA_XATTR_NAME_LIST is collected and sent. For directories the content map is
 * sent empty.
 *
 * @param socket connection to the secondary
 * @param inodeRelPath inode path relative to the current base path
 * @param isDirectory selects MetaSyncFileType::Directory instead of MetaSyncFileType::Inode
 * @return error of the first failing read/list/send step, FhgfsOpsErr_INTERNAL if user xattrs
 *    could not be listed or streamed, otherwise the result of receiveAck()
 */
FhgfsOpsErr SyncSlaveBase::streamInode(Socket& socket, const Path& inodeRelPath,
const bool isDirectory)
{
const Path fullPath(basePath / inodeRelPath);
MetaStore& store = *Program::getApp()->getMetaStore();
// Map to store attribute name and its data
std::map<std::string, std::vector<char>> contents;
if (!isDirectory)
{
// attrData and readRes are shared with the lambda below (captured by reference); readRes
// carries the error code out of the lambda when it returns false.
std::vector<char> attrData;
FhgfsOpsErr readRes;
// Helper function to read and store attribute data in map
auto readAndStoreMetaAttribute = [&](const std::string& attrName)
{
// attrData was moved from by a previous call; clear() brings it back to a defined empty state
attrData.clear();
readRes = store.getRawMetadata(fullPath, attrName.c_str(), attrData);
if (readRes != FhgfsOpsErr_SUCCESS)
return false;
contents.insert(std::make_pair(attrName, std::move(attrData)));
return true;
};
// Handle META_XATTR_NAME ("user.fhgfs") separately because it can be stored as either
// file contents or an extended attribute, depending on the 'storeUseExtendedAttribs'
// configuration setting in the meta config. In contrast, all other metadata-specific
// attributes are strictly stored as extended attributes and do not have the option to
// be stored as file contents.
if (!readAndStoreMetaAttribute(META_XATTR_NAME))
return readRes;
// Now handle all remaining metadata attributes
std::pair<FhgfsOpsErr, std::vector<std::string>> listXAttrs = XAttrTk::listXAttrs(fullPath.str());
if (listXAttrs.first != FhgfsOpsErr_SUCCESS)
return listXAttrs.first;
for (auto const& attrName : listXAttrs.second)
{
// Process all metadata-specific attributes except META_XATTR_NAME (already handled above)
// This approach ensures we only process attribute(s) that:
// 1. Exist on the inode.
// 2. Are listed in METADATA_XATTR_NAME_LIST, our collection of known metadata attributes.
// 3. Are not META_XATTR_NAME, to prevent duplicate processing.
if (std::find(METADATA_XATTR_NAME_LIST.begin(), METADATA_XATTR_NAME_LIST.end(), attrName)
!= METADATA_XATTR_NAME_LIST.end() && (attrName != META_XATTR_NAME))
{
if (!readAndStoreMetaAttribute(attrName))
return readRes;
}
}
}
// send the inode record itself; the final "false" marks it as not being a deletion
const FhgfsOpsErr sendRes = sendResyncPacket(socket, InodeInfo(
isDirectory
? MetaSyncFileType::Directory
: MetaSyncFileType::Inode,
inodeRelPath.str(),
contents,
false));
if (sendRes != FhgfsOpsErr_SUCCESS)
return sendRes;
// NOTE(review): from here on the inode record has already been sent; the error returns below
// leave without calling receiveAck() - confirm that callers drop the connection in that case.
if (Program::getApp()->getConfig()->getStoreClientXAttrs())
{
auto xattrs = XAttrTk::listUserXAttrs(fullPath.str());
if (xattrs.first != FhgfsOpsErr_SUCCESS)
{
LOG(MIRRORING, ERR, "Could not list resync candidate xattrs.", fullPath, ("error", xattrs.first));
// NOTE(review): xattrs is a local that is destroyed by the return below, so this clear() has
// no observable effect.
xattrs.second.clear();
return FhgfsOpsErr_INTERNAL;
}
MsgHelperXAttr::StreamXAttrState state(fullPath.str(), std::move(xattrs.second));
const FhgfsOpsErr xattrRes = MsgHelperXAttr::StreamXAttrState::streamXattrFn(&socket, &state);
if (xattrRes != FhgfsOpsErr_SUCCESS)
{
LOG(MIRRORING, ERR, "Error while sending xattrs to secondary.", fullPath, xattrRes);
return FhgfsOpsErr_INTERNAL;
}
}
return receiveAck(socket);
}
/**
 * Tells the secondary to delete a dentry and waits for the acknowledgement.
 *
 * @param socket connection to the secondary
 * @param contDirRelPath directory containing the dentry, relative to the current base path
 * @param name name of the dentry inside contDirRelPath
 * @return the send error if sending failed, otherwise the result of receiveAck()
 */
FhgfsOpsErr SyncSlaveBase::deleteDentry(Socket& socket, const Path& contDirRelPath,
   const std::string& name)
{
   // both strings are referenced (not copied) by the packet tuple, so keep them as named locals
   const std::string relPath = (contDirRelPath / name).str();
   const std::string noLinkTarget;

   const auto sendRes = sendResyncPacket(socket,
      LinkDentryInfo(MetaSyncFileType::Dentry, relPath, true, noLinkTarget, true));

   return (sendRes == FhgfsOpsErr_SUCCESS) ? receiveAck(socket) : sendRes;
}
/**
 * Tells the secondary to delete an inode and waits for the acknowledgement.
 *
 * @param socket connection to the secondary
 * @param inodeRelPath inode path relative to the current base path
 * @param isDirectory selects MetaSyncFileType::Directory instead of MetaSyncFileType::Inode
 * @return the send error if sending failed, otherwise the result of receiveAck()
 */
FhgfsOpsErr SyncSlaveBase::deleteInode(Socket& socket, const Path& inodeRelPath,
   const bool isDirectory)
{
   const MetaSyncFileType fileType =
      isDirectory ? MetaSyncFileType::Directory : MetaSyncFileType::Inode;

   const std::string relPath = inodeRelPath.str();

   // a deletion carries no attribute contents
   const std::map<std::string, std::vector<char>> noContents;

   const auto sendRes = sendResyncPacket(socket, InodeInfo(fileType, relPath, noContents, true));

   return (sendRes == FhgfsOpsErr_SUCCESS) ? receiveAck(socket) : sendRes;
}

View File

@@ -0,0 +1,129 @@
#pragma once
#include <common/net/sock/Socket.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/PThread.h>
#include <app/App.h>
class DirEntry;
/**
 * Base class of the resync slave threads.
 *
 * Provides the running-state bookkeeping and the helpers that stream dentries and inodes (or
 * their deletion) to the buddy node. Subclasses implement syncLoop().
 */
class SyncSlaveBase : public PThread
{
   public:
      /**
       * @return the running flag, read while holding stateMutex
       */
      bool getIsRunning()
      {
         std::lock_guard<Mutex> lock(stateMutex);
         return isRunning;
      }

      void setOnlyTerminateIfIdle(bool value)
      {
         onlyTerminateIfIdle.set(value);
      }

      bool getOnlyTerminateIfIdle()
      {
         return onlyTerminateIfIdle.read();
      }

   protected:
      BuddyResyncJob* parentJob;

      NumNodeID buddyNodeID;

      Mutex stateMutex; // taken by getIsRunning() and setIsRunning()
      Condition isRunningChangeCond; // broadcast on every setIsRunning() call

      AtomicSizeT onlyTerminateIfIdle;

      bool isRunning;

      Path basePath; // set by resyncAt(), prefix for the relative paths of the stream helpers

      SyncSlaveBase(const std::string& threadName, BuddyResyncJob& parentJob,
         const NumNodeID buddyNodeID) :
         PThread(threadName),
         parentJob(&parentJob),
         buddyNodeID(buddyNodeID),
         isRunning(false)
      {
      }

      virtual void run() override;
      virtual void syncLoop() = 0;

      FhgfsOpsErr resyncAt(const Path& basePath, bool wholeDirectory,
         FhgfsOpsErr (*streamFn)(Socket*, void*), void* context);

      FhgfsOpsErr streamDentry(Socket& socket, const Path& contDirRelPath,
         const std::string& name);
      FhgfsOpsErr streamInode(Socket& socket, const Path& inodeRelPath, const bool isDirectory);

      FhgfsOpsErr deleteDentry(Socket& socket, const Path& contDirRelPath,
         const std::string& name);
      FhgfsOpsErr deleteInode(Socket& socket, const Path& inodeRelPath, const bool isDirectory);

      /**
       * Updates the running flag under stateMutex and wakes everyone waiting on
       * isRunningChangeCond.
       */
      void setIsRunning(bool isRunning)
      {
         std::lock_guard<Mutex> lock(stateMutex);

         this->isRunning = isRunning;
         isRunningChangeCond.broadcast();
      }

      /**
       * @return true if self-termination was requested and the "only terminate if idle" flag
       *    is not set
       */
      bool getSelfTerminateNotIdle()
      {
         if (!getSelfTerminate())
            return false;

         return !getOnlyTerminateIfIdle();
      }

      /**
       * Serializes value behind a uint32_t length field and sends the resulting buffer over the
       * given socket.
       *
       * @return FhgfsOpsErr_OUTOFMEM if the packet buffer cannot be allocated,
       *    FhgfsOpsErr_INTERNAL if serialization fails, FhgfsOpsErr_SUCCESS otherwise
       */
      template<typename ValueT>
      static FhgfsOpsErr sendResyncPacket(Socket& socket, const ValueT& value)
      {
         // buffer-less serializer, used here only to obtain the serialized size of the value
         Serializer sizer;
         sizer % value;

         const unsigned packetSize = sizer.size();
         const unsigned totalSize = packetSize + sizeof(uint32_t);

         std::unique_ptr<char[]> buffer(new (std::nothrow) char[totalSize]);
         if (!buffer)
         {
            LOG(MIRRORING, ERR, "Could not allocate memory for resync packet.");
            return FhgfsOpsErr_OUTOFMEM;
         }

         // length field first, then the value itself
         const std::tuple<uint32_t, const ValueT&> packet(packetSize, value);

         Serializer writer(buffer.get(), totalSize);
         writer % packet;
         if (!writer.good())
         {
            LOG(MIRRORING, ERR, "Serialization of resync packet failed.");
            return FhgfsOpsErr_INTERNAL;
         }

         socket.send(buffer.get(), totalSize, 0);
         return FhgfsOpsErr_SUCCESS;
      }

      static FhgfsOpsErr receiveAck(Socket& socket);

   private:
      // wire layout of a dentry that only links to an inode
      using LinkDentryInfo = std::tuple<
         MetaSyncFileType,
         const std::string&, // relative path
         bool,               // is hardlink?
         const std::string&, // link target entry id
         bool                // is deletion?
         >;

      // wire layout of a dentry sent with its raw content
      using FullDentryInfo = std::tuple<
         MetaSyncFileType,
         const std::string&,       // relative path
         bool,                     // is hardlink?
         const std::vector<char>&, // dentry raw content
         bool                      // is deletion?
         >;

      // wire layout of an inode
      using InodeInfo = std::tuple<
         MetaSyncFileType,
         const std::string&,                       // relative path
         std::map<std::string, std::vector<char>>, // metadata specific attribute's raw contents
         bool                                      // is deletion?
         >;
};

View File

@@ -0,0 +1,38 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/threading/Atomics.h>
#include <common/threading/Condition.h>
#include <common/app/log/Logger.h>
/**
* Work item intended to stop all worker threads temporarily, detect that all are stopped using a
* barrier, and restarting them using the same barrier.
* Example:
* Barrier workerBarrier(numWorkers + 1);
* <insert instance of BarrierWorkItem(&workerBarrier) into personal queue of numWorkers threads>
* workerBarrier.wait(); // Wait for all workers to stop
* <do something while workers are stopped>
* workerBarrier.wait(); // restart the workers
*/
class BarrierWork : public Work
{
public:
BarrierWork(Barrier* barrier) :
barrier(barrier)
{ }
virtual ~BarrierWork() { }
void process(char*, unsigned, char*, unsigned)
{
LOG_DBG(WORKQUEUES, DEBUG, "Start blocking.");
barrier->wait();
barrier->wait();
LOG_DBG(WORKQUEUES, DEBUG, "Done.");
}
private:
Barrier* barrier;
};

View File

@@ -0,0 +1,114 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/session/opening/CloseChunkFileMsg.h>
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "CloseChunkFileWork.h"
#include <boost/lexical_cast.hpp>
void CloseChunkFileWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Sends a CloseChunkFileMsg to the storage target and evaluates the response.
 *
 * If outDynAttribs is set, it is filled from the response whenever a response was received, even
 * if the remote result is an error.
 *
 * @return the communication error if the exchange failed, otherwise the result reported by the
 *    target
 */
FhgfsOpsErr CloseChunkFileWork::communicate()
{
   const char* logContext = "Close chunk file work";

   App* app = Program::getApp();
   const bool isBuddyMirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // build the request
   CloseChunkFileMsg closeMsg(sessionID, fileHandleID, targetID, pathInfoPtr);

   if (isBuddyMirrored)
   {
      closeMsg.addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR);

      if (useBuddyMirrorSecond)
      {
         closeMsg.addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS);
         closeMsg.addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);
      }
   }

   closeMsg.setMsgHeaderUserID(msgUserID);

   // describe the destination
   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );
   rrTarget.setTargetStates(app->getTargetStateStore() );

   if (isBuddyMirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(nullptr, &closeMsg, NETMSGTYPE_CloseChunkFileResp);

   // exchange request and response
   const FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   if (unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   {
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " +
         std::string(isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; "
         "Session: " + sessionID.str() + "; "
         "FileHandle: " + fileHandleID);

      return requestRes;
   }

   // a response of the expected type is available from here on
   auto* closeRespMsg = static_cast<CloseChunkFileRespMsg*>(rrArgs.outRespMsg.get() );
   const FhgfsOpsErr closeRemoteRes = closeRespMsg->getResult();

   // set current dynamic attribs (even if result not success, because then storageVersion==0)
   if (outDynAttribs)
   {
      *outDynAttribs = DynamicFileAttribs(closeRespMsg->getStorageVersion(),
         closeRespMsg->getFileSize(), closeRespMsg->getAllocedBlocks(),
         closeRespMsg->getModificationTimeSecs(), closeRespMsg->getLastAccessTimeSecs() );
   }

   if (closeRemoteRes == FhgfsOpsErr_SUCCESS)
   {
      LOG_DEBUG(logContext, Log_DEBUG,
         "Closed chunk file on target. " +
         std::string( isBuddyMirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) + "; "
         "Session: " + sessionID.str() + "; "
         "FileHandle: " + fileHandleID);

      return FhgfsOpsErr_SUCCESS;
   }

   // error: chunk file not closed
   // (INUSE happens on ctrl+c, so don't irritate the user with these log msgs)
   const int logLevel = (closeRemoteRes == FhgfsOpsErr_INUSE) ? Log_DEBUG : Log_WARNING;

   LogContext(logContext).log(logLevel,
      "Closing chunk file on target failed. " +
      std::string(isBuddyMirrored ? "Mirror " : "") +
      "TargetID: " + StringTk::uintToStr(targetID) + "; "
      "Error: " + boost::lexical_cast<std::string>(closeRemoteRes) + "; "
      "Session: " + sessionID.str() + "; "
      "FileHandle: " + std::string(fileHandleID) );

   return closeRemoteRes;
}

View File

@@ -0,0 +1,69 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Work item that closes one chunk file on a storage target and reports the outcome through the
 * out-pointers given at construction.
 */
class CloseChunkFileWork : public Work
{
   public:
      /**
       * @param outDynAttribs may be NULL if caller is not interested
       * @param outResult receives the result of the close operation in process()
       * @param counter incremented by process() after outResult was written
       */
      CloseChunkFileWork(const NumNodeID sessionID, const std::string& fileHandleID,
         StripePattern* pattern, uint16_t targetID, PathInfo* pathInfo,
         DynamicFileAttribs *outDynAttribs, FhgfsOpsErr* outResult,
         SynchronizedCounter* counter) :
         sessionID(sessionID),
         fileHandleID(fileHandleID),
         pattern(pattern),
         targetID(targetID),
         pathInfoPtr(pathInfo),
         outDynAttribs(outDynAttribs),
         outResult(outResult),
         counter(counter),
         useBuddyMirrorSecond(false),
         msgUserID(NETMSG_DEFAULT_USERID)
      {
         // nothing to do beyond member initialization
      }

      virtual ~CloseChunkFileWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      NumNodeID sessionID;
      std::string fileHandleID;

      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfoPtr; // to find chunk files

      DynamicFileAttribs* outDynAttribs;
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;

      bool useBuddyMirrorSecond;

      unsigned msgUserID;

      FhgfsOpsErr communicate();

   public:
      // getters & setters

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }
};

View File

@@ -0,0 +1,90 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsMsg.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "GetChunkFileAttribsWork.h"
void GetChunkFileAttribsWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Requests the chunk file attributes from the storage target and, on success, stores them in
 * outDynAttribs.
 *
 * @return the communication error if the exchange failed, the remote error if the target
 * reported one, FhgfsOpsErr_SUCCESS otherwise (only then outDynAttribs is written)
 */
FhgfsOpsErr GetChunkFileAttribsWork::communicate()
{
const char* logContext = "Stat chunk file work";
App* app = Program::getApp();
// prepare request message
GetChunkFileAttribsMsg getSizeMsg(entryID, targetID, pathInfo);
if(pattern->getPatternType() == StripePatternType_BuddyMirror)
{
getSizeMsg.addMsgHeaderFeatureFlag(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR);
if(useBuddyMirrorSecond)
getSizeMsg.addMsgHeaderFeatureFlag(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND);
}
getSizeMsg.setMsgHeaderUserID(msgUserID);
// prepare communication
RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );
rrTarget.setTargetStates(app->getTargetStateStore() );
if(pattern->getPatternType() == StripePatternType_BuddyMirror)
rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);
RequestResponseArgs rrArgs(NULL, &getSizeMsg, NETMSGTYPE_GetChunkFileAttribsResp);
// communicate
FhgfsOpsErr requestRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
{ // communication error
LogContext(logContext).log(Log_WARNING,
"Communication with storage target failed. " +
std::string(pattern->getPatternType() == StripePatternType_BuddyMirror ? "Mirror " : "") +
"TargetID: " + StringTk::uintToStr(targetID) + "; "
"EntryID: " + entryID);
return requestRes;
}
// correct response type received
auto* getSizeRespMsg = (GetChunkFileAttribsRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr getSizeResult = getSizeRespMsg->getResult();
if(getSizeResult != FhgfsOpsErr_SUCCESS)
{ // error: chunk file attributes could not be retrieved
// NOTE(review): the remote error code itself is not part of this log message
LogContext(logContext).log(Log_WARNING,
"Getting chunk file attributes from target failed. " +
std::string(pattern->getPatternType() == StripePatternType_BuddyMirror ? "Mirror " : "") +
"TargetID: " + StringTk::uintToStr(targetID) + "; "
"EntryID: " + entryID);
return getSizeResult;
}
// success: chunk file dynamic attribs refreshed
DynamicFileAttribs currentDynAttribs(getSizeRespMsg->getStorageVersion(),
getSizeRespMsg->getSize(), getSizeRespMsg->getAllocedBlocks(),
getSizeRespMsg->getModificationTimeSecs(), getSizeRespMsg->getLastAccessTimeSecs() );
*outDynAttribs = currentDynAttribs;
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,64 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/PathInfo.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Work item that fetches the attributes of one chunk file from a storage target and reports the
 * outcome through the out-pointers given at construction.
 */
class GetChunkFileAttribsWork : public Work
{
   public:
      /**
       * @param pathInfo: Only as reference pointer, not owned by this object
       * @param outDynAttribs written by the work item when the request succeeded
       * @param outResult receives the result of the request in process()
       * @param counter incremented by process() after outResult was written
       */
      GetChunkFileAttribsWork(const std::string& entryID, StripePattern* pattern,
         uint16_t targetID, PathInfo* pathInfo, DynamicFileAttribs *outDynAttribs,
         FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID),
         pattern(pattern),
         targetID(targetID),
         pathInfo(pathInfo),
         outDynAttribs(outDynAttribs),
         outResult(outResult),
         counter(counter),
         useBuddyMirrorSecond(false),
         msgUserID(NETMSG_DEFAULT_USERID)
      {
         // nothing to do beyond member initialization
      }

      virtual ~GetChunkFileAttribsWork()
      {
      }

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      std::string entryID;

      StripePattern* pattern;
      uint16_t targetID;
      PathInfo *pathInfo; // only as reference ptr, not owned by this object!

      DynamicFileAttribs* outDynAttribs;
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;

      bool useBuddyMirrorSecond;

      unsigned msgUserID; // only used for msg header info

      FhgfsOpsErr communicate();

   public:
      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }
};

View File

@@ -0,0 +1,218 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/net/message/control/AckMsg.h>
#include <common/net/message/session/locking/LockGrantedMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "LockEntryNotificationWork.h"
#include <mutex>
// Guards ackCounter; taken in incAckCounter().
Mutex LockEntryNotificationWork::ackCounterMutex;
// Shared counter; process() embeds its value in the ack ID prefix via incAckCounter().
unsigned LockEntryNotificationWork::ackCounter = 0;
/**
 * Notifies all clients in notifyList that their entry lock was granted and waits for their acks.
 *
 * A LockGrantedMsg is (re)sent via the datagram listener to every client whose ack is still
 * outstanding, for up to the configured number of retries. Locks whose ack never arrived are
 * released again; if more than one ack is missing, all remaining waiters on the inode are
 * cancelled.
 *
 * @param bufOut buffer used to serialize the outgoing messages
 * @param bufOutLen size of bufOut
 */
void LockEntryNotificationWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
/* note: this code is very similar to LockRangeNotificationWork, so if you change something here,
you probably want to change it there, too. */
const char* logContext = "LockEntryNotificationWork::process";
App* app = Program::getApp();
Logger* logger = Logger::getLogger();
Config* cfg = app->getConfig();
AcknowledgmentStore* ackStore = app->getAckStore();
DatagramListener* dgramLis = app->getDatagramListener();
MetaStore* metaStore = app->getMetaStore();
NodeStoreClients* clients = app->getClientNodes();
NumNodeID localNodeID = app->getLocalNode().getNumID();
// max total time is ackWaitMS * numRetries, defaults to 333ms * 15 => 5s
int ackWaitSleepMS = cfg->getTuneLockGrantWaitMS();
int numRetriesLeft = cfg->getTuneLockGrantNumRetries();
WaitAckMap waitAcks;
WaitAckMap receivedAcks;
WaitAckNotification notifier;
bool allAcksReceived = false;
// note: we use uint for tv_sec (not uint64) because 32 bits are enough here
// gives string like this: "time-counter-elck-"
std::string ackIDPrefix =
StringTk::uintToHexStr(TimeAbs().getTimeval()->tv_sec) + "-" +
StringTk::uintToHexStr(incAckCounter() ) + "-" "elck" "-";
if (notifyList.empty())
return; // nothing to be done
// create and register waitAcks
/* note: waitAcks store pointers to notifyList items, so make sure to not remove anything from
the list while we're still using the waitAcks pointers */
for (LockEntryNotifyListIter iter = notifyList.begin(); iter != notifyList.end(); iter++)
{
std::string ackID = ackIDPrefix + iter->lockAckID; // (we assume lockAckID is globally unique)
WaitAck waitAck(ackID, &(*iter) );
waitAcks.insert(WaitAckMapVal(ackID, waitAck) );
}
ackStore->registerWaitAcks(&waitAcks, &receivedAcks, &notifier);
// loop: send requests -> waitforcompletion -> resend
while(numRetriesLeft && !app->getSelfTerminate() )
{
// create waitAcks copy
// (the copy is taken under notifier.waitAcksMutex and used for sending below)
WaitAckMap currentWaitAcks;
{
const std::lock_guard<Mutex> lock (notifier.waitAcksMutex);
currentWaitAcks = waitAcks;
}
// send messages
for(WaitAckMapIter iter = currentWaitAcks.begin(); iter != currentWaitAcks.end(); iter++)
{
EntryLockDetails* lockDetails = (EntryLockDetails*)iter->second.privateData;
LockGrantedMsg msg(lockDetails->lockAckID, iter->first, localNodeID);
std::pair<bool, unsigned> serializeRes = msg.serializeMessage(bufOut, bufOutLen);
if(unlikely(!serializeRes.first) )
{ // buffer too small - should never happen
logger->log(Log_CRITICAL, logContext, "BUG(?): Buffer too small for message "
"serialization: " + StringTk::intToStr(bufOutLen) + "/" +
StringTk::intToStr(serializeRes.second) );
continue;
}
auto node = clients->referenceNode(lockDetails->clientNumID);
if(unlikely(!node) )
{ // node not exists
logger->log(Log_DEBUG, logContext, "Cannot grant lock to unknown client: " +
lockDetails->clientNumID.str());
continue;
}
dgramLis->sendBufToNode(*node, bufOut, serializeRes.second);
}
// wait for acks
allAcksReceived = ackStore->waitForAckCompletion(&currentWaitAcks, &notifier, ackWaitSleepMS);
if(allAcksReceived)
break; // all acks received
// some waitAcks left => prepare next loop
numRetriesLeft--;
}
// waiting for acks is over
ackStore->unregisterWaitAcks(&waitAcks);
// check and handle results (waitAcks now contains all unreceived acks)
if (waitAcks.empty())
{
LOG_DBG(GENERAL, DEBUG, "Stats: received all acks.", receivedAcks.size(), notifyList.size());
return; // perfect, all acks received
}
// some acks were missing...
logger->log(Log_DEBUG, logContext, "Some replies to lock grants missing. Received: " +
StringTk::intToStr(receivedAcks.size() ) + "/" +
StringTk::intToStr(receivedAcks.size() + waitAcks.size() ) );
// the inode is supposed to be referenced already
MetaFileHandle inode = metaStore->referenceLoadedFile(this->parentEntryID, this->isBuddyMirrored,
this->entryID);
if(unlikely(!inode) )
{ // locked inode cannot be referenced
logger->log(Log_DEBUG, logContext, "FileID cannot be referenced (file unlinked?): " +
this->entryID);
return;
}
// unlock all locks for which we didn't receive an ack
for(WaitAckMapIter iter = waitAcks.begin(); iter != waitAcks.end(); iter++)
{
EntryLockDetails* lockDetails = (EntryLockDetails*)iter->second.privateData;
unlockWaiter(*inode, lockDetails);
LOG_DEBUG(logContext, Log_DEBUG, "Reply was missing from: " + lockDetails->clientNumID.str());
}
// cancel all remaining lock waiters if too many acks were missing
// (this is very important to avoid long timeouts if multiple clients are gone/disconnected)
if(waitAcks.size() > 1)
{ // cancel all waiters
cancelAllWaiters(*inode);
}
// cleanup
metaStore->releaseFile(this->parentEntryID, inode);
}
/**
 * Release the lock of a waiter whose ack never arrived.
 *
 * The lock details are switched to "unlock" and then applied to the inode through the call that
 * matches this work item's lock type.
 */
void LockEntryNotificationWork::unlockWaiter(FileInode& inode, EntryLockDetails* lockDetails)
{
   lockDetails->setUnlock();

   switch (lockType)
   {
      case LockEntryNotifyType_APPEND:
         inode.flockAppend(*lockDetails);
         break;

      case LockEntryNotifyType_FLOCK:
         inode.flockEntry(*lockDetails);
         break;

      default:
         LOG(GENERAL, ERR, "Invalid lockType given.", lockType);
         break;
   }
}
/**
 * Cancel every remaining lock waiter on the inode.
 *
 * Usually called because too many acks were not received and we want to avoid repeated long
 * timeout stalls. Which waiter queue is cancelled depends on this work item's lock type.
 */
void LockEntryNotificationWork::cancelAllWaiters(FileInode& inode)
{
   switch (lockType)
   {
      case LockEntryNotifyType_APPEND:
         inode.flockAppendCancelAllWaiters();
         break;

      case LockEntryNotifyType_FLOCK:
         inode.flockEntryCancelAllWaiters();
         break;

      default:
         LOG(GENERAL, ERR, "Invalid lockType given.", lockType);
         break;
   }
}
/**
 * Increments the shared ack counter while holding ackCounterMutex.
 *
 * @return the counter value before the increment
 */
unsigned LockEntryNotificationWork::incAckCounter()
{
   const std::lock_guard<Mutex> lock(ackCounterMutex);

   const unsigned previousValue = ackCounter;
   ++ackCounter;

   return previousValue;
}
/**
 * @return the send mutex of the given datagram listener
 */
Mutex* LockEntryNotificationWork::getDGramLisMutex(AbstractDatagramListener* dgramLis)
{
   Mutex* const sendMutex = dgramLis->getSendMutex();

   return sendMutex;
}

View File

@@ -0,0 +1,57 @@
#pragma once
#include <common/Common.h>
#include <common/components/worker/Work.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/storage/StorageErrors.h>
#include <storage/Locking.h>
class FileInode; // forward declaration
typedef std::list<EntryLockDetails> LockEntryNotifyList;
typedef LockEntryNotifyList::iterator LockEntryNotifyListIter;
typedef LockEntryNotifyList::const_iterator LockEntryNotifyListCIter;
/**
 * Work item that informs waiting clients about a granted entry lock (append or flock type) and
 * cleans up the locks of clients that do not acknowledge the grant.
 */
class LockEntryNotificationWork : public Work
{
   public:
      /**
       * @param notifyList will be owned and freed by this object, so do not use or free it after
       *    calling this.
       */
      LockEntryNotificationWork(LockEntryNotifyType lockType, const std::string& parentEntryID,
         const std::string& entryID, bool isBuddyMirrored, LockEntryNotifyList notifyList) :
         lockType(lockType),
         parentEntryID(parentEntryID),
         entryID(entryID),
         isBuddyMirrored(isBuddyMirrored),
         notifyList(std::move(notifyList))
      {
      }

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      // static attributes & methods

      static Mutex ackCounterMutex; // guards ackCounter
      static unsigned ackCounter;

      static unsigned incAckCounter();

      // instance attributes & methods

      LockEntryNotifyType lockType;
      std::string parentEntryID;
      std::string entryID;
      bool isBuddyMirrored;
      LockEntryNotifyList notifyList;

      void unlockWaiter(FileInode& inode, EntryLockDetails* lockDetails);
      void cancelAllWaiters(FileInode& inode);

      Mutex* getDGramLisMutex(AbstractDatagramListener* dgramLis);
};

View File

@@ -0,0 +1,185 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/net/message/control/AckMsg.h>
#include <common/net/message/session/locking/LockGrantedMsg.h>
#include <common/net/message/NetMessage.h>
#include <program/Program.h>
#include "LockRangeNotificationWork.h"
#include <mutex>
// Guards ackCounter; taken in incAckCounter().
Mutex LockRangeNotificationWork::ackCounterMutex;
// Shared counter; process() embeds its value in the ack ID prefix via incAckCounter().
unsigned LockRangeNotificationWork::ackCounter = 0;
/**
 * Notifies all clients in notifyList that their range lock was granted and waits for their acks.
 *
 * A LockGrantedMsg is (re)sent via the datagram listener to every client whose ack is still
 * outstanding, for up to the configured number of retries. Range locks whose ack never arrived
 * are released again; if more than one ack is missing, all remaining range lock waiters on the
 * inode are cancelled.
 *
 * @param bufOut buffer used to serialize the outgoing messages
 * @param bufOutLen size of bufOut
 */
void LockRangeNotificationWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
/* note: this code is very similar to LockEntryNotificationWork, so if you change something here,
you probably want to change it there, too. */
const char* logContext = __func__;
App* app = Program::getApp();
Logger* logger = Logger::getLogger();
Config* cfg = app->getConfig();
AcknowledgmentStore* ackStore = app->getAckStore();
DatagramListener* dgramLis = app->getDatagramListener();
MetaStore* metaStore = app->getMetaStore();
NodeStoreClients* clients = app->getClientNodes();
NumNodeID localNodeID = app->getLocalNode().getNumID();
// max total time is ackWaitMS * numRetries, defaults to 333ms * 15 => 5s
int ackWaitSleepMS = cfg->getTuneLockGrantWaitMS();
int numRetriesLeft = cfg->getTuneLockGrantNumRetries();
WaitAckMap waitAcks;
WaitAckMap receivedAcks;
WaitAckNotification notifier;
bool allAcksReceived = false;
// note: we use uint for tv_sec (not uint64) because 32 bits are enough here
// gives string like this: "time-counter-rlck-"
std::string ackIDPrefix =
StringTk::uintToHexStr(TimeAbs().getTimeval()->tv_sec) + "-" +
StringTk::uintToHexStr(incAckCounter() ) + "-"
"rlck" "-";
if (notifyList.empty())
return; // nothing to be done
// create and register waitAcks
/* note: waitAcks store pointers to notifyList items, so make sure to not remove anything from
the list while we're still using the waitAcks pointers */
for (LockRangeNotifyListIter iter = notifyList.begin(); iter != notifyList.end(); iter++)
{
std::string ackID = ackIDPrefix + iter->lockAckID; // (we assume lockAckID is globally unique)
WaitAck waitAck(ackID, &(*iter) );
waitAcks.insert(WaitAckMapVal(ackID, waitAck) );
}
ackStore->registerWaitAcks(&waitAcks, &receivedAcks, &notifier);
// loop: send requests -> waitforcompletion -> resend
while(numRetriesLeft && !app->getSelfTerminate() )
{
// create waitAcks copy
// (the copy is taken under notifier.waitAcksMutex and used for sending below)
WaitAckMap currentWaitAcks;
{
const std::lock_guard<Mutex> lock(notifier.waitAcksMutex);
currentWaitAcks = waitAcks;
}
// send messages
for(WaitAckMapIter iter = currentWaitAcks.begin(); iter != currentWaitAcks.end(); iter++)
{
RangeLockDetails* lockDetails = (RangeLockDetails*)iter->second.privateData;
LockGrantedMsg msg(lockDetails->lockAckID, iter->first, localNodeID);
std::pair<bool, unsigned> serializeRes = msg.serializeMessage(bufOut, bufOutLen);
if(unlikely(!serializeRes.first) )
{ // buffer too small - should never happen
logger->log(Log_CRITICAL, logContext, "BUG(?): Buffer too small for message "
"serialization: " + StringTk::intToStr(bufOutLen) + "/" +
StringTk::intToStr(serializeRes.second) );
continue;
}
auto node = clients->referenceNode(lockDetails->clientNumID);
if(unlikely(!node) )
{ // node not exists
logger->log(Log_DEBUG, logContext, "Cannot grant lock to unknown client: " +
lockDetails->clientNumID.str());
continue;
}
dgramLis->sendBufToNode(*node, bufOut, serializeRes.second);
}
// wait for acks
allAcksReceived = ackStore->waitForAckCompletion(&currentWaitAcks, &notifier, ackWaitSleepMS);
if(allAcksReceived)
break; // all acks received
// some waitAcks left => prepare next loop
numRetriesLeft--;
}
// waiting for acks is over
ackStore->unregisterWaitAcks(&waitAcks);
// check and handle results (waitAcks now contains all unreceived acks)
if (waitAcks.empty())
{
LOG_DBG(GENERAL, DEBUG, "Stats: received all acks.", receivedAcks.size(), notifyList.size());
return; // perfect, all acks received
}
// some acks were missing...
logger->log(Log_DEBUG, logContext, "Some replies to lock grants missing. Received: " +
StringTk::intToStr(receivedAcks.size() ) + "/" +
StringTk::intToStr(receivedAcks.size() + waitAcks.size() ) );
// the inode is supposed to be referenced already
MetaFileHandle inode = metaStore->referenceLoadedFile(this->parentEntryID, this->isBuddyMirrored,
this->entryID);
if(unlikely(!inode) )
{ // locked inode cannot be referenced
logger->log(Log_DEBUG, logContext, "FileID cannot be referenced (file unlinked?): "
+ this->entryID);
return;
}
// unlock all locks for which we didn't receive an ack
for(WaitAckMapIter iter = waitAcks.begin(); iter != waitAcks.end(); iter++)
{
RangeLockDetails* lockDetails = (RangeLockDetails*)iter->second.privateData;
lockDetails->setUnlock();
inode->flockRange(*lockDetails);
LOG_DEBUG(logContext, Log_DEBUG, "Reply was missing from: " + lockDetails->clientNumID.str());
}
// cancel all remaining lock waiters if too many acks were missing
// (this is very important to avoid long timeouts if multiple clients are gone/disconnected)
if(waitAcks.size() > 1)
{ // cancel all waiters
inode->flockRangeCancelAllWaiters();
}
// cleanup
metaStore->releaseFile(this->parentEntryID, inode);
}
/**
 * Increments the shared ack counter while holding ackCounterMutex.
 *
 * @return the counter value before the increment
 */
unsigned LockRangeNotificationWork::incAckCounter()
{
   const std::lock_guard<Mutex> lock(ackCounterMutex);

   const unsigned previousValue = ackCounter;
   ++ackCounter;

   return previousValue;
}
/**
 * @return the send mutex of the given datagram listener
 */
Mutex* LockRangeNotificationWork::getDGramLisMutex(AbstractDatagramListener* dgramLis)
{
   Mutex* const sendMutex = dgramLis->getSendMutex();

   return sendMutex;
}

View File

@@ -0,0 +1,49 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/components/AbstractDatagramListener.h>
#include <common/Common.h>
typedef std::list<RangeLockDetails> LockRangeNotifyList;
typedef LockRangeNotifyList::iterator LockRangeNotifyListIter;
typedef LockRangeNotifyList::const_iterator LockRangeNotifyListCIter;
/**
 * Work item that informs waiting clients about a granted range lock and cleans up the locks of
 * clients that do not acknowledge the grant.
 */
class LockRangeNotificationWork : public Work
{
   public:
      /**
       * @param notifyList will be owned and freed by this object, so do not use or free it after
       *    calling this.
       */
      LockRangeNotificationWork(const std::string& parentEntryID, const std::string& entryID,
         bool isBuddyMirrored, LockRangeNotifyList notifyList) :
         parentEntryID(parentEntryID),
         entryID(entryID),
         isBuddyMirrored(isBuddyMirrored),
         notifyList(std::move(notifyList))
      {
         // nothing to do beyond member initialization
      }

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);

   private:
      // static attributes & methods

      static Mutex ackCounterMutex; // guards ackCounter
      static unsigned ackCounter;

      static unsigned incAckCounter();

      // instance attributes & methods

      std::string parentEntryID;
      std::string entryID;
      bool isBuddyMirrored;
      LockRangeNotifyList notifyList;

      Mutex* getDGramLisMutex(AbstractDatagramListener* dgramLis);
};

View File

@@ -0,0 +1,97 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/attribs/SetLocalAttrMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <components/worker/SetChunkFileAttribsWork.h>
#include <program/Program.h>
void SetChunkFileAttribsWork::process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Send a SetLocalAttrMsg for this chunk file to the configured target and evaluate the
 * response.
 *
 * If outDynamicAttribs is set and the response carries the HAS_ATTRS feature flag, the
 * dynamic attribs from the response are stored there.
 *
 * @return the communication error if the request/response exchange failed, otherwise the
 *    result code contained in the response.
 */
FhgfsOpsErr SetChunkFileAttribsWork::communicate()
{
   const char* logContext = "Set chunk file attribs work";

   App* app = Program::getApp();

   const bool mirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // build the request message

   SetLocalAttrMsg setAttrMsg(entryID, targetID, pathInfo, validAttribs, attribs, enableCreation);

   if(mirrored)
   {
      setAttrMsg.addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR);

      if(useBuddyMirrorSecond)
         setAttrMsg.addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);
   }

   if(quotaChown)
      setAttrMsg.addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_USE_QUOTA);

   setAttrMsg.setMsgHeaderUserID(msgUserID);

   // describe where the request has to go

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );

   rrTarget.setTargetStates(app->getTargetStateStore() );

   if(mirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(NULL, &setAttrMsg, NETMSGTYPE_SetLocalAttrResp);

   // send the request and wait for the response

   const FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   { // no usable response
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " +
         std::string(mirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) +
         "; entryID: " + entryID);

      return commRes;
   }

   // a response of the requested type is available from here on

   const SetLocalAttrRespMsg* const respMsg =
      static_cast<const SetLocalAttrRespMsg*>(rrArgs.outRespMsg.get() );

   const FhgfsOpsErr respVal = respMsg->getResult();

   if(respVal != FhgfsOpsErr_SUCCESS)
   { // target reported a failure
      LogContext(logContext).log(Log_WARNING,
         "Setting chunk file attributes on target failed. " +
         std::string(mirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) +
         "; EntryID: " + entryID);

      return respVal;
   }

   // attribs were set

   LOG_DEBUG(logContext, Log_DEBUG,
      "Set attribs of chunk file on target. " +
      std::string(mirrored ? "Mirror " : "") +
      "TargetID: " + StringTk::uintToStr(targetID) +
      "; EntryID: " + entryID);

   // hand out the dynamic attribs only if the caller asked for them and the response has them
   if(outDynamicAttribs && respMsg->isMsgHeaderFeatureFlagSet(SETLOCALATTRRESPMSG_FLAG_HAS_ATTRS) )
      respMsg->getDynamicAttribs(outDynamicAttribs);

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,79 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/storage/striping/StripePattern.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/Common.h>
/**
 * Work item that sets attributes of one chunk file on one storage target.
 *
 * process() writes the outcome to *outResult and then increments *counter.
 */
class SetChunkFileAttribsWork : public Work
{
   public:
      /**
       * All pointer arguments are stored as given (nothing is copied), so the objects behind
       * them have to stay valid for as long as this work item is in use.
       *
       * @param pathInfo only referenced, so do not free it while this object is in use!
       * @param outDynamicAttribs dynamic attribs from the response are stored here if the
       *    response contains them; checked for NULL before use.
       * @param outResult receives the result of the operation
       * @param counter incremented once after outResult was written
       */
      SetChunkFileAttribsWork(const std::string& entryID, int validAttribs,
         SettableFileAttribs* attribs, bool enableCreation, StripePattern* pattern,
         uint16_t targetID, PathInfo* pathInfo, DynamicFileAttribs* outDynamicAttribs,
         FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID),
         validAttribs(validAttribs),
         attribs(attribs),
         enableCreation(enableCreation),
         pattern(pattern),
         targetID(targetID),
         pathInfo(pathInfo),
         outDynamicAttribs(outDynamicAttribs),
         outResult(outResult),
         counter(counter),
         quotaChown(false),
         useBuddyMirrorSecond(false),
         msgUserID(NETMSG_DEFAULT_USERID)
      {
         // nothing left to do: every member is set up by the initializer list
      }

      virtual ~SetChunkFileAttribsWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);


   private:
      std::string entryID;
      int validAttribs;
      SettableFileAttribs* attribs;
      bool enableCreation;
      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfo;
      DynamicFileAttribs* outDynamicAttribs; // receives the chunk's dynamic attribs as
                                             // stat'ed on the storage server
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;

      bool quotaChown; // adds the USE_QUOTA feature flag to the request
      bool useBuddyMirrorSecond; // only evaluated for buddy mirror patterns

      unsigned msgUserID; // goes into the msg header only

      FhgfsOpsErr communicate();


   public:
      // getters & setters

      void setQuotaChown(bool quotaChown)
      {
         this->quotaChown = quotaChown;
      }

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }
};

View File

@@ -0,0 +1,111 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/TruncLocalFileMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <components/worker/TruncChunkFileWork.h>
#include <program/Program.h>
#include <boost/lexical_cast.hpp>
void TruncChunkFileWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Send a TruncLocalFileMsg for this chunk file to the configured target and evaluate the
 * response.
 *
 * If outDynAttribs is set, it is overwritten with the attribs from the response whenever a
 * response was received, no matter whether the truncation itself succeeded.
 *
 * @return the communication error if the request/response exchange failed, otherwise the
 *    result code contained in the response (FhgfsOpsErr_TOOBIG is returned without logging).
 */
FhgfsOpsErr TruncChunkFileWork::communicate()
{
   const char* logContext = "Trunc chunk file work";

   App* app = Program::getApp();

   const bool mirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // build the request message

   TruncLocalFileMsg truncMsg(filesize, entryID, targetID, pathInfo);

   if(mirrored)
   {
      truncMsg.addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR);

      if(useBuddyMirrorSecond)
      {
         truncMsg.addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_NODYNAMICATTRIBS);
         truncMsg.addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
      }
   }

   if(useQuota)
      truncMsg.setUserdataForQuota(userID, groupID);

   truncMsg.setMsgHeaderUserID(msgUserID);

   // describe where the request has to go

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );

   rrTarget.setTargetStates(app->getTargetStateStore() );

   if(mirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(NULL, &truncMsg, NETMSGTYPE_TruncLocalFileResp);

   // send the request and wait for the response

   const FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   { // no usable response
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " +
         std::string(mirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) +
         "; EntryID: " + entryID);

      return commRes;
   }

   // a response of the requested type is available from here on

   TruncLocalFileRespMsg* const respMsg =
      static_cast<TruncLocalFileRespMsg*>(rrArgs.outRespMsg.get() );

   const FhgfsOpsErr respVal = respMsg->getResult();

   // pass on the current dynamic attribs (also for a non-success result, because
   // storageVersion==0 in that case)
   if(outDynAttribs)
   {
      *outDynAttribs = DynamicFileAttribs(respMsg->getStorageVersion(),
         respMsg->getFileSize(), respMsg->getAllocedBlocks(),
         respMsg->getModificationTimeSecs(), respMsg->getLastAccessTimeSecs() );
   }

   if(unlikely(respVal != FhgfsOpsErr_SUCCESS) )
   { // chunk file was not truncated

      // TOOBIG is passed through to the user app on the client, so it is not logged here
      if(respVal != FhgfsOpsErr_TOOBIG)
      {
         LogContext(logContext).log(Log_WARNING,
            "Truncation of chunk file on target failed. " +
            std::string(mirrored ? "Mirror " : "") +
            "TargetID: " + StringTk::uintToStr(targetID) +
            "; EntryID: " + entryID +
            "; Error: " + boost::lexical_cast<std::string>(respVal) );
      }

      return respVal;
   }

   // chunk file was truncated

   LOG_DEBUG(logContext, Log_DEBUG,
      "Chunk file truncated on target. " +
      std::string(mirrored ? "Mirror " : "") +
      "TargetID: " + StringTk::uintToStr(targetID) +
      "; EntryID: " + entryID);

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,78 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Truncate file on storage servers.
 *
 * Work item that truncates one chunk file on one storage target. process() writes the outcome
 * to *outResult and then increments *counter.
 */
class TruncChunkFileWork : public Work
{
   public:
      /**
       * All pointer arguments are stored as given (nothing is copied), so the objects behind
       * them have to stay valid for as long as this work item is in use.
       *
       * @param filesize already converted to the storage node's local file size
       * @param pathInfo not owned by this object
       * @param outDynAttribs may be NULL if caller is not interested
       * @param outResult receives the result of the operation
       * @param counter incremented once after outResult was written
       */
      TruncChunkFileWork(const std::string& entryID, int64_t filesize, StripePattern* pattern,
         uint16_t targetID, PathInfo* pathInfo, DynamicFileAttribs *outDynAttribs,
         FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID), filesize(filesize), pattern(pattern), targetID(targetID),
         pathInfo(pathInfo), outDynAttribs(outDynAttribs), outResult(outResult), counter(counter),
         userID(0), groupID(0), // defined values until setUserdataForQuota() is called
         useQuota(false), useBuddyMirrorSecond(false), msgUserID(NETMSG_DEFAULT_USERID)
      {
         // all assignments done in initializer list
      }

      virtual ~TruncChunkFileWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);


   private:
      std::string entryID;
      int64_t filesize; // already converted to storage node's local file size
      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfo; // note: not owned by this object
      DynamicFileAttribs* outDynAttribs;
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;

      unsigned userID; // only passed on to the request if useQuota is set
      unsigned groupID; // only passed on to the request if useQuota is set
      bool useQuota; // switched on by setUserdataForQuota()

      bool useBuddyMirrorSecond; // only evaluated for buddy mirror patterns

      unsigned msgUserID; // only used for msg header info

      FhgfsOpsErr communicate();


   public:
      // getters & setters

      /**
       * Enables quota user data for the request and stores the IDs to be used for it.
       */
      void setUserdataForQuota(unsigned userID, unsigned groupID)
      {
         this->useQuota = true;
         this->userID = userID;
         this->groupID = groupID;
      }

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }
};

View File

@@ -0,0 +1,87 @@
#include <common/app/log/LogContext.h>
#include <common/app/AbstractApp.h>
#include <common/threading/PThread.h>
#include <common/net/message/storage/creating/UnlinkLocalFileMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <common/net/message/NetMessage.h>
#include <components/worker/UnlinkChunkFileWork.h>
#include <program/Program.h>
void UnlinkChunkFileWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
FhgfsOpsErr commRes = communicate();
*outResult = commRes;
counter->incCount();
}
/**
 * Send an UnlinkLocalFileMsg for this chunk file to the configured target and evaluate the
 * response.
 *
 * @return the communication error if the request/response exchange failed, otherwise the
 *    result code contained in the response.
 */
FhgfsOpsErr UnlinkChunkFileWork::communicate()
{
   const char* logContext = "Unlink chunk file work";

   App* app = Program::getApp();

   const bool mirrored = (pattern->getPatternType() == StripePatternType_BuddyMirror);

   // build the request message

   UnlinkLocalFileMsg unlinkMsg(entryID, targetID, pathInfo);

   if(mirrored)
   {
      unlinkMsg.addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR);

      if(useBuddyMirrorSecond)
         unlinkMsg.addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
   }

   unlinkMsg.setMsgHeaderUserID(msgUserID);

   // describe where the request has to go

   RequestResponseTarget rrTarget(targetID, app->getTargetMapper(), app->getStorageNodes() );

   rrTarget.setTargetStates(app->getTargetStateStore() );

   if(mirrored)
      rrTarget.setMirrorInfo(app->getStorageBuddyGroupMapper(), useBuddyMirrorSecond);

   RequestResponseArgs rrArgs(NULL, &unlinkMsg, NETMSGTYPE_UnlinkLocalFileResp);

   // send the request and wait for the response

   const FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   { // no usable response
      LogContext(logContext).log(Log_WARNING,
         "Communication with storage target failed. " +
         std::string(mirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) +
         "; EntryID: " + entryID);

      return commRes;
   }

   // a response of the requested type is available from here on

   UnlinkLocalFileRespMsg* const respMsg =
      static_cast<UnlinkLocalFileRespMsg*>(rrArgs.outRespMsg.get() );

   const FhgfsOpsErr respVal = respMsg->getResult();

   if(respVal != FhgfsOpsErr_SUCCESS)
   { // chunk file was not unlinked
      LogContext(logContext).log(Log_WARNING,
         "Unlinking of chunk file from target failed. " +
         std::string(mirrored ? "Mirror " : "") +
         "TargetID: " + StringTk::uintToStr(targetID) +
         "; EntryID: " + entryID);

      return respVal;
   }

   // chunk file was unlinked

   LOG_DEBUG(logContext, Log_DEBUG,
      "Chunk file unlinked from target. " +
      std::string(mirrored ? "Mirror " : "") +
      "TargetID: " + StringTk::uintToStr(targetID) +
      "; EntryID: " + entryID);

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,62 @@
#pragma once
#include <common/components/worker/Work.h>
#include <common/net/sock/Socket.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/PathInfo.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <common/storage/striping/ChunkFileInfo.h>
#include <common/Common.h>
/**
 * Work item that unlinks one chunk file from one storage target.
 *
 * process() writes the outcome to *outResult and then increments *counter.
 */
class UnlinkChunkFileWork : public Work
{
   public:
      /**
       * All pointer arguments are stored as given (nothing is copied), so the objects behind
       * them have to stay valid for as long as this work item is in use.
       *
       * @param pathInfo only referenced, so do not free it while this object is in use!
       * @param outResult receives the result of the operation
       * @param counter incremented once after outResult was written
       */
      UnlinkChunkFileWork(const std::string& entryID, StripePattern* pattern, uint16_t targetID,
         PathInfo* pathInfo, FhgfsOpsErr* outResult, SynchronizedCounter* counter) :
         entryID(entryID),
         pattern(pattern),
         targetID(targetID),
         pathInfo(pathInfo),
         outResult(outResult),
         counter(counter),
         useBuddyMirrorSecond(false),
         msgUserID(NETMSG_DEFAULT_USERID)
      {
         // nothing left to do: every member is set up by the initializer list
      }

      virtual ~UnlinkChunkFileWork() {}

      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen);


   private:
      std::string entryID;
      StripePattern* pattern;
      uint16_t targetID;
      PathInfo* pathInfo;
      FhgfsOpsErr* outResult;
      SynchronizedCounter* counter;

      bool useBuddyMirrorSecond; // only evaluated for buddy mirror patterns

      unsigned msgUserID; // goes into the msg header

      FhgfsOpsErr communicate();


   public:
      // getters & setters

      void setMsgUserID(unsigned msgUserID)
      {
         this->msgUserID = msgUserID;
      }

      void setUseBuddyMirrorSecond()
      {
         this->useBuddyMirrorSecond = true;
      }
};