New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,203 @@
// control messages
#include <common/net/message/control/AuthenticateChannelMsgEx.h>
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/control/PeerInfoMsgEx.h>
#include "control/AckMsgEx.h"
#include "control/SetChannelDirectMsgEx.h"
// nodes messages
#include <common/net/message/nodes/ChangeTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/GetNodesRespMsg.h>
#include <common/net/message/nodes/GetStatesAndBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/storagepools/GetStoragePoolsRespMsg.h>
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
#include <common/net/message/nodes/GetTargetStatesRespMsg.h>
#include <common/net/message/nodes/MapTargetsRespMsg.h>
#include <common/net/message/nodes/RegisterNodeRespMsg.h>
#include <common/net/message/nodes/RegisterTargetRespMsg.h>
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
#include <net/message/nodes/GenericDebugMsgEx.h>
#include <net/message/nodes/GetClientStatsMsgEx.h>
#include <net/message/nodes/HeartbeatMsgEx.h>
#include <net/message/nodes/HeartbeatRequestMsgEx.h>
#include <net/message/nodes/MapTargetsMsgEx.h>
#include <net/message/nodes/PublishCapacitiesMsgEx.h>
#include <net/message/nodes/RefreshTargetStatesMsgEx.h>
#include <net/message/nodes/RemoveBuddyGroupMsgEx.h>
#include <net/message/nodes/RemoveNodeMsgEx.h>
#include <net/message/nodes/SetMirrorBuddyGroupMsgEx.h>
#include <net/message/nodes/SetTargetConsistencyStatesMsgEx.h>
#include <net/message/nodes/GetTargetConsistencyStatesMsgEx.h>
// storage messages
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <common/net/message/storage/listing/ListChunkDirIncrementalRespMsg.h>
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/net/message/storage/quota/GetQuotaInfoMsg.h>
#include <common/net/message/storage/quota/RequestExceededQuotaRespMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <common/net/message/storage/SetStorageTargetInfoRespMsg.h>
#include <net/message/storage/attribs/GetChunkFileAttribsMsgEx.h>
#include <net/message/storage/attribs/SetLocalAttrMsgEx.h>
#include <net/message/storage/creating/RmChunkPathsMsgEx.h>
#include <net/message/storage/creating/UnlinkLocalFileMsgEx.h>
#include <net/message/storage/listing/ListChunkDirIncrementalMsgEx.h>
#include <net/message/storage/mirroring/GetStorageResyncStatsMsgEx.h>
#include <net/message/storage/mirroring/ResyncLocalFileMsgEx.h>
#include <net/message/storage/mirroring/SetLastBuddyCommOverrideMsgEx.h>
#include <net/message/storage/mirroring/StorageResyncStartedMsgEx.h>
#include <net/message/storage/quota/GetQuotaInfoMsgEx.h>
#include <net/message/storage/quota/SetExceededQuotaMsgEx.h>
#include <net/message/storage/GetHighResStatsMsgEx.h>
#include <net/message/storage/StatStoragePathMsgEx.h>
#include <net/message/storage/TruncLocalFileMsgEx.h>
// session messages
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
#include <net/message/session/opening/CloseChunkFileMsgEx.h>
#include <net/message/session/rw/ReadLocalFileV2MsgEx.h>
#include <net/message/session/rw/WriteLocalFileMsgEx.h>
#include <net/message/session/FSyncLocalFileMsgEx.h>
#ifdef BEEGFS_NVFS
#include <net/message/session/rw/ReadLocalFileRDMAMsgEx.h>
#include <net/message/session/rw/WriteLocalFileRDMAMsgEx.h>
#endif /* BEEGFS_NVFS */
// mon messages
#include <net/message/mon/RequestStorageDataMsgEx.h>
// fsck
#include <net/message/fsck/DeleteChunksMsgEx.h>
#include <net/message/fsck/FetchFsckChunkListMsgEx.h>
#include <net/message/fsck/MoveChunkFileMsgEx.h>
// storage benchmark
#include <common/net/message/nodes/StorageBenchControlMsg.h>
#include <net/message/nodes/StorageBenchControlMsgEx.h>
// chunk balancing
#include <common/net/message/storage/chunkbalancing/StripePatternUpdateRespMsg.h>
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
#include <net/message/storage/chunkbalancing/CpChunkPathsMsgEx.h>
#include <common/net/message/SimpleMsg.h>
#include <net/message/nodes/storagepools/RefreshStoragePoolsMsgEx.h>
#include "NetMessageFactory.h"
/**
* Creates the message object that belongs to the given message type.
*
* @param msgType one of the NETMSGTYPE_* values handled by the switch below
* @return newly allocated NetMessage; ownership is carried by the returned unique_ptr, so the
* caller does not delete it manually. For a msgType without a case below, a SimpleMsg constructed
* with NETMSGTYPE_Invalid is returned instead.
*/
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
{
// assigned on every path of the switch (the default case covers unknown types)
NetMessage* msg;
switch(msgType)
{
// The following lines are grouped by "type of the message" and ordered alphabetically inside
// the groups. There should always be one message per line to keep a clear layout (although
// this might lead to lines that are longer than usual)
// control messages
case NETMSGTYPE_Ack: { msg = new AckMsgEx(); } break;
case NETMSGTYPE_AuthenticateChannel: { msg = new AuthenticateChannelMsgEx(); } break;
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
case NETMSGTYPE_SetChannelDirect: { msg = new SetChannelDirectMsgEx(); } break;
case NETMSGTYPE_PeerInfo: { msg = new PeerInfoMsgEx(); } break;
// nodes messages
case NETMSGTYPE_ChangeTargetConsistencyStatesResp: { msg = new ChangeTargetConsistencyStatesRespMsg(); } break;
case NETMSGTYPE_GenericDebug: { msg = new GenericDebugMsgEx(); } break;
case NETMSGTYPE_GetClientStats: { msg = new GetClientStatsMsgEx(); } break;
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
case NETMSGTYPE_GetStatesAndBuddyGroupsResp: { msg = new GetStatesAndBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetStoragePoolsResp: { msg = new GetStoragePoolsRespMsg(); } break;
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
case NETMSGTYPE_GetTargetStatesResp: { msg = new GetTargetStatesRespMsg(); } break;
case NETMSGTYPE_HeartbeatRequest: { msg = new HeartbeatRequestMsgEx(); } break;
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
case NETMSGTYPE_MapTargets: { msg = new MapTargetsMsgEx(); } break;
case NETMSGTYPE_PublishCapacities: { msg = new PublishCapacitiesMsgEx(); } break;
case NETMSGTYPE_MapTargetsResp: { msg = new MapTargetsRespMsg(); } break;
case NETMSGTYPE_StorageBenchControlMsg: {msg = new StorageBenchControlMsgEx(); } break;
case NETMSGTYPE_RefreshStoragePools: { msg = new RefreshStoragePoolsMsgEx(); } break;
case NETMSGTYPE_RefreshTargetStates: { msg = new RefreshTargetStatesMsgEx(); } break;
case NETMSGTYPE_RegisterNodeResp: { msg = new RegisterNodeRespMsg(); } break;
case NETMSGTYPE_RegisterTargetResp: { msg = new RegisterTargetRespMsg(); } break;
case NETMSGTYPE_RemoveBuddyGroup: { msg = new RemoveBuddyGroupMsgEx(); } break;
case NETMSGTYPE_RemoveNode: { msg = new RemoveNodeMsgEx(); } break;
case NETMSGTYPE_RemoveNodeResp: { msg = new RemoveNodeRespMsg(); } break;
case NETMSGTYPE_SetMirrorBuddyGroup: { msg = new SetMirrorBuddyGroupMsgEx(); } break;
case NETMSGTYPE_SetTargetConsistencyStates: { msg = new SetTargetConsistencyStatesMsgEx(); } break;
case NETMSGTYPE_SetTargetConsistencyStatesResp: { msg = new SetTargetConsistencyStatesRespMsg(); } break;
case NETMSGTYPE_GetTargetConsistencyStates: { msg = new GetTargetConsistencyStatesMsgEx(); } break;
case NETMSGTYPE_GetTargetConsistencyStatesResp: { msg = new GetTargetConsistencyStatesRespMsg(); } break;
// storage messages
case NETMSGTYPE_CpChunkPaths: { msg = new CpChunkPathsMsgEx(); } break;
case NETMSGTYPE_CpChunkPathsResp: { msg = new CpChunkPathsRespMsg(); } break;
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
case NETMSGTYPE_GetChunkFileAttribs: { msg = new GetChunkFileAttribsMsgEx(); } break;
case NETMSGTYPE_GetHighResStats: { msg = new GetHighResStatsMsgEx(); } break;
case NETMSGTYPE_GetQuotaInfo: {msg = new GetQuotaInfoMsgEx(); } break;
case NETMSGTYPE_GetStorageResyncStats: { msg = new GetStorageResyncStatsMsgEx(); } break;
case NETMSGTYPE_ListChunkDirIncremental: { msg = new ListChunkDirIncrementalMsgEx(); } break;
case NETMSGTYPE_ListChunkDirIncrementalResp: { msg = new ListChunkDirIncrementalRespMsg(); } break;
case NETMSGTYPE_RequestExceededQuotaResp: {msg = new RequestExceededQuotaRespMsg(); } break;
case NETMSGTYPE_ResyncLocalFile: { msg = new ResyncLocalFileMsgEx(); } break;
case NETMSGTYPE_ResyncLocalFileResp: { msg = new ResyncLocalFileRespMsg(); } break;
case NETMSGTYPE_RmChunkPaths: { msg = new RmChunkPathsMsgEx(); } break;
case NETMSGTYPE_RmChunkPathsResp: { msg = new RmChunkPathsRespMsg(); } break;
case NETMSGTYPE_SetExceededQuota: {msg = new SetExceededQuotaMsgEx(); } break;
case NETMSGTYPE_SetLastBuddyCommOverride: { msg = new SetLastBuddyCommOverrideMsgEx(); } break;
case NETMSGTYPE_SetLocalAttr: { msg = new SetLocalAttrMsgEx(); } break;
case NETMSGTYPE_SetLocalAttrResp: { msg = new SetLocalAttrRespMsg(); } break;
case NETMSGTYPE_SetStorageTargetInfoResp: { msg = new SetStorageTargetInfoRespMsg(); } break;
case NETMSGTYPE_StatStoragePath: { msg = new StatStoragePathMsgEx(); } break;
case NETMSGTYPE_StorageResyncStarted: { msg = new StorageResyncStartedMsgEx(); } break;
case NETMSGTYPE_StorageResyncStartedResp: { msg = new StorageResyncStartedRespMsg(); } break;
case NETMSGTYPE_StripePatternUpdateResp: { msg = new StripePatternUpdateRespMsg(); } break;
case NETMSGTYPE_TruncLocalFile: { msg = new TruncLocalFileMsgEx(); } break;
case NETMSGTYPE_TruncLocalFileResp: { msg = new TruncLocalFileRespMsg(); } break;
case NETMSGTYPE_UnlinkLocalFile: { msg = new UnlinkLocalFileMsgEx(); } break;
case NETMSGTYPE_UnlinkLocalFileResp: { msg = new UnlinkLocalFileRespMsg(); } break;
// session messages
case NETMSGTYPE_CloseChunkFile: { msg = new CloseChunkFileMsgEx(); } break;
case NETMSGTYPE_CloseChunkFileResp: { msg = new CloseChunkFileRespMsg(); } break;
case NETMSGTYPE_FSyncLocalFile: { msg = new FSyncLocalFileMsgEx(); } break;
case NETMSGTYPE_ReadLocalFileV2: { msg = new ReadLocalFileV2MsgEx(); } break;
case NETMSGTYPE_WriteLocalFile: { msg = new WriteLocalFileMsgEx(); } break;
case NETMSGTYPE_WriteLocalFileResp: { msg = new WriteLocalFileRespMsg(); } break;
// the RDMA message types are only available when built with BEEGFS_NVFS defined
#ifdef BEEGFS_NVFS
case NETMSGTYPE_ReadLocalFileRDMA: { msg = new ReadLocalFileRDMAMsgEx(); } break;
case NETMSGTYPE_WriteLocalFileRDMA: { msg = new WriteLocalFileRDMAMsgEx(); } break;
#endif // BEEGFS_NVFS
// mon message
case NETMSGTYPE_RequestStorageData: { msg = new RequestStorageDataMsgEx(); } break;
// fsck
case NETMSGTYPE_DeleteChunks: { msg = new DeleteChunksMsgEx(); } break;
case NETMSGTYPE_FetchFsckChunkList: { msg = new FetchFsckChunkListMsgEx(); } break;
case NETMSGTYPE_MoveChunkFile: { msg = new MoveChunkFileMsgEx(); } break;
// unknown message type => hand back a placeholder message marked as invalid
default:
{
msg = new SimpleMsg(NETMSGTYPE_Invalid);
} break;
}
// transfer ownership of the raw pointer to the caller
return std::unique_ptr<NetMessage>(msg);
}

View File

@@ -0,0 +1,14 @@
#pragma once
#include <common/Common.h>
#include <common/net/message/AbstractNetMessageFactory.h>
// Message factory of this service: implements the createFromMsgType() hook of
// AbstractNetMessageFactory.
class NetMessageFactory : public AbstractNetMessageFactory
{
public:
NetMessageFactory() {}
protected:
// maps a NETMSGTYPE_* value to a newly created message object owned by the returned unique_ptr
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
} ;

View File

@@ -0,0 +1,22 @@
#include <program/Program.h>
#include "AckMsgEx.h"
/**
 * Passes the value carried by this ack to the app's AcknowledgmentStore and counts the
 * operation in the per-node op stats.
 *
 * @param ctx response context; only its socket is used (for the peer IP)
 * @return always true
 */
bool AckMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   LogContext logCtx("Ack incoming");
   LOG_DEBUG_CONTEXT(logCtx, 5, std::string("Value: ") + getValue() );

   app->getAckStore()->receivedAck(getValue() );

   // note: this message does not require a response

   app->getNodeOpStats()->updateNodeOp(
      ctx.getSocket()->getPeerIP(), StorageOpCounter_ACK, getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/control/AckMsg.h>
// see class AcknowledgeableMsg (fhgfs_common) for a short description
// AckMsg subclass that adds the handler for a received ack.
class AckMsgEx : public AckMsg
{
public:
// handles a received ack message; ctx gives access to the socket it arrived on
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,19 @@
#include <program/Program.h>
#include "SetChannelDirectMsgEx.h"
/**
 * Applies the received value as the "direct" flag of the socket this message arrived on and
 * counts the operation in the per-node op stats. No response is sent.
 *
 * @param ctx response context providing the socket
 * @return always true
 */
bool SetChannelDirectMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext logCtx("SetChannelDirect incoming");
   LOG_DEBUG_CONTEXT(logCtx, 5, std::string("Value: ") + StringTk::intToStr(getValue() ) );

   ctx.getSocket()->setIsDirect(getValue() );

   Program::getApp()->getNodeOpStats()->updateNodeOp(
      ctx.getSocket()->getPeerIP(), StorageOpCounter_SETCHANNELDIRECT, getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/control/SetChannelDirectMsg.h>
// direct means the message is definitely processed on this server and not forwarded to another
// SetChannelDirectMsg subclass that adds the handler for a received message.
class SetChannelDirectMsgEx : public SetChannelDirectMsg
{
public:
// sets the direct flag on the socket of ctx according to the message value
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,60 @@
#include "DeleteChunksMsgEx.h"
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
/**
 * Unlinks every chunk file listed in the message and answers with the list of chunks that
 * could not be deleted.
 *
 * A chunk counts as failed if its target ID is unknown or if unlinkat() fails with an error
 * other than ENOENT. After a successful unlink, removal of the chunk's directory path is
 * attempted as well.
 *
 * @param ctx response context used to send the DeleteChunksRespMsg
 * @return always true
 */
bool DeleteChunksMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "DeleteChunksMsg incoming";

   App* app = Program::getApp();
   ChunkStore* chunkDirStore = app->getChunkDirStore();

   FsckChunkList failedDeletes;

   for (auto& chunk : getChunks())
   {
      // a non-zero buddy group ID selects the mirror directory FD below
      const bool useMirrorFD = chunk.getBuddyGroupID();

      const std::string chunkDirRelative = chunk.getSavedPath()->str();
      const std::string delPathStrRelative = chunkDirRelative + "/" + chunk.getID();

      auto* const target = app->getStorageTargets()->getTarget(chunk.getTargetID());
      if (!target)
      { // unknown targetID
         LogContext(logContext).logErr(std::string("Unknown targetID: ") +
            StringTk::uintToStr(chunk.getTargetID()));

         failedDeletes.push_back(chunk);
         continue;
      }

      // valid targetID
      const int targetFD = useMirrorFD ? *target->getMirrorFD() : *target->getChunkFD();
      const int unlinkRes = unlinkat(targetFD, delPathStrRelative.c_str(), 0);

      if (unlinkRes == 0)
      { // chunk file removed => now try to rmdir chunkDirPath (checks if it is empty)
         Path chunkDirRelativeVec(chunkDirRelative);
         chunkDirStore->rmdirChunkDirPath(targetFD, &chunkDirRelativeVec);
      }
      else if ( (unlinkRes == -1) && (errno != ENOENT) )
      { // error (a chunk file that does not exist is not treated as a failure)
         LogContext(logContext).logErr(
            "Unable to unlink file: " + delPathStrRelative + ". " + "SysErr: "
            + System::getErrString());

         failedDeletes.push_back(chunk);
      }
   }

   ctx.sendResponse(DeleteChunksRespMsg(&failedDeletes) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/DeleteChunksMsg.h>
#include <common/net/message/fsck/DeleteChunksRespMsg.h>
// DeleteChunksMsg subclass that adds the handler for a received message.
class DeleteChunksMsgEx : public DeleteChunksMsg
{
public:
// deletes the chunks listed in the message and responds with those that could not be deleted
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,53 @@
#include "FetchFsckChunkListMsgEx.h"
#include <program/Program.h>
/**
 * Serves one batch of chunks collected by the app's ChunkFetcher to fsck.
 *
 * If the request carries the NOTSTARTED status, a new fetcher run is started. When a previous
 * run is still active (workers running or queue not empty) the request is either refused with
 * an empty list and NOTSTARTED, or - if a forced restart was requested - the old run is stopped
 * first.
 *
 * @param ctx response context used to send the FetchFsckChunkListRespMsg
 * @return always true
 */
bool FetchFsckChunkListMsgEx::processIncoming(ResponseContext& ctx)
{
   ChunkFetcher* chunkFetcher = Program::getApp()->getChunkFetcher();

   FsckChunkList chunkList;

   if (getLastStatus() == FetchFsckChunkListStatus_NOTSTARTED)
   {
      // This is the first message of a new Fsck run

      const bool previousRunActive =
         (chunkFetcher->getNumRunning() != 0) || !chunkFetcher->isQueueEmpty();

      if (previousRunActive)
      {
         // another fsck is already in progress

         if (!getForceRestart())
         {
            LOG(GENERAL, NOTICE, "Received request to start fsck although previous run is not finished. "
               "Not starting.", ("From", ctx.peerName()));

            ctx.sendResponse(FetchFsckChunkListRespMsg(&chunkList,
               FetchFsckChunkListStatus_NOTSTARTED));

            return true;
         }

         LOG(GENERAL, NOTICE, "Aborting previous fsck chunk fetcher run by user request.",
            ("From", ctx.peerName()));

         chunkFetcher->stopFetching();
         chunkFetcher->waitForStopFetching();
      }

      chunkFetcher->startFetching();
   }

   // note: the status is sampled before the chunks are taken from the fetcher
   FetchFsckChunkListStatus status = FetchFsckChunkListStatus_RUNNING;

   if (chunkFetcher->getIsBad())
      status = FetchFsckChunkListStatus_READERROR;
   else if (chunkFetcher->getNumRunning() == 0)
      status = FetchFsckChunkListStatus_FINISHED;

   chunkFetcher->getAndDeleteChunks(chunkList, getMaxNumChunks());

   ctx.sendResponse(FetchFsckChunkListRespMsg(&chunkList, status));

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/fsck/FetchFsckChunkListMsg.h>
#include <common/net/message/fsck/FetchFsckChunkListRespMsg.h>
// FetchFsckChunkListMsg subclass that adds the handler for a received message.
class FetchFsckChunkListMsgEx : public FetchFsckChunkListMsg
{
public:
// starts/continues a chunk fetcher run and responds with the next batch of chunks plus a status
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,88 @@
#include "MoveChunkFileMsgEx.h"
#include <program/Program.h>
/**
 * Performs the requested chunk move and reports its result code to the sender.
 *
 * @param ctx response context used to send the MoveChunkFileRespMsg
 * @return always true
 */
bool MoveChunkFileMsgEx::processIncoming(ResponseContext& ctx)
{
   const unsigned moveRes = moveChunk();

   ctx.sendResponse(MoveChunkFileRespMsg(moveRes));

   return true;
}
unsigned MoveChunkFileMsgEx::moveChunk()
{
const char* logContext = "MoveChunkFileMsg incoming";
App* app = Program::getApp();
std::string chunkName = this->getChunkName();
std::string oldPath = this->getOldPath(); // relative path to chunks dir
std::string newPath = this->getNewPath(); // relative path to chunks dir
uint16_t targetID = this->getTargetID();
bool overwriteExisting = this->getOverwriteExisting();
int renameRes;
std::string moveFrom = oldPath + "/" + chunkName;
std::string moveTo = newPath + "/" + chunkName;
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
LogContext(logContext).log(Log_CRITICAL, "Could not open path for target ID; targetID: "
+ StringTk::uintToStr(targetID));
return 1;
}
const auto targetPath = getIsMirrored()
? target->getPath() / CONFIG_BUDDYMIRROR_SUBDIR_NAME
: target->getPath() / CONFIG_CHUNK_SUBDIR_NAME;
const int targetFD = getIsMirrored() ? *target->getMirrorFD() : *target->getChunkFD();
// if overwriteExisting set to false, make sure, that output file does not exist
if (!overwriteExisting)
{
bool pathExists = StorageTk::pathExists(targetFD, moveTo);
if (pathExists)
{
LogContext(logContext).log(Log_CRITICAL,
"Could not move chunk file. Destination file does already exist; chunkID: " + chunkName
+ "; targetID: " + StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath
+ "; newChunkPath: " + newPath);
return 1;
}
}
{
// create the parent directory (perhaps it didn't exist)
// can be more efficient if we write a createPathOnDisk that uses mkdirat
const Path moveToPath = targetPath / moveTo;
mode_t dirMode = S_IRWXU | S_IRWXG | S_IRWXO;
bool mkdirRes = StorageTk::createPathOnDisk(moveToPath, true, &dirMode);
if(!mkdirRes)
{
LogContext(logContext).log(Log_CRITICAL,
"Could not create parent directory for chunk; chunkID: " + chunkName + "; targetID: "
+ StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath + "; newChunkPath: "
+ newPath);
return 1;
}
}
// perform the actual move
renameRes = renameat(targetFD, moveFrom.c_str(), targetFD, moveTo.c_str() );
if ( renameRes != 0 )
{
LogContext(logContext).log(Log_CRITICAL,
"Could not perform move; chunkID: " + chunkName + "; targetID: "
+ StringTk::uintToStr(targetID) + "; oldChunkPath: " + oldPath + "; newChunkPath: "
+ newPath + "; SysErr: " + System::getErrString());
return 1;
}
else if (getIsMirrored())
target->setBuddyNeedsResync(true);
return 0;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/net/message/NetMessage.h>
#include <common/net/message/fsck/MoveChunkFileMsg.h>
#include <common/net/message/fsck/MoveChunkFileRespMsg.h>
// MoveChunkFileMsg subclass that adds the handler for a received message.
class MoveChunkFileMsgEx : public MoveChunkFileMsg
{
public:
// runs moveChunk() and sends its result code back in a MoveChunkFileRespMsg
virtual bool processIncoming(ResponseContext& ctx);
private:
// performs the rename; returns 0 on success and 1 on error
unsigned moveChunk();
};

View File

@@ -0,0 +1,68 @@
#include "RequestStorageDataMsgEx.h"
/**
 * Collects monitoring data of this storage node (target disk space, session count, NIC list,
 * high-res stats history, work queue lengths) and sends it back in a
 * RequestStorageDataRespMsg.
 *
 * @param ctx response context used to send the response and to read the peer IP
 * @return always true
 */
bool RequestStorageDataMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();
   Node& node = app->getLocalNode();
   MultiWorkQueueMap* workQueueMap = app->getWorkQueueMap();
   StorageTargets* storageTargets = app->getStorageTargets();

   // get disk space of each target
   StorageTargetInfoList storageTargetInfoList;
   storageTargets->generateTargetInfoList(storageTargetInfoList);

   // compute total disk space and total free space
   int64_t diskSpaceTotal = 0; // sum of all targets
   int64_t diskSpaceFree = 0; // sum of all targets

   for (auto& targetInfo : storageTargetInfoList)
   {
      // The per-target value has to be tested here, not the running sum: testing the sum would
      // never skip a single failed target and would skip all remaining targets as soon as the
      // sum happened to be -1.
      // NOTE(review): assumes a target reports -1 as total space when statfs() failed on it.
      if (targetInfo.getDiskSpaceTotal() == -1)
         continue; // statfs() failed on this target

      diskSpaceTotal += targetInfo.getDiskSpaceTotal();
      diskSpaceFree += targetInfo.getDiskSpaceFree();
   }

   unsigned sessionCount = app->getSessions()->getSize();

   NicAddressList nicList(node.getNicList());
   std::string hostnameid = System::getHostname();

   // highresStats: the message value is the timestamp (ms) passed to getStatsSince()
   HighResStatsList statsHistory;
   uint64_t lastStatsMS = getValue();

   // get stats history
   StatsCollector* statsCollector = app->getStatsCollector();
   statsCollector->getStatsSince(lastStatsMS, statsHistory);

   // get work queue stats (summed over all work queues)
   unsigned indirectWorkListSize = 0;
   unsigned directWorkListSize = 0;

   for (const auto& queueEntry : *workQueueMap)
   {
      indirectWorkListSize += queueEntry.second->getIndirectWorkListSize();
      directWorkListSize += queueEntry.second->getDirectWorkListSize();
   }

   RequestStorageDataRespMsg requestStorageDataRespMsg(node.getAlias(), hostnameid, node.getNumID(),
      &nicList, indirectWorkListSize, directWorkListSize, diskSpaceTotal, diskSpaceFree,
      sessionCount, &statsHistory, &storageTargetInfoList);
   ctx.sendResponse(requestStorageDataRespMsg);

   LOG_DEBUG(__func__, Log_SPAM, std::string("Sent a message with type: " ) +
      StringTk::uintToStr(requestStorageDataRespMsg.getMsgType() ) + std::string(" to mon") );

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
      StorageOpCounter_REQUESTSTORAGEDATA, getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include <app/App.h>
#include <common/app/log/LogContext.h>
#include <common/components/worker/queue/MultiWorkQueue.h>
#include <common/net/message/mon/RequestStorageDataMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageTargetInfo.h>
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
#include <program/Program.h>
// RequestStorageDataMsg subclass that adds the handler for a received message.
class RequestStorageDataMsgEx : public RequestStorageDataMsg
{
public:
// gathers node/target statistics and responds with a RequestStorageDataRespMsg
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,428 @@
#include <common/net/message/nodes/GenericDebugRespMsg.h>
#include <common/net/msghelpers/MsgHelperGenericDebug.h>
#include <common/storage/quota/Quota.h>
#include <common/storage/StoragePoolId.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include <session/ZfsSession.h>
#include <toolkit/QuotaTk.h>
#include "GenericDebugMsgEx.h"
// Operation names (first word of the command string) that are defined in this file; the other
// GENDBGMSG_OP_* names used by processCommand() are not defined here.
#define GENDBGMSG_OP_LISTOPENFILES "listopenfiles"
#define GENDBGMSG_OP_VERSION "version"
#define GENDBGMSG_OP_MSGQUEUESTATS "msgqueuestats"
#define GENDBGMSG_OP_RESYNCQUEUELEN "resyncqueuelen"
#define GENDBGMSG_OP_CHUNKLOCKSTORESIZE "chunklockstoresize"
#define GENDBGMSG_OP_CHUNKLOCKSTORECONTENTS "chunklockstore"
#define GENDBGMSG_OP_SETREJECTIONRATE "setrejectionrate"
/**
 * Executes the debug command contained in the message, sends the textual result back in a
 * GenericDebugRespMsg and counts the operation in the per-node op stats.
 *
 * @param ctx response context used to send the response and to read the peer IP
 * @return always true
 */
bool GenericDebugMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext logCtx("GenericDebugMsg incoming");
   LOG_DEBUG_CONTEXT(logCtx, 5, std::string("Command string: ") + getCommandStr() );

   const std::string cmdRespStr = processCommand();

   ctx.sendResponse(GenericDebugRespMsg(cmdRespStr.c_str() ) );

   Program::getApp()->getNodeOpStats()->updateNodeOp(
      ctx.getSocket()->getPeerIP(), StorageOpCounter_GENERICDEBUG, getMsgHeaderUserID() );

   return true;
}
/**
* @return command response string
*/
std::string GenericDebugMsgEx::processCommand()
{
App* app = Program::getApp();
Config* cfg = app->getConfig();
std::string responseStr;
std::string operation;
// load command string into a stream to allow us to use getline
std::istringstream commandStream(getCommandStr() );
// get operation type from command string
std::getline(commandStream, operation, ' ');
if(operation == GENDBGMSG_OP_LISTOPENFILES)
responseStr = processOpListOpenFiles(commandStream);
else
if(operation == GENDBGMSG_OP_VERSION)
responseStr = processOpVersion(commandStream);
else
if(operation == GENDBGMSG_OP_MSGQUEUESTATS)
responseStr = processOpMsgQueueStats(commandStream);
else
if(operation == GENDBGMSG_OP_VARLOGMESSAGES)
responseStr = MsgHelperGenericDebug::processOpVarLogMessages(commandStream);
else
if(operation == GENDBGMSG_OP_VARLOGKERNLOG)
responseStr = MsgHelperGenericDebug::processOpVarLogKernLog(commandStream);
else
if(operation == GENDBGMSG_OP_FHGFSLOG)
responseStr = MsgHelperGenericDebug::processOpFhgfsLog(commandStream);
else
if(operation == GENDBGMSG_OP_LOADAVG)
responseStr = MsgHelperGenericDebug::processOpLoadAvg(commandStream);
else
if(operation == GENDBGMSG_OP_DROPCACHES)
responseStr = MsgHelperGenericDebug::processOpDropCaches(commandStream);
else
if(operation == GENDBGMSG_OP_GETCFG)
responseStr = MsgHelperGenericDebug::processOpCfgFile(commandStream, cfg->getCfgFile() );
else
if(operation == GENDBGMSG_OP_GETLOGLEVEL)
responseStr = MsgHelperGenericDebug::processOpGetLogLevel(commandStream);
else
if(operation == GENDBGMSG_OP_SETLOGLEVEL)
responseStr = MsgHelperGenericDebug::processOpSetLogLevel(commandStream);
else
if(operation == GENDBGMSG_OP_NETOUT)
responseStr = MsgHelperGenericDebug::processOpNetOut(commandStream,
app->getMgmtNodes(), app->getMetaNodes(), app->getStorageNodes() );
else
if(operation == GENDBGMSG_OP_QUOTAEXCEEDED)
responseStr = processOpQuotaExceeded(commandStream);
else
if(operation == GENDBGMSG_OP_USEDQUOTA)
responseStr = processOpUsedQuota(commandStream);
else
if(operation == GENDBGMSG_OP_RESYNCQUEUELEN)
responseStr = processOpResyncQueueLen(commandStream);
else
if(operation == GENDBGMSG_OP_CHUNKLOCKSTORESIZE)
responseStr = processOpChunkLockStoreSize(commandStream);
else
if(operation == GENDBGMSG_OP_CHUNKLOCKSTORECONTENTS)
responseStr = processOpChunkLockStoreContents(commandStream);
else
if(operation == GENDBGMSG_OP_LISTSTORAGESTATES)
responseStr = MsgHelperGenericDebug::processOpListTargetStates(commandStream,
app->getTargetStateStore() );
else
if(operation == GENDBGMSG_OP_SETREJECTIONRATE)
responseStr = processOpSetRejectionRate(commandStream);
else
responseStr = "Unknown/invalid operation";
return responseStr;
}
/**
 * Lists the number of open local files per session, followed by a summary line.
 * Sessions without open files are counted as checked but not printed.
 *
 * @param commandStream not read (protocol: no arguments)
 * @return human-readable listing
 */
std::string GenericDebugMsgEx::processOpListOpenFiles(std::istringstream& commandStream)
{
   // protocol: no arguments

   SessionStore* sessions = Program::getApp()->getSessions();

   NumNodeIDList sessionIDs;
   const size_t numSessions = sessions->getAllSessionIDs(&sessionIDs);

   std::ostringstream responseStream;
   responseStream << "Found " << numSessions << " sessions." << std::endl;
   responseStream << std::endl;

   size_t numFilesTotal = 0;
   size_t numCheckedSessions = 0; // may differ from number of initially queried sessions

   // walk over all sessions
   for (const auto& sessionID : sessionIDs)
   {
      // note: sessionID might have become removed since we queried it, e.g. because client is gone
      auto session = sessions->referenceSession(sessionID);
      if (!session)
         continue;

      ++numCheckedSessions;

      const size_t numFiles = session->getLocalFiles()->getSize();
      if (numFiles == 0)
         continue; // only print sessions with open files

      numFilesTotal += numFiles;

      responseStream << sessionID << ": " << numFiles << std::endl;
   }

   responseStream << std::endl;
   responseStream << "Final results: " << numFilesTotal << " open files in " <<
      numCheckedSessions << " checked sessions";

   return responseStream.str();
}
/**
 * @param commandStream not read (no arguments)
 * @return the value of the BEEGFS_VERSION macro as a string
 */
std::string GenericDebugMsgEx::processOpVersion(std::istringstream& commandStream)
{
   return std::string(BEEGFS_VERSION);
}
/**
 * Prints the general (indirect), direct and busy-worker statistics of every work queue.
 *
 * @param commandStream not read (protocol: no arguments)
 * @return human-readable statistics, three sections per queue
 */
std::string GenericDebugMsgEx::processOpMsgQueueStats(std::istringstream& commandStream)
{
   // protocol: no arguments

   MultiWorkQueueMap* workQueueMap = Program::getApp()->getWorkQueueMap();

   std::ostringstream responseStream;

   // note: the stats buffers are deliberately shared by all iterations (as handed to
   // getStatsAsStr() for each queue)
   std::string indirectQueueStats;
   std::string directQueueStats;
   std::string busyStats;

   for (const auto& queueEntry : *workQueueMap)
   {
      queueEntry.second->getStatsAsStr(indirectQueueStats, directQueueStats, busyStats);

      responseStream << "* [queue id " << queueEntry.first << "] "
         "general queue stats: " << std::endl
         << indirectQueueStats << std::endl;

      responseStream << "* [queue id " << queueEntry.first << "] "
         "direct queue stats: " << std::endl
         << directQueueStats << std::endl;

      responseStream << "* [queue id " << queueEntry.first << "] "
         "busy worker stats: " << std::endl
         << busyStats << std::endl;
   }

   return responseStream.str();
}
/**
 * Reports the IDs with exceeded quota for one target.
 *
 * The first argument in the command stream is the target ID; the remaining arguments are
 * consumed by MsgHelperGenericDebug::processOpQuotaExceeded().
 *
 * @param commandStream remaining command arguments, starting with the target ID
 * @return response text; a fixed notice if quota enforcement is disabled in the config
 */
std::string GenericDebugMsgEx::processOpQuotaExceeded(std::istringstream& commandStream)
{
   App* app = Program::getApp();

   std::string targetIdStr;
   std::getline(commandStream, targetIdStr, ' ');
   uint16_t targetId = StringTk::strToUInt(targetIdStr);

   // note: the literals are concatenated by the compiler, so the first one has to end with a
   // space to keep "is" and "disabled" apart
   if(!app->getConfig()->getQuotaEnableEnforcement() )
      return "No quota exceeded IDs on this storage daemon because quota enforcement is "
         "disabled.";

   ExceededQuotaStorePtr exQuotaStore = app->getExceededQuotaStores()->get(targetId);

   // exQuotaStore may be null; needs to be checked in MsgHelperGenericDebug::processOpQuotaExceeded
   return MsgHelperGenericDebug::processOpQuotaExceeded(commandStream, exQuotaStore.get());
}
/**
 * Queries the used quota for a range of user or group IDs.
 *
 * Recognized space-separated arguments: "uid" or "gid" (required), "range <start> <end>"
 * (required; a range of 0..0 is rejected), "forEachTarget" (optional; query and print each
 * target separately instead of all targets in one request).
 *
 * @param commandStream remaining command arguments
 * @return quota listing or an error text for invalid/missing arguments
 */
std::string GenericDebugMsgEx::processOpUsedQuota(std::istringstream& commandStream)
{
   App *app = Program::getApp();

   std::ostringstream responseStream;

   ZfsSession session;
   QuotaDataType quotaDataType = QuotaDataType_NONE;
   std::string quotaDataTypeStr;
   bool forEachTarget = false;
   unsigned rangeStart = 0;
   unsigned rangeEnd = 0;

   // get parameter from command string
   std::string token;
   while (!commandStream.eof() )
   {
      std::getline(commandStream, token, ' ');

      if (token == "uid")
      {
         quotaDataType = QuotaDataType_USER;
         quotaDataTypeStr = "user";
      }
      else if (token == "gid")
      {
         quotaDataType = QuotaDataType_GROUP;
         quotaDataTypeStr = "group";
      }
      else if (token == "forEachTarget")
      {
         forEachTarget = true;
      }
      else if (token == "range")
      {
         // the two tokens following "range" are start and end of the ID range
         std::string rangeValue;

         std::getline(commandStream, rangeValue, ' ');
         rangeStart = StringTk::strToUInt(rangeValue);

         std::getline(commandStream, rangeValue, ' ');
         rangeEnd = StringTk::strToUInt(rangeValue);
      }
   }

   // verify given parameters
   if (quotaDataType == QuotaDataType_NONE)
      return "Invalid or missing quota data type argument.";

   if ( (rangeStart == 0) && (rangeEnd == 0) )
      return "Invalid or missing range argument.";

   const auto& targets = app->getStorageTargets()->getTargets();

   if (forEachTarget)
   {
      responseStream << "Quota data of " << targets.size() << " targets." << std::endl;

      for (const auto& mapping : targets)
      {
         const auto& target = *mapping.second;

         // request with a map that contains only this one target
         QuotaBlockDeviceMap quotaBlockDevices = {
            {mapping.first, target.getQuotaBlockDevice()}
         };

         QuotaDataList outQuotaDataList;
         QuotaTk::requestQuotaForRange(&quotaBlockDevices, rangeStart, rangeEnd, quotaDataType,
            &outQuotaDataList, &session);

         responseStream << outQuotaDataList.size() << " used quota for " << quotaDataTypeStr
            << " IDs on target: " << mapping.first << std::endl;

         QuotaData::quotaDataListToString(outQuotaDataList, &responseStream);
      }
   }
   else
   {
      // one request covering the block devices of all targets
      QuotaBlockDeviceMap quotaBlockDevices;

      for (const auto& mapping : targets)
         quotaBlockDevices.insert(quotaBlockDevices.end(),
            std::make_pair(mapping.first, mapping.second->getQuotaBlockDevice()));

      QuotaDataList outQuotaDataList;
      QuotaTk::requestQuotaForRange(&quotaBlockDevices, rangeStart, rangeEnd, quotaDataType,
         &outQuotaDataList, &session);

      QuotaData::quotaDataListToString(outQuotaDataList, &responseStream);
   }

   return responseStream.str();
}
/**
 * Returns the number of queued files or dirs of the resync job of a target.
 *
 * @param commandStream remaining command arguments: targetID followed by "files" or "dirs"
 * @return the count as a string ("0" if the target has no resync job) or an error text
 */
std::string GenericDebugMsgEx::processOpResyncQueueLen(std::istringstream& commandStream)
{
   // protocol: targetID files/dirs as argument (e.g. "resyncqueuelen 1234 files")

   // get parameter from command string
   std::string targetIDStr;
   std::string typeStr;

   std::getline(commandStream, targetIDStr, ' ');
   std::getline(commandStream, typeStr, ' ');

   const uint16_t targetID = StringTk::strToUInt(targetIDStr);
   if (targetID == 0)
      return "Invalid or missing targetID";

   BuddyResyncJob* resyncJob = Program::getApp()->getBuddyResyncer()->getResyncJob(targetID);
   if (!resyncJob)
      return "0";

   if (typeStr == "files")
   {
      const size_t numFiles = resyncJob->syncCandidates.getNumFiles();
      return StringTk::uintToStr(numFiles);
   }

   if (typeStr == "dirs")
   {
      const size_t numDirs = resyncJob->syncCandidates.getNumDirs();
      return StringTk::uintToStr(numDirs);
   }

   return "Invalid or missing queue type";
}
/**
 * Debug operation: report the size of the chunk lock store for one target.
 *
 * Expected argument in the command stream: "<targetID>" (e.g. "chunklockstoresize 1234").
 *
 * @param commandStream remaining, space-separated arguments of the debug command.
 * @return the store size as decimal string, or an error text if the targetID is 0 / unparsable.
 */
std::string GenericDebugMsgEx::processOpChunkLockStoreSize(std::istringstream& commandStream)
{
   std::string idToken;
   std::getline(commandStream, idToken, ' ');

   const uint16_t targetID = StringTk::strToUInt(idToken);
   if (!targetID)
      return "Invalid or missing targetID";

   const size_t numLocked = Program::getApp()->getChunkLockStore()->getSize(targetID);

   return StringTk::uintToStr(numLocked);
}
std::string GenericDebugMsgEx::processOpChunkLockStoreContents(std::istringstream& commandStream)
{
// protocol: targetID and size limit (optional) as argument (e.g. "chunklockstoresize 1234 50")
std::stringstream outStream;
// get parameter from command string
std::string targetIDStr;
uint16_t targetID;
std::string maxEntriesStr;
unsigned maxEntries;
std::getline(commandStream, targetIDStr, ' ');
targetID = StringTk::strToUInt(targetIDStr);
std::getline(commandStream, maxEntriesStr, ' ');
maxEntries = StringTk::strToUInt(maxEntriesStr);
if (targetID == 0)
return "Invalid or missing targetID";
StringSet lockStoreContents = Program::getApp()->getChunkLockStore()->getLockStoreCopy(targetID);
unsigned lockStoreSize = lockStoreContents.size();
StringSetIter lockStoreIter = lockStoreContents.begin();
if ( (maxEntries == 0) || (maxEntries > lockStoreSize) )
maxEntries = lockStoreSize;
for (unsigned i = 0; i < maxEntries; i++)
{
outStream << *lockStoreIter << std::endl;
lockStoreIter++;
}
return outStream.str();
}
/**
 * Debug operation: set the connection rejection rate in the running config.
 *
 * Expected argument in the command stream: "<rate>" (parsed as unsigned integer).
 *
 * @param commandStream remaining, space-separated arguments of the debug command.
 * @return a confirmation line containing the value that was set.
 */
std::string GenericDebugMsgEx::processOpSetRejectionRate(std::istringstream& commandStream)
{
   std::string rateToken;
   std::getline(commandStream, rateToken, ' ');

   const unsigned newRate = StringTk::strToUInt(rateToken);

   Program::getApp()->getConfig()->setConnectionRejectionRate(newRate);

   std::ostringstream reply;
   reply << "Setting connection reject rate to " << newRate << std::endl;

   return reply.str();
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <common/net/message/nodes/GenericDebugMsg.h>
/**
 * Handler class for GenericDebugMsg.
 *
 * Besides the processIncoming() entry point it declares one private helper per debug operation.
 * Each helper receives the command stream and returns a std::string.
 */
class GenericDebugMsgEx : public GenericDebugMsg
{
public:
// entry point for a received message (implementation not shown in this header)
virtual bool processIncoming(ResponseContext& ctx);
private:
// presumably parses the operation name and dispatches to one of the processOp* helpers below
// (implementation not shown here - TODO confirm)
std::string processCommand();
// per-operation helpers: commandStream carries the arguments; the return value is the
// textual result (or an error text) of the operation
std::string processOpListOpenFiles(std::istringstream& commandStream);
std::string processOpVersion(std::istringstream& commandStream);
std::string processOpMsgQueueStats(std::istringstream& commandStream);
std::string processOpQuotaExceeded(std::istringstream& commandStream);
std::string processOpUsedQuota(std::istringstream& commandStream);
std::string processOpResyncQueueLen(std::istringstream& commandStream);
std::string processOpChunkLockStoreSize(std::istringstream& commandStream);
std::string processOpChunkLockStoreContents(std::istringstream& commandStream);
std::string processOpSetRejectionRate(std::istringstream& commandStream);
};

View File

@@ -0,0 +1,30 @@
#include <program/Program.h>
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/nodes/OpCounter.h>
#include "GetClientStatsMsgEx.h"
#include <nodes/StorageNodeOpStats.h>
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
/**
 * Server side handler for a GetClientStatsMsg request.
 *
 * Serializes the node op statistics (per user if the PERUSERSTATS header flag is set) into a
 * uint64 vector, starting from the cookie given in the request and limited by
 * GETCLIENTSTATSRESP_MAX_PAYLOAD_LEN, and sends it back as GetClientStatsRespMsg.
 *
 * @return always true.
 */
bool GetClientStatsMsgEx::processIncoming(ResponseContext& ctx)
{
   const bool perUser = isMsgHeaderFeatureFlagSet(GETCLIENTSTATSMSG_FLAG_PERUSERSTATS);
   uint64_t startCookie = getCookieIP(); // requested is cookie+1

   UInt64Vector statsOut;
   StorageNodeOpStats* opStats = Program::getApp()->getNodeOpStats();

   opStats->mapToUInt64Vec(
      startCookie, GETCLIENTSTATSRESP_MAX_PAYLOAD_LEN, perUser, &statsOut);

   ctx.sendResponse(GetClientStatsRespMsg(&statsOut) );

   return true;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/nodes/GetClientStatsMsg.h>
// Handler class for GetClientStatsMsg.
// NOTE: The message factory requires this object to have 'deserialize' and
// 'processIncoming' methods. 'deserialize' is derived from other classes.
class GetClientStatsMsgEx : public GetClientStatsMsg
{
public:
// handles a received request and sends the response through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,24 @@
#include <program/Program.h>
#include <storage/StorageTargets.h>
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
#include "GetTargetConsistencyStatesMsgEx.h"
/**
 * Answers with the consistency state of every requested target, in request order.
 * Targets that are not known to this server are reported as TargetConsistencyState_BAD.
 *
 * @return always true.
 */
bool GetTargetConsistencyStatesMsgEx::processIncoming(ResponseContext& ctx)
{
   StorageTargets* knownTargets = Program::getApp()->getStorageTargets();

   TargetConsistencyStateVec result;

   for (const uint16_t id : targetIDs)
   {
      auto* const tgt = knownTargets->getTarget(id);

      result.push_back(tgt ? tgt->getConsistencyState() : TargetConsistencyState_BAD);
   }

   ctx.sendResponse(GetTargetConsistencyStatesRespMsg(result));

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/GetTargetConsistencyStatesMsg.h>
// Handler class for GetTargetConsistencyStatesMsg.
class GetTargetConsistencyStatesMsgEx : public GetTargetConsistencyStatesMsg
{
public:
// handles a received request and sends the response through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,76 @@
#include <common/net/sock/NetworkInterfaceCard.h>
#include <program/Program.h>
#include "HeartbeatMsgEx.h"
#include <boost/lexical_cast.hpp>
/**
 * Handles an incoming heartbeat: builds a Node object from the message contents, adds it to (or
 * updates it in) the node store matching its type, acknowledges the message and counts the op.
 *
 * Heartbeats of an unexpected node type are logged and only acknowledged.
 *
 * @return always true.
 */
bool HeartbeatMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("Heartbeat incoming");

   App* app = Program::getApp();

   // build the node object described by the message
   NicAddressList& senderNics = getNicList();
   auto node = std::make_shared<Node>(getNodeType(), getNodeID(), getNodeNumID(), getPortUDP(),
      getPortTCP(), senderNics);

   // tell the node's connection pool about our own NICs and their capabilities
   NicAddressList localNics(app->getLocalNicList() );
   NicListCapabilities localCaps;
   NetworkInterfaceCard::supportedCapabilities(&localNics, &localCaps);
   node->getConnPool()->setLocalNicList(localNics, localCaps);

   const std::string nodeDesc = node->getNodeIDWithTypeStr();

   log.log(Log_DEBUG, std::string("Heartbeat node: ") + nodeDesc);

   // pick the store that matches the sender's node type
   AbstractNodeStore* store = nullptr;
   bool typeIsKnown = true;

   switch(getNodeType() )
   {
      case NODETYPE_Meta:
         store = app->getMetaNodes();
         break;

      case NODETYPE_Mgmt:
         store = app->getMgmtNodes();
         break;

      case NODETYPE_Storage:
         store = app->getStorageNodes();
         break;

      default:
         log.logErr("Invalid/unexpected node type: "
            + boost::lexical_cast<std::string>(getNodeType()));
         typeIsKnown = false;
         break;
   }

   if (typeIsKnown)
   {
      const bool added =
         (store->addOrUpdateNode(std::move(node)) == NodeStoreResult::Added);

      if (added)
      { // first time we see this node => log it
         const bool rdmaCapable = NetworkInterfaceCard::supportsRDMA(&senderNics);

         log.log(Log_WARNING, std::string("New node: ") +
            nodeDesc + "; " +
            std::string(rdmaCapable ? "RDMA; " : "") );
      }
   }

   acknowledge(ctx);

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_HEARTBEAT,
      getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/HeartbeatMsg.h>
// Handler class for HeartbeatMsg.
class HeartbeatMsgEx : public HeartbeatMsg
{
public:
// handles a received heartbeat; acknowledges through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,28 @@
#include <common/net/message/nodes/HeartbeatMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "HeartbeatRequestMsgEx.h"
/**
 * Answers a heartbeat request with a HeartbeatMsg that describes the local node (alias,
 * numeric ID, NIC list and the configured storage port for both port fields).
 *
 * @return always true.
 */
bool HeartbeatRequestMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("Heartbeat request incoming");

   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   // gather the description of this node
   Node& self = app->getLocalNode();
   NumNodeID selfNumID = self.getNumID();
   NicAddressList selfNics(self.getNicList() );

   HeartbeatMsg reply(self.getAlias(), selfNumID, NODETYPE_Storage, &selfNics);
   reply.setPorts(cfg->getConnStoragePort(), cfg->getConnStoragePort() );

   ctx.sendResponse(reply);

   log.log(Log_DEBUG,
      std::string("Heartbeat req ip:") + StringTk::uintToHexStr(ctx.getSocket()->getPeerIP()));

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_HEARTBEAT,
      getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
// Handler class for HeartbeatRequestMsg.
class HeartbeatRequestMsgEx : public HeartbeatRequestMsg
{
public:
// handles a received request and sends the heartbeat response through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,44 @@
#include <common/net/message/nodes/MapTargetsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/ZipIterator.h>
#include <program/Program.h>
#include "MapTargetsMsgEx.h"
/**
 * Maps every (targetID, storage pool ID) pair of the message to the node ID given in the
 * message and reports the per-target result.
 *
 * The result map is only sent as MapTargetsRespMsg if acknowledge() returns false.
 *
 * @return always true.
 */
bool MapTargetsMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext log("MapTargetsMsg incoming");

   const App* app = Program::getApp();
   const NodeStoreServers* storageNodes = app->getStorageNodes();
   TargetMapper* targetMapper = app->getTargetMapper();

   const NumNodeID nodeID = getNodeID();
   std::map<uint16_t, FhgfsOpsErr> results;

   // bind by reference: no need to copy each mapping entry
   for (const auto& mapping : getTargets())
   {
      const auto targetId = mapping.first;
      const auto poolId = mapping.second;

      const auto mapRes = targetMapper->mapTarget(targetId, nodeID, poolId);

      results[targetId] = mapRes.first;

      /* the debug message is meant for successfully mapped new targets, so test for success.
         (the former "!= FhgfsOpsErr_SUCCESS" contradicted the intent stated right below.)
         NOTE(review): assumes mapRes.second is the "target is new" flag - confirm against
         TargetMapper::mapTarget. */
      if ( (mapRes.first == FhgfsOpsErr_SUCCESS) && (mapRes.second) )
      { // target could be mapped and is new
         LOG_DEBUG_CONTEXT(log, Log_WARNING, "Mapping "
            "target " + StringTk::uintToStr(targetId) +
            " => " +
            storageNodes->getNodeIDWithTypeStr(nodeID) );

         // storageNodes is only referenced by the debug log macro above
         IGNORE_UNUSED_VARIABLE(storageNodes);
      }
   }

   if(!acknowledge(ctx) )
      ctx.sendResponse(MapTargetsRespMsg(results));

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/MapTargetsMsg.h>
// Handler class for MapTargetsMsg.
class MapTargetsMsgEx : public MapTargetsMsg
{
public:
// handles a received request; acknowledges or responds through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,21 @@
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "PublishCapacitiesMsgEx.h"
/**
 * Sets the "force publish capacities" flag on the internode syncer and acknowledges the
 * message.
 *
 * @return always true.
 */
bool PublishCapacitiesMsgEx::processIncoming(ResponseContext& ctx)
{
   // request an upload of capacity information
   Program::getApp()->getInternodeSyncer()->setForcePublishCapacities();

   acknowledge(ctx);

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/net/message/nodes/PublishCapacitiesMsg.h>
// Handler class for PublishCapacitiesMsg.
class PublishCapacitiesMsgEx : public PublishCapacitiesMsg
{
public:
// handles a received message; acknowledges through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,21 @@
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "RefreshTargetStatesMsgEx.h"
/**
 * Sets the "force target states update" flag on the internode syncer and acknowledges the
 * message.
 *
 * @return always true.
 */
bool RefreshTargetStatesMsgEx::processIncoming(ResponseContext& ctx)
{
   // request a refresh of the target states
   Program::getApp()->getInternodeSyncer()->setForceTargetStatesUpdate();

   acknowledge(ctx);

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/RefreshTargetStatesMsg.h>
// Handler class for RefreshTargetStatesMsg.
class RefreshTargetStatesMsgEx : public RefreshTargetStatesMsg
{
public:
// handles a received message; acknowledges through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,124 @@
#include "RemoveBuddyGroupMsgEx.h"
#include <common/net/message/nodes/RemoveBuddyGroupRespMsg.h>
#include <net/message/storage/listing/ListChunkDirIncrementalMsgEx.h>
#include <program/Program.h>
#include <unistd.h>
/**
 * Recursively checks whether the directory tree below dirFD consists of directories only, i.e.
 * contains no chunk files (or other non-directory entries) anywhere.
 *
 * Ownership of dirFD passes to this function: it is handed to fdopendir() and released together
 * with the directory stream (presumably closedir() in StorageTk::CloseDirDeleter - TODO confirm),
 * or closed directly if no stream could be created.
 *
 * @param dirFD open file descriptor of the directory to inspect.
 * @return FhgfsOpsErr_SUCCESS if only directories were found, FhgfsOpsErr_NOTEMPTY if a
 *    non-directory entry exists, FhgfsOpsErr_INTERNAL if reading/stat'ing/opening failed.
 */
static FhgfsOpsErr checkChunkDirRemovable(const int dirFD)
{
   DIR* dir = fdopendir(dirFD);
   if (!dir)
   { /* without this check a NULL stream would be passed to readdir()/dirfd() below. on failure
        the descriptor was not taken over by a stream, so it has to be closed here. */
      LOG(MIRRORING, ERR, "Could not open directory stream for chunk directory.");

      if (dirFD >= 0)
         ::close(dirFD);

      return FhgfsOpsErr_INTERNAL;
   }

   // releases the stream on every return path
   std::unique_ptr<DIR, StorageTk::CloseDirDeleter> dirGuard(dir);

   while (true)
   {
      struct dirent* result;

#if USE_READDIR_R
      struct dirent buffer;
      if (readdir_r(dir, &buffer, &result) != 0)
         break;
#else
      // readdir() signals both "end of directory" and "error" with NULL; errno tells them apart
      errno = 0;
      result = readdir(dir);
      if (!result && errno)
         break;
#endif

      if (!result)
         return FhgfsOpsErr_SUCCESS; // end of directory reached without finding a file

      if (strcmp(result->d_name, ".") == 0 || strcmp(result->d_name, "..") == 0)
         continue;

      struct stat statData;
      const int statRes = ::fstatat(dirfd(dir), result->d_name, &statData, AT_SYMLINK_NOFOLLOW);
      if (statRes != 0)
      {
         LOG(MIRRORING, ERR, "Could not stat something in chunk directory.");
         return FhgfsOpsErr_INTERNAL;
      }

      if (!S_ISDIR(statData.st_mode))
         return FhgfsOpsErr_NOTEMPTY;

      const int subdir = ::openat(dirfd(dir), result->d_name, O_RDONLY);
      if (subdir < 0)
      {
         LOG(MIRRORING, ERR, "Could not open directory in chunk path.");
         return FhgfsOpsErr_INTERNAL;
      }

      // the recursive call takes over the subdir descriptor
      const FhgfsOpsErr checkRes = checkChunkDirRemovable(subdir);
      if (checkRes != FhgfsOpsErr_SUCCESS)
         return checkRes;
   }

   // only reached when reading a directory entry failed
   return FhgfsOpsErr_INTERNAL;
}
/**
 * Handles a request to remove a storage buddy group from this server.
 *
 * The group can be removed if the mirror directory of the local target of the group contains no
 * files (see checkChunkDirRemovable), or if it does but "force" is set. With "checkOnly" the
 * check is performed, but the group is not unmapped.
 *
 * The result is always sent as RemoveBuddyGroupRespMsg: FhgfsOpsErr_SUCCESS, the result of the
 * directory check, or FhgfsOpsErr_INTERNAL for non-storage groups, groups without a local
 * target, and local failures.
 *
 * @return always true.
 */
bool RemoveBuddyGroupMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   if (type != NODETYPE_Storage)
   {
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   // find the target of the group that belongs to this node: try primary first, then secondary
   uint16_t targetID = app->getMirrorBuddyGroupMapper()->getPrimaryTargetID(groupID);
   if (app->getTargetMapper()->getNodeID(targetID) != app->getLocalNode().getNumID())
      targetID = app->getMirrorBuddyGroupMapper()->getSecondaryTargetID(groupID);

   if (app->getTargetMapper()->getNodeID(targetID) != app->getLocalNode().getNumID())
   {
      LOG(MIRRORING, ERR, "Group is not mapped on this target.", groupID);
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   auto* const target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   { // (this branch used to log the unrelated "could not open directory" message)
      LOG(MIRRORING, ERR, "Could not find storage target of buddy group.", groupID, targetID);
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   // open a separate descriptor, because checkChunkDirRemovable takes over the one it is given
   const int dirFD = openat(*target->getMirrorFD(), ".", O_RDONLY);
   if (dirFD < 0)
   {
      LOG(MIRRORING, ERR, "Could not open directory file descriptor.", groupID);
      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   const FhgfsOpsErr checkRes = checkChunkDirRemovable(dirFD);

   const bool forceAndNotEmpty = checkRes == FhgfsOpsErr_NOTEMPTY && force;

   if (checkRes == FhgfsOpsErr_SUCCESS || forceAndNotEmpty)
   {
      if (!checkOnly)
      {
         auto* const bgm = app->getMirrorBuddyGroupMapper();
         const NumNodeID localID = app->getLocalNode().getNumID();

         if (!bgm->unmapMirrorBuddyGroup(groupID, localID))
         {
            ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_INTERNAL));
            return true;
         }
      }

      ctx.sendResponse(RemoveBuddyGroupRespMsg(FhgfsOpsErr_SUCCESS));
      return true;
   }
   else
   {
      ctx.sendResponse(RemoveBuddyGroupRespMsg(checkRes));
      return true;
   }
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/RemoveBuddyGroupMsg.h>
// Handler class for RemoveBuddyGroupMsg.
class RemoveBuddyGroupMsgEx : public RemoveBuddyGroupMsg
{
public:
// handles a received request and sends the response through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,37 @@
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "RemoveNodeMsgEx.h"
/**
 * Removes the node named in the message from the storage node store (other node types are
 * ignored), logs a successful removal, then acknowledges / responds and counts the op.
 *
 * @return always true.
 */
bool RemoveNodeMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   LOG_DBG(GENERAL, SPAM, "Removing node.", getNodeNumID());

   if (getNodeType() == NODETYPE_Storage)
   {
      NodeStoreServers* storageNodes = app->getStorageNodes();

      // grab a reference first, so the node can still be named in the log after deletion
      auto removedNode = storageNodes->referenceNode(getNodeNumID());
      const bool wasRemoved = storageNodes->deleteNode(getNodeNumID());

      if (wasRemoved)
      {
         LOG(GENERAL, WARNING, "Node removed.", ("node", removedNode->getNodeIDWithTypeStr()));
         LOG(GENERAL, WARNING, "Number of nodes in the system:",
            ("meta", app->getMetaNodes()->getSize()),
            ("storage", app->getStorageNodes()->getSize()));
      }
   }

   // a response message is only sent if the message was not acknowledged
   if (!acknowledge(ctx))
      ctx.sendResponse(RemoveNodeRespMsg(0));

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_REMOVENODE,
      getMsgHeaderUserID() );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/RemoveNodeMsg.h>
// Handler class for RemoveNodeMsg.
class RemoveNodeMsgEx : public RemoveNodeMsg
{
public:
// handles a received request; acknowledges or responds through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,37 @@
#include <common/net/message/nodes/SetMirrorBuddyGroupRespMsg.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "SetMirrorBuddyGroupMsgEx.h"
/**
 * Adds (or updates, if allowed by the message) a storage mirror buddy group in the local
 * buddy group mapper. Messages for other node types are only acknowledged / answered with
 * success.
 *
 * @return always true.
 */
bool SetMirrorBuddyGroupMsgEx::processIncoming(ResponseContext& ctx)
{
   const uint16_t requestedGroupID = this->getBuddyGroupID();

   if (getNodeType() != NODETYPE_Storage)
   {
      // The storage server has no mapper for meta buddy groups - nothing to do, just acknowledge
      if (!acknowledge(ctx))
         ctx.sendResponse(SetMirrorBuddyGroupRespMsg(FhgfsOpsErr_SUCCESS, requestedGroupID));

      return true;
   }

   App* app = Program::getApp();
   MirrorBuddyGroupMapper* groupMapper = app->getMirrorBuddyGroupMapper();

   const uint16_t primaryID = this->getPrimaryTargetID();
   const uint16_t secondaryID = this->getSecondaryTargetID();
   const bool mayUpdate = this->getAllowUpdate();

   uint16_t assignedGroupID = 0; // filled in by the mapper

   const FhgfsOpsErr mapRes = groupMapper->mapMirrorBuddyGroup(requestedGroupID, primaryID,
      secondaryID, app->getLocalNode().getNumID(), mayUpdate, &assignedGroupID);

   if(!acknowledge(ctx) )
      ctx.sendResponse(SetMirrorBuddyGroupRespMsg(mapRes, assignedGroupID) );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/SetMirrorBuddyGroupMsg.h>
// Handler class for SetMirrorBuddyGroupMsg.
class SetMirrorBuddyGroupMsgEx : public SetMirrorBuddyGroupMsg
{
public:
// handles a received request; acknowledges or responds through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,40 @@
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/nodes/TargetStateStore.h>
#include <common/toolkit/ZipIterator.h>
#include <program/Program.h>
#include "SetTargetConsistencyStatesMsgEx.h"
/**
 * Applies the consistency states given in the message to the corresponding local targets.
 *
 * The targetID and state lists are walked pairwise. Processing stops at the first targetID
 * that is unknown on this server; states set before that point stay applied.
 *
 * Response: FhgfsOpsErr_SUCCESS, FhgfsOpsErr_INTERNAL (list sizes differ) or
 * FhgfsOpsErr_UNKNOWNTARGET.
 *
 * @return always true.
 */
bool SetTargetConsistencyStatesMsgEx::processIncoming(ResponseContext& ctx)
{
   StorageTargets* localTargets = Program::getApp()->getStorageTargets();

   FhgfsOpsErr outcome = FhgfsOpsErr_SUCCESS;

   if (getTargetIDs().size() != getStates().size())
   {
      LogContext(__func__).logErr("Different list size of targetIDs and states");
      outcome = FhgfsOpsErr_INTERNAL;
   }
   else
   {
      for (ZipIterRange<UInt16List, UInt8List> pairIter(getTargetIDs(), getStates());
           !pairIter.empty(); ++pairIter)
      {
         auto* const target = localTargets->getTarget(*pairIter()->first);

         if (!target)
         {
            LogContext(__func__).logErr("Unknown targetID: " +
               StringTk::uintToStr(*(pairIter()->first) ) );
            outcome = FhgfsOpsErr_UNKNOWNTARGET;
            break;
         }

         target->setState(TargetConsistencyState(*pairIter()->second));
      }
   }

   ctx.sendResponse(SetTargetConsistencyStatesRespMsg(outcome) );

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
// Handler class for SetTargetConsistencyStatesMsg.
class SetTargetConsistencyStatesMsgEx : public SetTargetConsistencyStatesMsg
{
public:
// handles a received request and sends the response through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,66 @@
#include <app/App.h>
#include <common/net/message/nodes/StorageBenchControlMsgResp.h>
#include <components/benchmarker/StorageBenchOperator.h>
#include <program/Program.h>
#include "StorageBenchControlMsgEx.h"
/**
 * Executes a storage benchmark control action (start, stop, status, cleanup) and responds with
 * the benchmark status, the action, the benchmark type, an error code and (for the status
 * action) the results.
 *
 * The error code in the response is the error of the requested action if it failed; otherwise
 * it is the benchmark operator's "last run" error code.
 *
 * @return always true.
 */
bool StorageBenchControlMsgEx::processIncoming(ResponseContext& ctx)
{
   StorageBenchOperator* benchOp = Program::getApp()->getStorageBenchOperator();

   StorageBenchResultsMap results;
   int actionErr = STORAGEBENCH_ERROR_NO_ERROR;

   switch(getAction())
   {
      case StorageBenchAction_START:
         actionErr = benchOp->initAndStartStorageBench(&getTargetIDs(), getBlocksize(),
            getSize(), getThreads(), getODirect(), getType() );
         break;

      case StorageBenchAction_STOP:
         actionErr = benchOp->stopBenchmark();
         break;

      case StorageBenchAction_STATUS:
         benchOp->getStatusWithResults(&getTargetIDs(), &results);
         actionErr = STORAGEBENCH_ERROR_NO_ERROR;
         break;

      case StorageBenchAction_CLEANUP:
         actionErr = benchOp->cleanup(&getTargetIDs());
         break;

      default:
         LogContext("StorageBenchControlMsg incoming").logErr("unknown action!");
         break;
   }

   // a failed action reports its own error; otherwise report the error state of the benchmark
   // run itself
   const int errorCode = (actionErr != STORAGEBENCH_ERROR_NO_ERROR)
      ? actionErr
      : benchOp->getLastRunErrorCode();

   ctx.sendResponse(
      StorageBenchControlMsgResp(benchOp->getStatus(), getAction(),
         benchOp->getType(), errorCode, results) );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/nodes/StorageBenchControlMsg.h>
#include <common/Common.h>
// Handler class for StorageBenchControlMsg.
class StorageBenchControlMsgEx: public StorageBenchControlMsg
{
public:
// handles a received request and sends the response through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,14 @@
#include "RefreshStoragePoolsMsgEx.h"
#include <program/Program.h>
/**
 * Sets the "force storage pools update" flag on the internode syncer and acknowledges the
 * message.
 *
 * @return always true.
 */
bool RefreshStoragePoolsMsgEx::processIncoming(ResponseContext& ctx)
{
   InternodeSyncer* syncer = Program::getApp()->getInternodeSyncer();
   syncer->setForceStoragePoolsUpdate();

   // can only come as an AcknowledgableMsg from mgmtd, so there is no separate response msg
   acknowledge(ctx);

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/nodes/storagepools/RefreshStoragePoolsMsg.h>
// Handler class for RefreshStoragePoolsMsg.
class RefreshStoragePoolsMsgEx : public RefreshStoragePoolsMsg
{
public:
// handles a received message; acknowledges through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,94 @@
#include <program/Program.h>
#include <common/net/message/session/FSyncLocalFileRespMsg.h>
#include <common/storage/StorageErrors.h>
#include <net/msghelpers/MsgHelperIO.h>
#include "FSyncLocalFileMsgEx.h"
/**
 * Runs the fsync logic of this message and sends its result as FSyncLocalFileRespMsg.
 *
 * @return always true.
 */
bool FSyncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
   const FhgfsOpsErr syncRes = fsync();

   ctx.sendResponse(FSyncLocalFileRespMsg(syncRes));

   return true;
}
/**
 * Syncs the chunk file that belongs to the file handle of this message, if it is open in the
 * session of the sender.
 *
 * For buddy mirror messages the target ID of the message is a buddy group ID and gets resolved
 * to the primary or secondary target first; the session check is skipped for them.
 *
 * @return FhgfsOpsErr_SUCCESS; FhgfsOpsErr_INTERNAL if the fsync call failed;
 *    FhgfsOpsErr_STORAGE_SRV_CRASHED if the session check is requested and either the local
 *    file session is marked as "server crashed" or no local file session exists at all.
 */
FhgfsOpsErr FSyncLocalFileMsgEx::fsync()
{
   const char* logContext = "FSyncLocalFileMsg incoming";

   const bool mirrored = isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_BUDDYMIRROR);

   // the session check is only done for non-mirror sessions
   const bool checkSession =
      !mirrored && isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_SESSION_CHECK);

   App* app = Program::getApp();

   auto session = app->getSessions()->referenceOrAddSession(getSessionID());
   SessionLocalFileStore* localFiles = session->getLocalFiles();

   // resolve the actual targetID
   uint16_t targetID = getTargetID();

   if(mirrored)
   { // the given ID is a buddy group ID => translate to primary or secondary target
      MirrorBuddyGroupMapper* groupMapper = app->getMirrorBuddyGroupMapper();

      if(isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
         targetID = groupMapper->getSecondaryTargetID(targetID);
      else
         targetID = groupMapper->getPrimaryTargetID(targetID);

      // note: only logged here; the lookup below is still done with the invalid (zero) ID
      if(unlikely(!targetID) )
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
   }

   auto localFile = localFiles->referenceSession(getFileHandleID(), targetID, mirrored);

   if(!localFile)
   { // no local file session for this handle
      if(!checkSession)
         return FhgfsOpsErr_SUCCESS;

      // the server crashed during a write or before the close was successful
      LogContext log(logContext);
      log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
         "No session for file available. "
         "FileHandleID: " + std::string(getFileHandleID()) );

      return FhgfsOpsErr_STORAGE_SRV_CRASHED;
   }

   FhgfsOpsErr result = FhgfsOpsErr_SUCCESS;

   if(!isMsgHeaderFeatureFlagSet(FSYNCLOCALFILEMSG_FLAG_NO_SYNC) )
   {
      auto& fd = localFile->getFD();

      if(fd.valid() )
      { // file is open => sync it
         const int syncRes = MsgHelperIO::fsync(*fd);

         if(syncRes)
         {
            LogContext log(logContext);
            log.log(Log_WARNING, std::string("fsync of chunk file failed. ") +
               std::string("SessionID: ") + getSessionID().str() +
               std::string(". SysErr: ") + System::getErrString() );

            result = FhgfsOpsErr_INTERNAL;
         }
      }
   }

   if(checkSession && localFile->isServerCrashed() )
   { // server crashed during the write, data might be lost => report that to the client
      LogContext log(logContext);
      log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.) "
         "The session is marked as dirty.");

      result = FhgfsOpsErr_STORAGE_SRV_CRASHED;
   }

   return result;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/session/FSyncLocalFileMsg.h>
// Handler class for FSyncLocalFileMsg.
class FSyncLocalFileMsgEx : public FSyncLocalFileMsg
{
public:
// handles a received request and sends the response through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// does the actual sync work; its result is what processIncoming() puts into the response
FhgfsOpsErr fsync();
};

View File

@@ -0,0 +1,252 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/session/opening/CloseChunkFileRespMsg.h>
#include <common/toolkit/SessionTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "CloseChunkFileMsgEx.h"
#include <boost/lexical_cast.hpp>
/**
 * Closes the chunk file named in the message, sends the close result together with the dynamic
 * file attributes as CloseChunkFileRespMsg and counts the op.
 *
 * @return always true.
 */
bool CloseChunkFileMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   const auto closeOutcome = close(ctx);
   const FhgfsOpsErr closeRes = closeOutcome.first;
   const DynamicAttribs& attribs = closeOutcome.second;

   // for FhgfsOpsErr_COMMUNICATION a GenericResponseMsg has been sent already
   if (closeRes != FhgfsOpsErr_COMMUNICATION)
   {
      ctx.sendResponse(
         CloseChunkFileRespMsg(closeRes, attribs.filesize, attribs.allocedBlocks,
            attribs.modificationTimeSecs, attribs.lastAccessTimeSecs,
            attribs.storageVersion) );
   }

   // update op counters
   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_CLOSELOCAL,
      getMsgHeaderUserID() );

   return true;
}
/**
 * Does the actual close work: resolves the target, forwards the message to the secondary (if
 * appropriate), removes the local file session and collects the dynamic file attributes unless
 * the NODYNAMICATTRIBS flag is set.
 *
 * @return pair of the result for the requestor and the collected attributes (all zero if none
 *    were collected). A result of FhgfsOpsErr_COMMUNICATION comes from forwardToSecondary(),
 *    which has already sent a GenericResponseMsg in that case.
 */
std::pair<FhgfsOpsErr, CloseChunkFileMsgEx::DynamicAttribs> CloseChunkFileMsgEx::close(
   ResponseContext& ctx)
{
   const char* logContext = "CloseChunkFileMsg incoming";

   App* app = Program::getApp();
   Config* config = app->getConfig();

   DynamicAttribs attribs = {0, 0, 0, 0, 0};
   std::string handleID(getFileHandleID() );

   const bool mirrored = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR);
   const bool wantAttribs =
      !isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_NODYNAMICATTRIBS);

   // resolve the actual targetID
   uint16_t targetID = getTargetID();

   if(mirrored)
   { // the given ID is a buddy group ID => translate to primary or secondary target
      MirrorBuddyGroupMapper* groupMapper = app->getMirrorBuddyGroupMapper();

      if(isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
         targetID = groupMapper->getSecondaryTargetID(targetID);
      else
         targetID = groupMapper->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown group
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
         return {FhgfsOpsErr_UNKNOWNTARGET, {}};
      }
   }

   // forward to secondary (if appropriate); this happens before anything is closed locally
   FhgfsOpsErr closeRes = forwardToSecondary(ctx);
   if (unlikely(closeRes != FhgfsOpsErr_SUCCESS))
      return {closeRes, attribs};

   auto session = app->getSessions()->referenceOrAddSession(getSessionID());
   SessionLocalFileStore* localFiles = session->getLocalFiles();

   auto fileState = localFiles->removeSession(handleID, targetID, mirrored);

   if (fileState)
   { // file no longer in use => refresh attribs and close the fd
      auto& fd = fileState->getFD();
      const bool statBeforeClose = config->getTuneEarlyStat();

      /* early stat: get the attribs through the still open fd.
       * Note: Depending on the underlying file system the returned st_blocks might be too large
       *    (pre-allocated blocks, which are only released on close() ). The advantage is that
       *    the file descriptor is already at hand. */
      if (statBeforeClose && wantAttribs)
         getDynamicAttribsByFD(*fd, handleID, targetID, attribs);

      if (!fileState->close())
         closeRes = FhgfsOpsErr_INTERNAL;

      // late stat: only get the attribs now, so that xfs has released pre-allocated blocks
      if (!statBeforeClose && wantAttribs)
         getDynamicAttribsByPath(handleID, targetID, attribs);
   }
   else
   if (wantAttribs)
   { // file still in use by other threads => get dynamic attribs by path
      getDynamicAttribsByPath(handleID, targetID, attribs);
   }

   // note: "file not exists" is not an error. there is just nothing to do in that case.

   return {closeRes, attribs};
}
/**
 * If this is a buddy mirror msg and we are the primary, forward this msg to the secondary.
 *
 * The message object itself is sent, with the "buddymirror second" flag set temporarily.
 *
 * @return FhgfsOpsErr_SUCCESS if there was nothing to forward, if forwarding succeeded, or if
 *    the secondary is marked offline (local processing shall continue then);
 *    FhgfsOpsErr_COMMUNICATION if forwarding failed for any other reason, in which case a
 *    GenericResponseMsg has been sent to the requestor already;
 *    otherwise the error reported by the secondary.
 * @throw SocketException if sending of GenericResponseMsg fails.
 */
FhgfsOpsErr CloseChunkFileMsgEx::forwardToSecondary(ResponseContext& ctx)
{
   const char* logContext = "CloseChunkFileMsg incoming (forward to secondary)";

   const bool isMirrorMsg = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR);

   if(!isMirrorMsg || isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
      return FhgfsOpsErr_SUCCESS; // not mirrored, or we are the secondary => nothing to do

   App* app = Program::getApp();

   // re-use "this" as the request: mark it for the secondary only while it is being sent
   addMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_CloseChunkFileResp);
   RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
      app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);

   const FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   unsetMsgHeaderFeatureFlag(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   const bool secondaryOffline =
      (commRes == FhgfsOpsErr_COMMUNICATION) &&
      (rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE);

   if(unlikely(secondaryOffline) )
   {
      LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

      return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
   }

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   {
      LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
         "error: " + boost::lexical_cast<std::string>(commRes));

      std::string indirectErrText = "Communication with secondary failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );

      ctx.sendResponse(
         GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(indirectErrText)));

      return FhgfsOpsErr_COMMUNICATION;
   }

   // communication worked => evaluate what the secondary reported
   CloseChunkFileRespMsg* secondaryResp = (CloseChunkFileRespMsg*)rrArgs.outRespMsg.get();
   const FhgfsOpsErr secondaryRes = secondaryResp->getResult();

   if(likely(secondaryRes == FhgfsOpsErr_SUCCESS) )
      return FhgfsOpsErr_SUCCESS;

   LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
      boost::lexical_cast<std::string>(secondaryRes) + "; "
      "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

   return secondaryRes;
}
bool CloseChunkFileMsgEx::getDynamicAttribsByFD(const int fd, std::string fileHandleID,
uint16_t targetID, DynamicAttribs& outDynAttribs)
{
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
std::string fileID(SessionTk::fileIDFromHandleID(fileHandleID) );
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // LOCK
// note: this is locked because we need to get the filesize together with the storageVersion
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(fd, &outDynAttribs.filesize,
&outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
&outDynAttribs.lastAccessTimeSecs);
if(getDynAttribsRes)
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // UNLOCK
return getDynAttribsRes;
}
bool CloseChunkFileMsgEx::getDynamicAttribsByPath(std::string fileHandleID, uint16_t targetID,
DynamicAttribs& outDynAttribs)
{
const char* logContext = "CloseChunkFileMsg (attribs by path)";
App* app = Program::getApp();
SyncedStoragePaths* syncedPaths = app->getSyncedStoragePaths();
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{ // unknown targetID
LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(targetID) );
return false;
}
const int targetFD = isMsgHeaderFeatureFlagSet(CLOSECHUNKFILEMSG_FLAG_BUDDYMIRROR)
? *target->getMirrorFD()
: *target->getChunkFD();
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
std::string pathStr = StorageTk::getFileChunkPath(getPathInfo(), fileID);
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
// note: this is locked because we need to get the filesize together with the storageVersion
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(targetFD, pathStr.c_str(),
&outDynAttribs.filesize, &outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
&outDynAttribs.lastAccessTimeSecs);
if(getDynAttribsRes)
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
return getDynAttribsRes;
}

View File

@@ -0,0 +1,29 @@
#pragma once
#include <common/net/message/session/opening/CloseChunkFileMsg.h>
/**
 * Message handler that extends CloseChunkFileMsg with the processing logic for an incoming
 * close request of a chunk file.
 */
class CloseChunkFileMsgEx : public CloseChunkFileMsg
{
private:
/**
 * Attributes of a chunk file as filled in by getDynamicAttribsByFD() and
 * getDynamicAttribsByPath().
 */
struct DynamicAttribs
{
int64_t filesize;
int64_t allocedBlocks; // allocated 512byte blocks (relevant for sparse files)
int64_t modificationTimeSecs;
int64_t lastAccessTimeSecs;
// value returned by SyncedStoragePaths::lockPath(); only assigned by the getDynamicAttribs*()
// methods when reading the attributes succeeded
uint64_t storageVersion;
};
public:
// entry point for the incoming message
virtual bool processIncoming(ResponseContext& ctx);
private:
// forwards this message to the secondary of the mirror buddy group and returns the
// communication result or the result reported by the secondary
FhgfsOpsErr forwardToSecondary(ResponseContext& ctx);
// reads the attributes via an open file descriptor while holding the path lock of the file
bool getDynamicAttribsByFD(int fd, std::string fileHandleID, uint16_t targetID,
DynamicAttribs& outDynAttribs);
// reads the attributes via the chunk path while holding the path lock of the file; returns
// false for an unknown targetID
bool getDynamicAttribsByPath(std::string fileHandleID, uint16_t targetID,
DynamicAttribs& outDynAttribs);
// NOTE(review): implementation not visible here; presumably performs the actual close and
// returns the result together with the attributes - confirm in the .cpp file
std::pair<FhgfsOpsErr, DynamicAttribs> close(ResponseContext& ctx);
};

View File

@@ -0,0 +1,114 @@
#pragma once
#ifdef BEEGFS_NVFS
#include <string>
#include <typeinfo>
#include <common/net/message/session/rw/ReadLocalFileRDMAMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/components/worker/Worker.h>
#include <session/SessionLocalFileStore.h>
#include "ReadLocalFileV2MsgEx.h"
/**
 * Sender for ReadLocalFileMsgExBase that transfers the file data by RDMA write into the buffers
 * described by the message's RdmaInfo and reports the total length over the socket afterwards.
 */
class ReadLocalFileRDMAMsgSender : public ReadLocalFileRDMAMsg
{
   public:
      /**
       * Read progress plus the position within the current remote buffer.
       */
      struct ReadState : public ReadStateBase
      {
         RdmaInfo* rdma; // set by readStateInit() from getRdmaInfo()
         uint64_t rBuf;  // current remote buffer, filled in by RdmaInfo::next()
         size_t rLen;    // length of the current remote buffer, filled in by RdmaInfo::next()
         uint64_t rOff;  // write offset within the current remote buffer

         ReadState(const char* logContext, uint64_t toBeRead,
            SessionLocalFile* sessionLocalFile) :
            ReadStateBase(logContext, toBeRead, sessionLocalFile) {}
      };

   private:
      friend class ReadLocalFileMsgExBase<ReadLocalFileRDMAMsgSender, ReadState>;

      static std::string logContextPref;

      /**
       * Send a single little-endian int64 length info (or negative error code) to the client.
       */
      inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
      {
         int64_t wireValue = HOST_TO_LE_64(lengthInfo);

         sock->send(&wireValue, sizeof(wireValue), 0);
      }

      /**
       * RDMA write rs.readRes bytes from buf to the current position of the remote buffer.
       *
       * @param isFinal if true and the write succeeded, the number of bytes read so far
       *    (getCount() - rs.toBeRead) is sent as length info afterwards
       * @return number of bytes written or -1 if the write did not transfer rs.readRes bytes
       */
      inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
      {
         ssize_t written = sock->write(buf, rs.readRes, 0, rs.rBuf + rs.rOff, rs.rdma->key);

         LOG_DEBUG(rs.logContext, Log_DEBUG,
            "buf: " + StringTk::uint64ToHexStr((uint64_t)buf) + "; "
            "bufLen: " + StringTk::int64ToStr(rs.readRes) + "; "
            "rbuf: " + StringTk::uint64ToHexStr(rs.rBuf) + "; "
            "rkey: " + StringTk::uintToHexStr(rs.rdma->key) + "; "
            "writeRes: " + StringTk::int64ToStr(written));

         if (unlikely(written != rs.readRes))
         { // short or failed write => report as error to the caller
            LogContext(rs.logContext).logErr("Unable to write file data to client. "
               "FileID: " + rs.sessionLocalFile->getFileID() + "; "
               "SysErr: " + System::getErrString());

            written = -1;
         }

         const bool sendFinalLength = isFinal && likely(written >= 0);
         if (sendFinalLength)
            sendLengthInfo(sock, getCount() - rs.toBeRead);

         return written;
      }

      /**
       * Limit the next read to what fits into the rest of the current remote buffer.
       */
      inline ssize_t getReadLength(ReadState& rs, ssize_t len)
      {
         // Cannot RDMA anything larger than WORKER_BUFOUT_SIZE in a single operation
         // because that is the size of the buffer passed in by the Worker.
         // TODO: pass around a Buffer with a length instead of unqualified char*.
         const ssize_t remoteRemaining = ssize_t(rs.rLen - rs.rOff);
         const ssize_t fitsRemote = BEEGFS_MIN(len, remoteRemaining);

         return BEEGFS_MIN(fitsRemote, WORKER_BUFOUT_SIZE);
      }

      /**
       * Take the RdmaInfo from the message and select the first remote buffer.
       *
       * @return false if RdmaInfo::next() reports no buffer
       */
      inline bool readStateInit(ReadState& rs)
      {
         rs.rdma = getRdmaInfo();

         if (likely(rs.rdma->next(rs.rBuf, rs.rLen, rs.rOff)))
            return true;

         LogContext(rs.logContext).logErr("No entities in RDMA buffers.");
         return false;
      }

      /**
       * Advance the remote offset by the bytes just sent and move on to the next remote buffer
       * when the current one is full.
       *
       * @return false if the current buffer is full and RdmaInfo::next() reports no further one
       */
      inline bool readStateNext(ReadState& rs)
      {
         rs.rOff += rs.readRes;

         if (rs.rOff != rs.rLen)
            return true; // current remote buffer is not used up yet

         if (likely(rs.rdma->next(rs.rBuf, rs.rLen, rs.rOff)))
            return true;

         LogContext(rs.logContext).logErr("RDMA buffers exhausted");
         return false;
      }

      /**
       * Data and send buffer are both the plain buffer of the response context.
       *
       * @return the buffer length of the response context
       */
      inline size_t getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf)
      {
         char* const ctxBuf = ctx.getBuffer();

         *dataBuf = ctxBuf;
         *sendBuf = ctxBuf;

         return ctx.getBufferLength();
      }
};
// read handler that sends the data via ReadLocalFileRDMAMsgSender
using ReadLocalFileRDMAMsgEx =
   ReadLocalFileMsgExBase<ReadLocalFileRDMAMsgSender, ReadLocalFileRDMAMsgSender::ReadState>;
#endif /* BEEGFS_NVFS */

View File

@@ -0,0 +1,466 @@
#include <program/Program.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/SessionTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <toolkit/StorageTkEx.h>
#include "ReadLocalFileV2MsgEx.h"
#ifdef BEEGFS_NVFS
#include "ReadLocalFileRDMAMsgEx.h"
#endif
#include <sys/sendfile.h>
#include <sys/mman.h>
// sequential read length after which the read size is capped to tuneFileReadSize
#define READ_USE_TUNEFILEREAD_TRIGGER (4*1024*1024) /* seq IO trigger for tuneFileReadSize */
#define READ_BUF_OFFSET_PROTO_MIN (sizeof(int64_t) ) /* for prepended length info */
#define READ_BUF_END_PROTO_MIN (sizeof(int64_t) ) /* for appended length info */
/* reserve more than necessary at buf start to achieve page cache alignment
   (the larger of the length info size and the system page size) */
const size_t READ_BUF_OFFSET =
BEEGFS_MAX( (long)READ_BUF_OFFSET_PROTO_MIN, sysconf(_SC_PAGESIZE) );
/* reserve more than necessary at buf end to achieve page cache alignment
   (the larger of the length info size and the system page size) */
const size_t READ_BUF_END_RESERVE =
BEEGFS_MAX( (long)READ_BUF_END_PROTO_MIN, sysconf(_SC_PAGESIZE) );
/* read buffer size cutoff for protocol data: the part of the buffer that is not available for
   file data (reserve at the start plus reserve at the end) */
const size_t READ_BUF_LEN_PROTOCOL_CUTOFF =
READ_BUF_OFFSET + READ_BUF_END_RESERVE;
// A linker error occurs for processIncoming without having this forced linkage.
// (these objects use the ReadLocalFileMsgExBase typedefs in this translation unit)
static ReadLocalFileV2MsgEx forcedLinkageV2;
#ifdef BEEGFS_NVFS
static ReadLocalFileRDMAMsgEx forcedLinkageRDMA;
#endif
// prefixes of the log context strings built in the ReadLocalFileMsgExBase methods
std::string ReadLocalFileV2MsgSender::logContextPref = "ReadChunkFileV2Msg";
#ifdef BEEGFS_NVFS
std::string ReadLocalFileRDMAMsgSender::logContextPref = "ReadChunkFileRDMAMsg";
#endif
/**
 * Handles an incoming read request: selects the target, references (or creates) the session
 * entry for the file handle, opens the chunk file and sends the requested data.
 *
 * Errors detected in this method are reported to the client as a negative FhgfsOpsErr via
 * sendLengthInfo(); a chunk file that does not exist is reported as end-of-file (length info 0).
 *
 * @return false only if a SocketException was caught; true otherwise (also when an error code
 *    was sent to the client)
 */
template <class Msg, typename ReadState>
bool ReadLocalFileMsgExBase<Msg, ReadState>::processIncoming(NetMessage::ResponseContext& ctx)
{
std::string logContext = Msg::logContextPref + " incoming";
bool retVal = true; // return value
// bytes read, used for the operation counters at the end (only counted if > 0)
int64_t readRes = 0;
std::string fileHandleID(getFileHandleID() );
bool isMirrorSession = isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_BUDDYMIRROR);
// do session check only when it is not a mirror session
bool useSessionCheck = isMirrorSession ? false :
isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_SESSION_CHECK);
App* app = Program::getApp();
SessionStore* sessions = app->getSessions();
auto session = sessions->referenceOrAddSession(getClientNumID());
this->sessionLocalFiles = session->getLocalFiles();
// select the right targetID
uint16_t targetID = getTargetID();
if(isMirrorSession )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log message here, error handling will happen below through the failing target
// lookup
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMirrorSession)
{ /* buddy mirrored file => fail with Err_COMMUNICATION to make the requestor retry.
mgmt will mark this target as (p)offline in a few moments. */
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return true;
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_UNKNOWNTARGET);
return true;
}
// check if we already have a session for this file...
auto sessionLocalFile = sessionLocalFiles->referenceSession(
fileHandleID, targetID, isMirrorSession);
if(!sessionLocalFile)
{ // sessionLocalFile does not exist yet => create, insert, re-get it
if(useSessionCheck)
{ // the client requested a session check, but there is no session => report a server crash
LogContext log(logContext);
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
"No session for file available. "
"FileHandleID: " + fileHandleID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_STORAGE_SRV_CRASHED);
goto release_session;
}
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
int openFlags = SessionTk::sysOpenFlagsFromFhgfsAccessFlags(getAccessFlags() );
auto newFile = boost::make_unique<SessionLocalFile>(fileHandleID, targetID, fileID, openFlags,
false);
if(isMirrorSession)
newFile->setIsMirrorSession(true);
sessionLocalFile = sessionLocalFiles->addAndReferenceSession(std::move(newFile));
}
else
{ // session file exists
if(useSessionCheck && sessionLocalFile->isServerCrashed() )
{ // the session is marked as crashed => report a server crash to the client
LogContext log(logContext);
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.) "
"The session is marked as dirty. "
"FileHandleID: " + fileHandleID);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_STORAGE_SRV_CRASHED);
goto release_session;
}
}
/* Note: the session file must be unlocked/released before we send the finalizing info,
because otherwise we have a race when the client assumes the read is complete and tries
to close the file (while the handle is actually still referenced on the server). */
/* Note: we also must be careful to update the current offset before sending the final length
info because otherwise the session file might have been released already and we have no
longer access to the offset. */
// from here on a non-positive readRes keeps the paths below out of the operation counters
readRes = -1;
try
{
// prepare file descriptor (if file not open yet then open it if it exists already)
FhgfsOpsErr openRes = openFile(*target, sessionLocalFile.get());
if(openRes != FhgfsOpsErr_SUCCESS)
{
sendLengthInfo(ctx.getSocket(), -openRes);
goto release_session;
}
// check if file exists
if(!sessionLocalFile->getFD().valid())
{ // file didn't exist (not an error) => send EOF
sendLengthInfo(ctx.getSocket(), 0);
goto release_session;
}
// the actual read workhorse...
readRes = incrementalReadStatefulAndSendV2(ctx, sessionLocalFile.get());
LOG_DEBUG(logContext, Log_SPAM, "sending completed. "
"readRes: " + StringTk::int64ToStr(readRes) );
IGNORE_UNUSED_VARIABLE(readRes);
}
catch(SocketException& e)
{
LogContext(logContext).logErr(std::string("SocketException occurred: ") + e.what() );
LogContext(logContext).log(Log_WARNING, "Details: "
"sessionID: " + getClientNumID().str() + "; "
"fileHandle: " + fileHandleID + "; "
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
"count: " + StringTk::int64ToStr(getCount() ) );
sessionLocalFile->setOffset(-1); /* invalidate offset (we can only do this if still locked,
but that's not a prob if we update offset correctly before send - see notes above) */
retVal = false;
goto release_session;
}
// common exit for every path that runs after the session file lookup
release_session:
// update operation counters
if(likely(readRes > 0) )
app->getNodeOpStats()->updateNodeOp(
ctx.getSocket()->getPeerIP(), StorageOpCounter_READOPS, readRes, getMsgHeaderUserID() );
return retVal;
}
/**
 * Split the buffer of the response context into the area for file data and the send position.
 *
 * @param dataBuf receives the position where file data is stored (READ_BUF_OFFSET bytes into
 *    the context buffer)
 * @param sendBuf receives the position from which sending starts, i.e. the data position minus
 *    the room for the prepended length info
 * @return the context buffer length minus the reserve for the prepended and finalizing length
 *    info
 */
inline size_t ReadLocalFileV2MsgSender::getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf)
{
   char* const dataStart = ctx.getBuffer() + READ_BUF_OFFSET;

   *dataBuf = dataStart;
   *sendBuf = dataStart - READ_BUF_OFFSET_PROTO_MIN;

   return ctx.getBufferLength() - READ_BUF_LEN_PROTOCOL_CUTOFF;
}
/**
 * Reads the requested range of the chunk file piece by piece and sends each piece to the client
 * through the sender implementation (Msg).
 *
 * Note: This is similar to incrementalReadAndSend, but uses the offset from sessionLocalFile
 * to avoid calling seek every time.
 *
 * Warning: Do not use the returned value to set the new offset, as there might be other threads
 * that also did something with the file (i.e. the io-lock is released somewhere within this
 * method).
 *
 * On every error path the session offset is invalidated (-1) where it may be stale, and a
 * negative FhgfsOpsErr is sent to the client as length info.
 *
 * @param ctx provides the socket, the worker buffer and the stats to update
 * @param sessionLocalFile session entry of the file; its offset, read counter and read-ahead
 *    trigger are updated
 * @return number of bytes read or some arbitrary negative value otherwise
 */
template <class Msg, typename ReadState>
int64_t ReadLocalFileMsgExBase<Msg, ReadState>::incrementalReadStatefulAndSendV2(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile)
{
/* note on session offset: the session offset must always be set before sending the data to the
client (otherwise the client could send the next request before we updated the offset, which
would lead to a race condition) */
std::string logContext = Msg::logContextPref + " (read incremental)";
Config* cfg = Program::getApp()->getConfig();
char* dataBuf;
char* sendBuf;
if (READ_BUF_LEN_PROTOCOL_CUTOFF >= ctx.getBufferLength())
{ // buffer too small. That shouldn't happen and is an error
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INTERNAL);
return -1;
}
// dataBuf: where file data is read to; sendBuf: where sending starts (sender specific)
const ssize_t dataBufLen = getBuffers(ctx, &dataBuf, &sendBuf);
auto& fd = sessionLocalFile->getFD();
int64_t oldOffset = sessionLocalFile->getOffset();
int64_t newOffset = getOffset();
bool skipReadAhead =
unlikely(isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_DISABLE_IO) ||
sessionLocalFile->getIsDirectIO());
ssize_t readAheadSize = skipReadAhead ? 0 : cfg->getTuneFileReadAheadSize();
ssize_t readAheadTriggerSize = cfg->getTuneFileReadAheadTriggerSize();
if( (oldOffset < 0) || (oldOffset != newOffset) )
{ // session offset is invalid or the request does not continue at the previous offset
sessionLocalFile->resetReadCounter(); // reset sequential read counter
sessionLocalFile->resetLastReadAheadTrigger();
}
else
{ // read continues at previous offset
LOG_DEBUG(logContext, Log_SPAM,
"fileID: " + sessionLocalFile->getFileID() + "; "
"offset: " + StringTk::int64ToStr(getOffset() ) );
}
size_t maxReadAtOnceLen = dataBufLen;
// reduce maxReadAtOnceLen to achieve better read/send async overlap
/* (note: reducing makes only sense if we can rely on the kernel to do some read-ahead, so don't
reduce for direct IO and for random IO) */
if( (sessionLocalFile->getReadCounter() >= READ_USE_TUNEFILEREAD_TRIGGER) &&
!sessionLocalFile->getIsDirectIO() )
maxReadAtOnceLen = BEEGFS_MIN(dataBufLen, cfg->getTuneFileReadSize() );
off_t readOffset = getOffset();
ReadState readState(logContext.c_str(), getCount(), sessionLocalFile);
if (!isMsgValid() || !readStateInit(readState))
{
LogContext(logContext).logErr("Invalid read message.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INVAL);
return -1;
}
// one iteration per piece; the loop is only left through the return statements below
for( ; ; )
{
ssize_t readLength = getReadLength(readState, BEEGFS_MIN(maxReadAtOnceLen, readState.toBeRead));
// with the DISABLE_IO flag the read is skipped and treated as if it was complete
readState.readRes = unlikely(isMsgHeaderFeatureFlagSet(READLOCALFILEMSG_FLAG_DISABLE_IO) ) ?
readLength : MsgHelperIO::pread(*fd, dataBuf, readLength, readOffset);
LOG_DEBUG(logContext, Log_SPAM,
"toBeRead: " + StringTk::int64ToStr(readState.toBeRead) + "; "
"readLength: " + StringTk::int64ToStr(readLength) + "; "
"readRes: " + StringTk::int64ToStr(readState.readRes) );
if(readState.readRes == readLength)
{ // simple success case
readState.toBeRead -= readState.readRes;
readOffset += readState.readRes;
// note: this declaration shadows the function-level newOffset (the request offset)
int64_t newOffset = getOffset() + getCount() - readState.toBeRead;
sessionLocalFile->setOffset(newOffset); // update offset
sessionLocalFile->incReadCounter(readState.readRes); // update sequential read length
ctx.getStats()->incVals.diskReadBytes += readState.readRes; // update stats
bool isFinal = !readState.toBeRead;
if (readStateSendData(ctx.getSocket(), readState, sendBuf, isFinal) < 0)
{
LogContext(logContext).logErr("readStateSendData failed.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return -1;
}
checkAndStartReadAhead(sessionLocalFile, readAheadTriggerSize, newOffset, readAheadSize);
if(isFinal)
{ // we reached the end of the requested data
return getCount();
}
// let the sender prepare for the next piece
if (!readStateNext(readState))
{
LogContext(logContext).logErr("readStateNext failed.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return -1;
}
}
else
{ // readRes not as it should be => might be an error or just an end-of-file
if(readState.readRes == -1)
{ // read error occurred
LogContext(logContext).log(Log_WARNING, "Unable to read file data. "
"FileID: " + sessionLocalFile->getFileID() + "; "
"SysErr: " + System::getErrString() );
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_INTERNAL);
return -1;
}
else
{ // just an end of file
LOG_DEBUG(logContext, Log_DEBUG,
"Unable to read all of the requested data (=> end of file)");
LOG_DEBUG(logContext, Log_DEBUG,
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
"count: " + StringTk::int64ToStr(getCount() ) + "; "
"readLength: " + StringTk::int64ToStr(readLength) + "; " +
"readRes: " + StringTk::int64ToStr(readState.readRes) + "; " +
"toBeRead: " + StringTk::int64ToStr(readState.toBeRead) );
readOffset += readState.readRes;
readState.toBeRead -= readState.readRes;
sessionLocalFile->setOffset(getOffset() + getCount() - readState.toBeRead); // update offset
sessionLocalFile->incReadCounter(readState.readRes); // update sequential read length
ctx.getStats()->incVals.diskReadBytes += readState.readRes; // update stats
if(readState.readRes > 0)
{ // send the partial piece as the final one
if (readStateSendData(ctx.getSocket(), readState, sendBuf, true) < 0)
{
LogContext(logContext).logErr("readStateSendData failed.");
sessionLocalFile->setOffset(-1);
sendLengthInfo(ctx.getSocket(), -FhgfsOpsErr_COMMUNICATION);
return -1;
}
}
else
sendLengthInfo(ctx.getSocket(), 0); // nothing was read => only send the length info 0
return(getCount() - readState.toBeRead);
}
}
} // end of for-loop
}
/**
 * Triggers read-ahead once the sequential read counter of the session file has reached the
 * next trigger point.
 *
 * The first trigger point is readAheadTriggerSize; after read-ahead was triggered once, the
 * next trigger point is the last trigger plus readAheadSize.
 *
 * Note: if getDisableIO() is true, we assume the caller sets readAheadSize==0, so getDisableIO()
 * is not checked explicitly within this function.
 *
 * @param sessionLocalFile its last read-ahead trigger is updated if read-ahead was triggered
 * @param readAheadTriggerSize the length of sequential IO that triggers read-ahead
 * @param currentOffset current file offset (where read-ahead would start)
 * @param readAheadSize length of the read-ahead; 0 disables read-ahead
 */
template <class Msg, typename ReadState>
void ReadLocalFileMsgExBase<Msg, ReadState>::checkAndStartReadAhead(SessionLocalFile* sessionLocalFile,
   ssize_t readAheadTriggerSize, off_t currentOffset, off_t readAheadSize)
{
   std::string logContext = Msg::logContextPref + " (read-ahead)";

   if (readAheadSize == 0)
      return; // read-ahead is disabled

   const int64_t seqReadBytes = sessionLocalFile->getReadCounter();

   int64_t triggerAt;
   if (sessionLocalFile->getLastReadAheadTrigger() )
      triggerAt = sessionLocalFile->getLastReadAheadTrigger() + readAheadSize;
   else
      triggerAt = readAheadTriggerSize; // read-ahead was not triggered yet

   if (seqReadBytes < triggerAt)
      return; // we're not at the trigger point yet

   /* start read-ahead...
      (read-ahead is supposed to be non-blocking if there are free slots in the device IO queue) */
   LOG_DEBUG(logContext, Log_SPAM,
      std::string("Starting read-ahead... ") +
      "offset: " + StringTk::int64ToStr(currentOffset) + "; "
      "size: " + StringTk::int64ToStr(readAheadSize) );

   MsgHelperIO::readAhead(*sessionLocalFile->getFD(), currentOffset, readAheadSize);

   // remember where we triggered, the next trigger point is derived from this
   sessionLocalFile->setLastReadAheadTrigger(seqReadBytes);
}
/**
 * Opens the chunk file for the session unless the session file already holds a valid file
 * descriptor. Requests for buddy mirrored chunks are refused if the consistency state of the
 * target is not good.
 *
 * @param target the target whose mirror FD (mirror session) or chunk FD is used for opening
 * @param sessionLocalFile the session file that is opened
 * @return we return the special value FhgfsOpsErr_COMMUNICATION here in some cases to indirectly
 * ask the client for a retry (e.g. if target consistency is not good for buddymirrored chunks);
 * otherwise FhgfsOpsErr_SUCCESS for an already open file or the result of
 * SessionLocalFile::openFile().
 */
template <class Msg, typename ReadState>
FhgfsOpsErr ReadLocalFileMsgExBase<Msg, ReadState>::openFile(const StorageTarget& target,
   SessionLocalFile* sessionLocalFile)
{
   std::string logContext = Msg::logContextPref + " (open)";

   const bool mirroredChunk = sessionLocalFile->getIsMirrorSession();

   if (sessionLocalFile->getFD().valid())
      return FhgfsOpsErr_SUCCESS; // file already open => nothing to be done here

   // file not open yet => get targetFD and check consistency state
   const auto targetState = target.getConsistencyState();

   int targetFD;
   if (mirroredChunk)
      targetFD = *target.getMirrorFD();
   else
      targetFD = *target.getChunkFD();

   if (mirroredChunk && unlikely(targetState != TargetConsistencyState_GOOD) )
   { // this is a request for a buddymirrored chunk on a non-good target
      LogContext(logContext).log(Log_NOTICE, "Refusing request. Target consistency is not good. "
         "targetID: " + StringTk::uintToStr(target.getID()));

      return FhgfsOpsErr_COMMUNICATION;
   }

   return sessionLocalFile->openFile(targetFD, getPathInfo(), false, NULL);
}

View File

@@ -0,0 +1,216 @@
#pragma once
#include <common/net/message/session/rw/ReadLocalFileV2Msg.h>
#include <common/storage/StorageErrors.h>
#include <session/SessionLocalFileStore.h>
class StorageTarget;
/**
* Contains common data needed by implementations of the network protocol
* that send data to the client.
*/
struct ReadStateBase
{
const char* logContext;
uint64_t toBeRead;
SessionLocalFile* sessionLocalFile;
ssize_t readRes;
ReadStateBase(const char* logContext, uint64_t toBeRead,
SessionLocalFile* sessionLocalFile)
{
this->logContext = logContext;
this->toBeRead = toBeRead;
this->sessionLocalFile = sessionLocalFile;
}
};
/**
 * Common processing of read requests, independent of how the data is sent to the client.
 *
 * The class derives from the sender class Msg and calls the sender-specific parts (length info,
 * data transfer, buffer layout) on it. The sender classes in this file declare this template as
 * friend, so their private methods can be called from here.
 *
 * @tparam Msg the sender class; also the message class that is extended
 * @tparam ReadState the read state type that is handed to the methods of Msg
 */
template <class Msg, typename ReadState>
class ReadLocalFileMsgExBase : public Msg
{
public:
// entry point for the incoming read request
bool processIncoming(NetMessage::ResponseContext& ctx);
private:
// local file store of the client session, assigned in processIncoming()
SessionLocalFileStore* sessionLocalFiles;
// opens the chunk file if the session file has no valid file descriptor yet
FhgfsOpsErr openFile(const StorageTarget& target, SessionLocalFile* sessionLocalFile);
// triggers read-ahead when enough sequential data has been read
void checkAndStartReadAhead(SessionLocalFile* sessionLocalFile, ssize_t readAheadTriggerSize,
off_t currentOffset, off_t readAheadSize);
// reads the requested range piece by piece and sends it through the methods of Msg
int64_t incrementalReadStatefulAndSendV2(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile);
// The following methods forward to the method of the same name in the sender class Msg.
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
{
static_cast<Msg&>(*this).sendLengthInfo(sock, lengthInfo);
}
inline bool readStateInit(ReadState& rs)
{
return static_cast<Msg&>(*this).readStateInit(rs);
}
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
{
return static_cast<Msg&>(*this).readStateSendData(sock, rs, buf, isFinal);
}
inline bool readStateNext(ReadState& rs)
{
return static_cast<Msg&>(*this).readStateNext(rs);
}
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
{
return static_cast<Msg&>(*this).getReadLength(rs, len);
}
inline size_t getBuffers(NetMessage::ResponseContext& ctx, char** dataBuf, char** sendBuf)
{
return static_cast<Msg&>(*this).getBuffers(ctx, dataBuf, sendBuf);
}
public:
// The following accessors forward to the accessor of the same name in Msg.
// NOTE(review): presumably needed because names of the dependent base class Msg are not found
// by unqualified lookup inside this template - confirm before removing any of them
inline unsigned getMsgHeaderUserID() const
{
return static_cast<const Msg&>(*this).getMsgHeaderUserID();
}
inline bool isMsgHeaderFeatureFlagSet(unsigned flag) const
{
return static_cast<const Msg&>(*this).isMsgHeaderFeatureFlagSet(flag);
}
inline uint16_t getTargetID() const
{
return static_cast<const Msg&>(*this).getTargetID();
}
inline int64_t getOffset() const
{
return static_cast<const Msg&>(*this).getOffset();
}
inline int64_t getCount() const
{
return static_cast<const Msg&>(*this).getCount();
}
inline const char* getFileHandleID()
{
return static_cast<Msg&>(*this).getFileHandleID();
}
inline NumNodeID getClientNumID() const
{
return static_cast<const Msg&>(*this).getClientNumID();
}
inline unsigned getAccessFlags() const
{
return static_cast<const Msg&>(*this).getAccessFlags();
}
inline PathInfo* getPathInfo ()
{
return static_cast<Msg&>(*this).getPathInfo();
}
inline bool isMsgValid() const
{
return static_cast<const Msg&>(*this).isMsgValid();
}
};
/**
* Implements the Version 2 send protocol. It uses a preceding length info for each chunk.
*/
class ReadLocalFileV2MsgSender : public ReadLocalFileV2Msg
{
/* note on protocol: this works by sending an int64 before each data chunk, which contains the
length of the next data chunk; or a zero if no more data can be read; or a negative fhgfs
error code in case of an error */
public:
struct ReadState : public ReadStateBase
{
ReadState(const char* logContext, uint64_t toBeRead,
SessionLocalFile* sessionLocalFile) :
ReadStateBase(logContext, toBeRead, sessionLocalFile) {}
};
private:
friend class ReadLocalFileMsgExBase<ReadLocalFileV2MsgSender, ReadState>;
static std::string logContextPref;
/**
* Send only length information without a data packet. Typically used for the final length
* info at the end of the requested data.
*/
inline void sendLengthInfo(Socket* sock, int64_t lengthInfo)
{
lengthInfo = HOST_TO_LE_64(lengthInfo);
sock->send(&lengthInfo, sizeof(int64_t), 0);
}
/**
* No-op for this implementation.
*/
inline bool readStateInit(ReadState& rs)
{
return true;
}
/**
* Send length information and the corresponding data packet buffer.
*
* Note: rs.readRes is used to compute buf length for send()
*
* @param rs.readRes must not be negative
* @param buf the buffer with a preceding gap for the length info
* @param isFinal true if this is the last send, i.e. we have read all data
*/
inline ssize_t readStateSendData(Socket* sock, ReadState& rs, char* buf, bool isFinal)
{
ssize_t sendRes;
{
Serializer ser(buf, sizeof(int64_t));
ser % rs.readRes;
}
if (isFinal)
{
Serializer ser(buf + sizeof(int64_t) + rs.readRes, sizeof(int64_t));
ser % int64_t(0);
sendRes = sock->send(buf, (2*sizeof(int64_t) ) + rs.readRes, 0);
}
else
{
sendRes = sock->send(buf, sizeof(int64_t) + rs.readRes, 0);
}
return sendRes;
}
/**
* No-op for this implementation.
*/
inline bool readStateNext(ReadState& rs)
{
return true;
}
inline ssize_t getReadLength(ReadState& rs, ssize_t len)
{
return len;
}
size_t getBuffers(ResponseContext& ctx, char** dataBuf, char** sendBuf);
};
// read handler that sends the data via ReadLocalFileV2MsgSender
using ReadLocalFileV2MsgEx =
   ReadLocalFileMsgExBase<ReadLocalFileV2MsgSender, ReadLocalFileV2MsgSender::ReadState>;

View File

@@ -0,0 +1,926 @@
#include <program/Program.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/SessionTk.h>
#include <common/toolkit/StorageTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <storage/StorageTargets.h>
#include <toolkit/StorageTkEx.h>
#include "WriteLocalFileMsgEx.h"
#ifdef BEEGFS_NVFS
#include "WriteLocalFileRDMAMsgEx.h"
#endif
#include <boost/lexical_cast.hpp>
// File-local objects of the write handler types.
// NOTE(review): presumably these force the instantiation of the WriteLocalFileMsgExBase
// templates in this translation unit, like the "forced linkage" objects of the read handlers -
// confirm before removing
static WriteLocalFileMsgEx forcedLinkage;
#ifdef BEEGFS_NVFS
static WriteLocalFileRDMAMsgEx forcedLinkageRDMA;
#endif
// prefixes of the log context strings built in the WriteLocalFileMsgExBase methods
const std::string WriteLocalFileMsgSender::logContextPref = "WriteChunkFileMsg";
#ifdef BEEGFS_NVFS
const std::string WriteLocalFileRDMAMsgSender::logContextPref = "WriteChunkFileRDMAMsg";
#endif
/**
 * Handles an incoming write request: validates the message, performs the write and sends the
 * result to the client.
 *
 * The value sent to the client is the number of bytes written or a negative FhgfsOpsErr. No
 * response is sent if write() reports failure.
 *
 * @return false if the message is invalid or write() reported failure; true otherwise
 */
template <class Msg, typename WriteState>
bool WriteLocalFileMsgExBase<Msg, WriteState>::processIncoming(NetMessage::ResponseContext& ctx)
{
   if (!isMsgValid())
   {
      /* errors are reported as negative values; the positive error code that was sent here
         before is indistinguishable from a number of written bytes */
      sendResponse(ctx, -(int64_t)FhgfsOpsErr_INVAL);
      return false;
   }

   bool success = false;
   int64_t writeClientRes = -(int64_t)FhgfsOpsErr_INTERNAL; // bytes written or negative fhgfs err

   std::tie(success, writeClientRes) = write(ctx);

   if (!success)
      return false; // nothing is sent to the client in this case

   sendResponse(ctx, writeClientRes);

   // update operation counters
   if (likely(writeClientRes > 0))
   {
      App* app = Program::getApp();

      app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
         StorageOpCounter_WRITEOPS, writeClientRes, getMsgHeaderUserID());
   }

   return true;
}
template <class Msg, typename WriteState>
std::pair<bool, int64_t> WriteLocalFileMsgExBase<Msg, WriteState>::write(NetMessage::ResponseContext& ctx)
{
std::string logContext = Msg::logContextPref + " incoming";
App* app = Program::getApp();
int64_t writeClientRes = -(int64_t)FhgfsOpsErr_INTERNAL; // bytes written or negative fhgfs err
FhgfsOpsErr finishMirroringRes = FhgfsOpsErr_INTERNAL;
std::string fileHandleID(getFileHandleID() );
bool isMirrorSession = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
bool serverCrashed = false;
QuotaExceededErrorType quotaExceeded = QuotaExceededErrorType_NOT_EXCEEDED;
SessionStore* sessions = Program::getApp()->getSessions();
auto session = sessions->referenceOrAddSession(getClientNumID());
SessionLocalFileStore* sessionLocalFiles = session->getLocalFiles();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
bool chunkLocked = false;
// select the right targetID
uint16_t targetID = getTargetID();
if(isMirrorSession)
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log message here, error handling will happen below through invalid targetFD
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMirrorSession)
{ /* buddy mirrored file => fail with Err_COMMUNICATION to make the requestor retry.
mgmt will mark this target as (p)offline in a few moments. */
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
return {false, FhgfsOpsErr_COMMUNICATION};
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
return {false, FhgfsOpsErr_UNKNOWNTARGET};
}
// check if we already have session for this file...
auto sessionLocalFile = sessionLocalFiles->referenceSession(
fileHandleID, targetID, isMirrorSession);
if(!sessionLocalFile)
{ // sessionLocalFile not exists yet => create, insert, re-get it
if(doSessionCheck() )
{ // server crashed during the write, maybe lost some data send error to client
LogContext log(logContext);
log.log(Log_WARNING, "Potential cache loss for open file handle. (Server crash detected.) "
"No session for file available. "
"FileHandleID: " + fileHandleID);
serverCrashed = true;
}
std::string fileID = SessionTk::fileIDFromHandleID(fileHandleID);
int openFlags = SessionTk::sysOpenFlagsFromFhgfsAccessFlags(getAccessFlags() );
auto newFile = boost::make_unique<SessionLocalFile>(fileHandleID, targetID, fileID, openFlags,
serverCrashed);
if(isMirrorSession)
newFile->setIsMirrorSession(true);
sessionLocalFile = sessionLocalFiles->addAndReferenceSession(std::move(newFile));
}
else
{ // session file exists
if(doSessionCheck() && sessionLocalFile->isServerCrashed() )
{ // server crashed during the write, maybe lost some data send error to client
LogContext log(logContext);
log.log(Log_SPAM, "Potential cache loss for open file handle. (Server crash detected.)"
"The session is marked as dirty. "
"FileHandleID: " + fileHandleID);
serverCrashed = true;
}
}
// check if the size quota is exceeded for the user or group
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) &&
app->getConfig()->getQuotaEnableEnforcement() )
{
quotaExceeded = app->getExceededQuotaStores()->get(targetID)->isQuotaExceeded(getUserID(),
getGroupID(), QuotaLimitType_SIZE);
if(quotaExceeded != QuotaExceededErrorType_NOT_EXCEEDED)
{
LogContext(logContext).log(Log_NOTICE,
QuotaData::QuotaExceededErrorTypeToString(quotaExceeded) + " "
"UID: " + StringTk::uintToStr(this->getUserID()) + "; "
"GID: " + StringTk::uintToStr(this->getGroupID() ) );
// receive the message content before return with error
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
writeClientRes = -(int64_t) FhgfsOpsErr_DQUOT;
goto cleanup;
}
}
try
{
if(isMirrorSession && target->getBuddyResyncInProgress())
{
// mirrored chunk should be modified, check if resync is in progress and lock chunk
std::string chunkID = sessionLocalFile->getFileID();
chunkLockStore->lockChunk(targetID, chunkID);
chunkLocked = true;
}
// prepare file descriptor (if file not open yet then create/open it)
FhgfsOpsErr openRes = openFile(*target, sessionLocalFile.get());
if(unlikely(openRes != FhgfsOpsErr_SUCCESS) )
{
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
writeClientRes = -(int64_t)openRes;
goto cleanup;
}
// store mirror node reference in session and init mirrorToSock member
FhgfsOpsErr prepMirrorRes = prepareMirroring(ctx.getBuffer(), ctx.getBufferLength(),
sessionLocalFile.get(), *target);
if(unlikely(prepMirrorRes != FhgfsOpsErr_SUCCESS) )
{ // mirroring failed
incrementalRecvPadding(ctx, getCount(), sessionLocalFile.get());
writeClientRes = -(int64_t)prepMirrorRes;
goto cleanup;
}
// the actual write workhorse
int64_t writeLocalRes = incrementalRecvAndWriteStateful(ctx, sessionLocalFile.get());
// update client result, offset etc.
int64_t newOffset;
if(unlikely(writeLocalRes < 0) )
newOffset = -1; // writing failed
else
{ // writing succeeded
newOffset = getOffset() + writeLocalRes;
ctx.getStats()->incVals.diskWriteBytes += writeLocalRes; // update stats
}
sessionLocalFile->setOffset(newOffset);
writeClientRes = writeLocalRes;
}
catch(SocketException& e)
{
LogContext(logContext).logErr(std::string("SocketException occurred: ") + e.what() );
LogContext(logContext).log(Log_WARNING, std::string("Details: ") +
"sessionID: " + getClientNumID().str() + "; "
"fileHandle: " + std::string(sessionLocalFile->getFileHandleID() ) + "; "
"offset: " + StringTk::int64ToStr(getOffset() ) + "; "
"count: " + StringTk::int64ToStr(getCount() ) );
sessionLocalFile->setOffset(-1); // invalidate offset
finishMirroring(sessionLocalFile.get(), *target);
if (chunkLocked)
{
std::string chunkID = sessionLocalFile->getFileID();
chunkLockStore->unlockChunk(targetID, chunkID);
}
return {false, -1};
}
cleanup:
finishMirroringRes = finishMirroring(sessionLocalFile.get(), *target);
// check mirroring result (don't overwrite local error code, if any)
if(likely(writeClientRes > 0) )
{ // no local error => check mirroring result
if(unlikely(finishMirroringRes != FhgfsOpsErr_SUCCESS) )
writeClientRes = -finishMirroringRes; // mirroring failed => use err code as client result
}
if (chunkLocked)
{
std::string chunkID = sessionLocalFile->getFileID();
chunkLockStore->unlockChunk(targetID, chunkID);
}
if (serverCrashed)
writeClientRes = -(int64_t) FhgfsOpsErr_STORAGE_SRV_CRASHED;
return {true, writeClientRes};
}
/**
 * Receive the next piece of outstanding payload from the requestor socket into the context
 * buffer (at most one buffer length per call).
 *
 * @param toBeReceived number of payload bytes that are still outstanding.
 * @return result of the single Socket::recvT() call.
 */
ssize_t WriteLocalFileMsgSender::recvPadding(ResponseContext& ctx, int64_t toBeReceived)
{
   const auto recvTimeout = Program::getApp()->getConfig()->getConnMsgMediumTimeout();

   // never receive more than fits into the buffer
   const auto pieceLen = BEEGFS_MIN(toBeReceived, ctx.getBufferLength());

   return ctx.getSocket()->recvT(ctx.getBuffer(), pieceLen, 0, recvTimeout);
}
#ifdef BEEGFS_NVFS
/**
 * Read the next piece of outstanding payload via the RDMA info of this message into the context
 * buffer.
 *
 * @param toBeReceived number of payload bytes that are still outstanding.
 * @return -1 if the RDMA info has no further entry, otherwise the result of Socket::read().
 */
ssize_t WriteLocalFileRDMAMsgSender::recvPadding(ResponseContext& ctx, int64_t toBeReceived)
{
   uint64_t regionBuf;
   size_t regionLen;
   uint64_t regionOff;

   RdmaInfo* rdmaInfo = getRdmaInfo();

   const bool haveRegion = rdmaInfo->next(regionBuf, regionLen, regionOff);
   if (!haveRegion)
      return -1;

   // limit by local buffer size and outstanding bytes first, then by what is left in the region
   ssize_t pieceLen = BEEGFS_MIN(ctx.getBufferLength(), toBeReceived);
   pieceLen = BEEGFS_MIN(pieceLen, (ssize_t)(regionLen - regionOff));

   return ctx.getSocket()->read(ctx.getBuffer(), pieceLen, 0, regionBuf+regionOff,
      rdmaInfo->key);
}
#endif /* BEEGFS_NVFS */
/**
* Receives the write payload piece by piece; each piece is forwarded to the mirror via
* sendToMirror() and then written to the FD of the given session (unless the DISABLE_IO flag is
* set, in which case the local write is skipped and treated as fully successful).
*
* Note: New offset is saved in the session by the caller afterwards (to make life easier).
*
* @param ctx response context; supplies the receive buffer and its length.
* @param sessionLocalFile session of the file to write to; its FD, previous offset and sequential
* write counter are used here.
* @return number of written bytes or negative fhgfs error code
*/
template <class Msg, typename WriteState>
int64_t WriteLocalFileMsgExBase<Msg, WriteState>::incrementalRecvAndWriteStateful(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile)
{
std::string logContext = Msg::logContextPref + " (write incremental)";
Config* cfg = Program::getApp()->getConfig();
// size of a single receive step: the full buffer for direct IO, otherwise limited by the
// configured tuneFileWriteSize.
// we can securely cast getTuneFileWriteSize to size_t below to make a comparison possible, as
// it can technically never be negative and will therefore always fit into size_t
const ssize_t exactStaticRecvSize = sessionLocalFile->getIsDirectIO()
? ctx.getBufferLength()
: BEEGFS_MIN(ctx.getBufferLength(), (size_t)cfg->getTuneFileWriteSize() );
auto& fd = sessionLocalFile->getFD();
// oldOffset: offset stored in the session by the previous request (negative = invalidated);
// newOffset: offset requested by this message
int64_t oldOffset = sessionLocalFile->getOffset();
int64_t newOffset = getOffset();
bool useSyncRange = false; // true if sync_file_range should be called
if( (oldOffset < 0) || (oldOffset != newOffset) )
sessionLocalFile->resetWriteCounter(); // reset sequential write counter
else
{ // continue at previous offset => increase sequential write counter
LOG_DEBUG(logContext, Log_SPAM, "Offset: " + StringTk::int64ToStr(getOffset() ) );
sessionLocalFile->incWriteCounter(getCount() );
// a sync size of 0 (from config, or forced by the DISABLE_IO flag) disables the range sync
ssize_t syncSize = unlikely(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) ) ?
0 : cfg->getTuneFileWriteSyncSize();
if (syncSize && (sessionLocalFile->getWriteCounter() >= syncSize) )
useSyncRange = true;
}
// incrementally receive file contents...
WriteState writeState(logContext.c_str(), exactStaticRecvSize,
getCount(), getOffset(), sessionLocalFile);
if (!writeStateInit(writeState))
return -FhgfsOpsErr_COMMUNICATION;
do
{
// receive some bytes...
// NOTE(review): toBeReceived is an int64_t but is formatted with intToStr here - confirm that
// this cannot truncate the logged value.
LOG_DEBUG(logContext, Log_SPAM,
"receiving... (remaining: " + StringTk::intToStr(writeState.toBeReceived) + ")");
ssize_t recvRes = writeStateRecvData(ctx, writeState);
if (recvRes < 0)
{
// note: no padding is received on this path; the function returns right away
LogContext(logContext).log(Log_WARNING, "Socket data transfer error occurred. ");
return -FhgfsOpsErr_COMMUNICATION;
}
// forward to mirror...
FhgfsOpsErr mirrorRes = sendToMirror(ctx.getBuffer(), recvRes,
writeState.writeOffset, writeState.toBeReceived, sessionLocalFile);
if(unlikely(mirrorRes != FhgfsOpsErr_SUCCESS) )
{ // mirroring failed
// receive the rest of the payload before returning the error
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
return -FhgfsOpsErr_COMMUNICATION;
}
// write to underlying file system...
int errCode = 0;
ssize_t writeRes = unlikely(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
? recvRes
: doWrite(*fd, ctx.getBuffer(), recvRes, writeState.writeOffset, errCode);
// the received piece counts as consumed from the stream, no matter how much of it was written
writeState.toBeReceived -= recvRes;
// handle write errors...
if(unlikely(writeRes != recvRes) )
{ // didn't write all of the received data
if(writeRes == -1)
{ // write error occurred
LogContext(logContext).log(Log_WARNING, "Write error occurred. "
"FileHandleID: " + sessionLocalFile->getFileHandleID() + "."
"Target: " + StringTk::uintToStr(sessionLocalFile->getTargetID() ) + ". "
"File: " + sessionLocalFile->getFileID() + ". "
"SysErr: " + System::getErrString(errCode) );
LogContext(logContext).log(Log_NOTICE, std::string("Additional info: "
"FD: ") + StringTk::intToStr(*fd) + " " +
"OpenFlags: " + StringTk::intToStr(sessionLocalFile->getOpenFlags() ) + " " +
"received: " + StringTk::intToStr(recvRes) + ".");
// receive the rest of the payload, then report the system error as fhgfs error code
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
return -FhgfsOpsErrTk::fromSysErr(errCode);
}
else
{ // wrote only a part of the data, not all of it
LogContext(logContext).log(Log_WARNING,
"Unable to write all of the received data. "
"target: " + StringTk::uintToStr(sessionLocalFile->getTargetID() ) + "; "
"file: " + sessionLocalFile->getFileID() + "; "
"sysErr: " + System::getErrString(errCode) );
incrementalRecvPadding(ctx, writeState.toBeReceived, sessionLocalFile);
// return bytes received so far minus num bytes that were not written with last write
return (getCount() - writeState.toBeReceived) - (recvRes - writeRes);
}
}
writeState.writeOffset += writeRes;
// let the message implementation advance its receive state; a non-zero result ends the write
// and is returned to the caller as-is
recvRes = writeStateNext(writeState, writeRes);
if (recvRes != 0)
return recvRes;
} while(writeState.toBeReceived);
LOG_DEBUG(logContext, Log_SPAM,
std::string("Received and wrote all the data") );
// commit to storage device queue...
if (useSyncRange)
{
// advise kernel to commit written data to storage device in max_sectors_kb chunks.
/* note: this is async if there are free slots in the request queue
/sys/block/<...>/nr_requests. (optimal_io_size is not honoured as of linux-3.4) */
// the synced range ends at the end of this write and is as long as the sequential write counter
off64_t syncSize = sessionLocalFile->getWriteCounter();
off64_t syncOffset = getOffset() + getCount() - syncSize;
MsgHelperIO::syncFileRange(*fd, syncOffset, syncSize);
sessionLocalFile->resetWriteCounter();
}
return getCount();
}
/**
 * Write until everything was written (handle short-writes) or an error occurred.
 *
 * @param fd file descriptor to write to.
 * @param buf data to be written.
 * @param count number of bytes to write from buf.
 * @param offset file offset at which the first byte of buf is written.
 * @param outErrno set to errno if a pwrite call fails; left untouched otherwise.
 * @return number of bytes written (less than count if a later pwrite call failed or made no
 *    progress), or -1 if nothing could be written because the first pwrite call failed.
 */
template <class Msg, typename WriteState>
ssize_t WriteLocalFileMsgExBase<Msg, WriteState>::doWrite(int fd, char* buf, size_t count, off_t offset, int& outErrno)
{
   size_t sumWriteRes = 0;

   do
   {
      const ssize_t writeRes =
         MsgHelperIO::pwrite(fd, buf + sumWriteRes, count - sumWriteRes, offset + sumWriteRes);

      if (unlikely(writeRes < 0) )
      {
         outErrno = errno; // capture first, before anything else can modify errno

         // report the partial progress if there was any; otherwise report the error explicitly
         // (instead of storing -1 in the unsigned byte counter)
         return (sumWriteRes > 0) ? static_cast<ssize_t>(sumWriteRes) : -1;
      }

      if (unlikely(writeRes == 0) )
         break; // no progress => return a short write to the caller instead of looping forever

      sumWriteRes += writeRes;
   } while (sumWriteRes < count);

   return static_cast<ssize_t>(sumWriteRes);
}
/**
 * Receive and discard data.
 *
 * The received pieces are still forwarded to the mirror via sendToMirror(); errors of that
 * forwarding are ignored here, because this method is only used on paths where the write is
 * failing already.
 *
 * @param ctx response context; supplies the receive buffer.
 * @param padLen number of bytes to receive; values <= 0 make this a no-op.
 * @param sessionLocalFile session that is handed through to sendToMirror().
 */
template <class Msg, typename WriteState>
void WriteLocalFileMsgExBase<Msg, WriteState>::incrementalRecvPadding(NetMessage::ResponseContext& ctx,
   int64_t padLen, SessionLocalFile* sessionLocalFile)
{
   // signed on purpose: a negative padLen must not turn into a huge unsigned byte count
   int64_t toBeReceived = padLen;

   while (toBeReceived > 0)
   {
      const ssize_t recvRes = recvPadding(ctx, toBeReceived);

      if (recvRes <= 0)
         break; // error (any negative value) or no progress => stop, nothing more we can do

      // forward to mirror...
      /* ... but if we are in this method, then something went wrong anyways, so don't set
         needs-resync here or report any error to caller if mirroring failed. */
      (void)sendToMirror(ctx.getBuffer(), recvRes,
         getOffset() + padLen - toBeReceived, toBeReceived, sessionLocalFile);

      toBeReceived -= recvRes;
   }
}
/**
 * Make sure that the given session has an open file descriptor.
 *
 * @param target storage target; provides the consistency state and the chunk/mirror directory FD.
 * @param sessionLocalFile session whose file gets opened if it is not open yet.
 * @return FhgfsOpsErr_SUCCESS if the file is (already) open, FhgfsOpsErr_COMMUNICATION if a
 *    mirror chunk request is refused because the target is not in a good state, otherwise the
 *    result of SessionLocalFile::openFile().
 */
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::openFile(const StorageTarget& target,
   SessionLocalFile* sessionLocalFile)
{
   // nothing to do if a previous request opened the file already
   if (sessionLocalFile->getFD().valid())
      return FhgfsOpsErr_SUCCESS;

   const bool mirrorSession = sessionLocalFile->getIsMirrorSession();

   // not open yet => pick the directory FD and look at the target state
   const auto targetState = target.getConsistencyState();
   const int dirFD = mirrorSession ? *target.getMirrorFD() : *target.getChunkFD();

   const bool targetIsGood = (targetState == TargetConsistencyState_GOOD);

   if (unlikely(!targetIsGood) &&
       mirrorSession &&
       !isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
   { // buddymirrored chunk on a non-good primary => refuse
      const std::string logContext = Msg::logContextPref + " (write incremental)";

      LogContext(logContext).log(Log_NOTICE, "Refusing request. Target consistency is not good. "
         "targetID: " + StringTk::uintToStr(target.getID()));

      return FhgfsOpsErr_COMMUNICATION;
   }

   const bool quotaRequested = isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA);
   const bool quotaEnforced = Program::getApp()->getConfig()->getQuotaEnableEnforcement();

   SessionQuotaInfo quotaInfo(quotaRequested, quotaEnforced, getUserID(), getGroupID() );

   return sessionLocalFile->openFile(dirFD, getPathInfo(), true, &quotaInfo);
}
/**
* Prepares mirroring by storing mirrorNode reference in file session and setting the mirrorToSock
* member variable.
*
* Does nothing (and returns success) if the BUDDYMIRROR_FORWARD flag is not set in the message
* header.
*
* Note: Mirror node reference needs to be released on file session close.
*
* @param buf used to send initial write msg header to mirror.
* @param bufLen length of buf.
* @param sessionLocalFile session in which the mirror node reference is stored.
* @param target local target; gets marked as "buddy needs resync" if the secondary is offline.
* @return FhgfsOpsErr_COMMUNICATION if communication with mirror failed or the state of the
* secondary is unknown/unclear; FhgfsOpsErr_UNKNOWNTARGET if no secondary target is known for the
* target ID of this message; the error of the node lookup if the mirror node cannot be referenced.
*/
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::prepareMirroring(char* buf, size_t bufLen,
SessionLocalFile* sessionLocalFile, StorageTarget& target)
{
std::string logContext = Msg::logContextPref + " (prepare mirroring)";
// check if mirroring is enabled
if(!isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_FORWARD) )
return FhgfsOpsErr_SUCCESS;
App* app = Program::getApp();
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
TargetStateStore* targetStates = app->getTargetStateStore();
// check if secondary is offline or in unclear state
// (note: the target ID of this message is used as the mirror buddy group ID for the lookup)
uint16_t secondaryTargetID = mirrorBuddies->getSecondaryTargetID(getTargetID() );
if(unlikely(!secondaryTargetID) )
{
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
return FhgfsOpsErr_UNKNOWNTARGET;
}
CombinedTargetState secondaryState;
bool getSecondaryStateRes = targetStates->getState(secondaryTargetID, secondaryState);
if(unlikely(!getSecondaryStateRes) )
{
LOG_DEBUG(logContext, Log_DEBUG,
"Refusing request. Secondary target has invalid state. "
"targetID: " + StringTk::uintToStr(secondaryTargetID) );
return FhgfsOpsErr_COMMUNICATION;
}
if( (secondaryState.reachabilityState != TargetReachabilityState_ONLINE) ||
(secondaryState.consistencyState != TargetConsistencyState_GOOD) )
{
if(secondaryState.reachabilityState == TargetReachabilityState_OFFLINE)
{ // buddy is offline => mark needed resync and continue with local operation
LOG_DEBUG(logContext, Log_DEBUG,
"Secondary is offline and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
if(secondaryState.consistencyState != TargetConsistencyState_NEEDS_RESYNC)
{ // unclear buddy state => client must try again
LOG_DEBUG(logContext, Log_DEBUG,
"Unclear secondary state, caller will have to try again later. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
return FhgfsOpsErr_COMMUNICATION;
}
// remaining case: secondary is not offline and needs a resync => the write is still forwarded
}
// store mirror node reference in session...
NodeHandle mirrorToNode = sessionLocalFile->getMirrorNode();
if(!mirrorToNode)
{
NodeStoreServers* storageNodes = app->getStorageNodes();
TargetMapper* targetMapper = app->getTargetMapper();
FhgfsOpsErr referenceErr;
mirrorToNode = storageNodes->referenceNodeByTargetID(secondaryTargetID, targetMapper,
&referenceErr);
if(unlikely(referenceErr != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).logErr(
"Unable to forward to mirror target: " + StringTk::uintToStr(secondaryTargetID) + "; "
"Error: " + boost::lexical_cast<std::string>(referenceErr));
return referenceErr;
}
// continue with whatever node handle the session returns from this call
// NOTE(review): presumably this is the handle of another request that stored its reference
// first - confirm against SessionLocalFile::setMirrorNodeExclusive().
mirrorToNode = sessionLocalFile->setMirrorNodeExclusive(mirrorToNode);
}
// send initial write msg header to mirror (retry loop)...
for( ; ; )
{
try
{
// acquire connection to mirror node and send write msg...
mirrorToSock = mirrorToNode->getConnPool()->acquireStreamSocket();
WriteLocalFileMsg mirrorWriteMsg(getClientNumID(), getFileHandleID(), getTargetID(),
getPathInfo(), getAccessFlags(), getOffset(), getCount());
// copy the relevant settings of the original request over to the mirror request
if(doSessionCheck() )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_DISABLE_IO);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) )
mirrorWriteMsg.setUserdataForQuota(getUserID(), getGroupID() );
// mark the forwarded message as a buddy mirror write that is addressed to the secondary
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
unsigned msgLength = mirrorWriteMsg.serializeMessage(buf, bufLen).second;
mirrorToSock->send(buf, msgLength, 0);
return FhgfsOpsErr_SUCCESS;
}
catch(SocketConnectException& e)
{
LogContext(logContext).log(Log_CRITICAL, "Unable to connect to mirror node: " +
mirrorToNode->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
}
catch(SocketException& e)
{
LogContext(logContext).log(Log_CRITICAL, "Communication with mirror node failed: " +
mirrorToNode->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
// the connection is in an unknown state now, so it must not be reused
if(mirrorToSock)
mirrorToNode->getConnPool()->invalidateStreamSocket(mirrorToSock);
mirrorToSock = NULL;
}
// error occurred if we got here
// (the retry counter is a member, so it is shared with sendToMirror() )
if(!mirrorRetriesLeft)
break;
mirrorRetriesLeft--;
// next round will be a retry
LogContext(logContext).log(Log_NOTICE, "Retrying mirror communication: " +
mirrorToNode->getNodeIDWithTypeStr() );
} // end of retry for-loop
// all retries exhausted if we got here
return FhgfsOpsErr_COMMUNICATION;
}
/**
* Send file contents to mirror.
*
* Does nothing (and returns success) if mirrorToSock is not set.
*
* Note: Supports retries only at beginning of write msg.
*
* @param buf the buffer that should be sent to the mirror.
* @param bufLen number of bytes in buf that should be sent.
* @param offset the offset within the chunk file (only used if communication fails and we need to
* start over with a new WriteMsg to the mirror).
* @param toBeMirrored total remaining mirror data including given bufLen (only used for retries).
* @param sessionLocalFile session that holds the mirror node reference.
* @return FhgfsOpsErr_COMMUNICATION if mirroring fails.
*/
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::sendToMirror(const char* buf, size_t bufLen,
int64_t offset, int64_t toBeMirrored, SessionLocalFile* sessionLocalFile)
{
std::string logContext = Msg::logContextPref + " (send to mirror)";
// check if mirroring enabled
if(!mirrorToSock)
return FhgfsOpsErr_SUCCESS; // either no mirroring enabled or all retries exhausted
bool isRetryRound = false;
// send raw data (retry loop)...
// (note: if sending fails, retrying requires sending of a new WriteMsg)
for( ; ; )
{
try
{
if(unlikely(isRetryRound) )
{ // retry requires reconnect and resend of write msg with current offset
auto mirrorToNode = sessionLocalFile->getMirrorNode();
mirrorToSock = mirrorToNode->getConnPool()->acquireStreamSocket();
// the new message only covers the part that has not been mirrored yet
WriteLocalFileMsg mirrorWriteMsg(getClientNumID(), getFileHandleID(),
getTargetID(), getPathInfo(), getAccessFlags(), offset, toBeMirrored);
if(doSessionCheck() )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_DISABLE_IO) )
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_DISABLE_IO);
if(isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_USE_QUOTA) )
mirrorWriteMsg.setUserdataForQuota(getUserID(), getGroupID() );
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR);
mirrorWriteMsg.addMsgHeaderFeatureFlag(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
// serialized into a separate buffer, because buf holds the payload that is sent below
const auto mirrorBuf = MessagingTk::createMsgVec(mirrorWriteMsg);
mirrorToSock->send(&mirrorBuf[0], mirrorBuf.size(), 0);
}
mirrorToSock->send(buf, bufLen, 0);
return FhgfsOpsErr_SUCCESS;
}
catch(SocketConnectException& e)
{
auto mirrorToNode = sessionLocalFile->getMirrorNode();
LogContext(logContext).log(Log_CRITICAL, "Unable to connect to mirror node: " +
mirrorToNode->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
}
catch(SocketException& e)
{
LogContext(logContext).log(Log_CRITICAL, "Communication with mirror node failed: " +
sessionLocalFile->getMirrorNode()->getNodeIDWithTypeStr() + "; "
"Msg: " + e.what() );
// the connection is in an unknown state now, so it must not be reused
if(mirrorToSock)
sessionLocalFile->getMirrorNode()->getConnPool()->invalidateStreamSocket(mirrorToSock);
mirrorToSock = NULL;
}
// error occurred if we got here
// (the retry counter is a member, so retries used up by prepareMirroring() count as well)
if(!mirrorRetriesLeft)
break;
// only allow retries if we're still at the beginning of the write msg.
/* (this is because later we don't have all the client data available; and without the mirror
response we don't know for sure whether previously sent data was really written or not.) */
if(toBeMirrored != getCount() )
break;
mirrorRetriesLeft--;
// next round will be a retry
LogContext(logContext).log(Log_NOTICE, "Retrying mirror communication: " +
sessionLocalFile->getMirrorNode()->getNodeIDWithTypeStr() );
isRetryRound = true;
} // end of retry for-loop
// all retries exhausted if we got here
return FhgfsOpsErr_COMMUNICATION;
}
/**
* Receive response from mirror node, check result, clean up (release mirror sock).
*
* Does nothing (and returns success) if mirrorToSock is not set.
*
* Note: Does not do retries on communication errors
*
* @param sessionLocalFile session that holds the mirror node reference.
* @param target local target; gets marked as "buddy needs resync" if the mirror reports an unknown
* target.
* @return FhgfsOpsErr_SUCCESS if the mirror wrote all of the data (or reported an unknown target);
* FhgfsOpsErr_NOSPACE if the mirror wrote only a part of the data; FhgfsOpsErr_COMMUNICATION if
* no valid response was received; otherwise the error code reported by the mirror.
*/
template <class Msg, typename WriteState>
FhgfsOpsErr WriteLocalFileMsgExBase<Msg, WriteState>::finishMirroring(SessionLocalFile* sessionLocalFile,
StorageTarget& target)
{
std::string logContext = Msg::logContextPref + " (finish mirroring)";
// check if mirroring enabled
if(!mirrorToSock)
return FhgfsOpsErr_SUCCESS; // mirroring disabled
App* app = Program::getApp();
auto mirrorToNode = sessionLocalFile->getMirrorNode();
// (declared before the try block, because the gotos below may not skip initializations)
WriteLocalFileRespMsg* writeRespMsg;
int64_t mirrorWriteRes;
// receive write msg response from mirror...
/* note: we don't have the file contents that were sent by the client anymore at this point, so
we cannot do retries here with a new WriteMsg. */
try
{
// receive write msg response...
auto resp = MessagingTk::recvMsgBuf(*mirrorToSock);
if (resp.empty())
{ // error
LogContext(logContext).log(Log_WARNING,
"Failed to receive response from mirror: " + mirrorToSock->getPeername() );
goto cleanup_commerr;
}
// got response => deserialize it...
auto respMsg = app->getNetMessageFactory()->createFromBuf(std::move(resp));
if(unlikely(respMsg->getMsgType() != NETMSGTYPE_WriteLocalFileResp) )
{ // response invalid (wrong msgType)
LogContext(logContext).logErr(
"Received invalid response type: " + StringTk::intToStr(respMsg->getMsgType() ) +"; "
"expected type: " + StringTk::intToStr(NETMSGTYPE_WriteLocalFileResp) + ". "
"Disconnecting: " + mirrorToSock->getPeername() );
goto cleanup_commerr;
}
// check mirror result and release mirror socket...
// NOTE(review): mirrorToSock is not reset here (and neither after the invalidate below), so
// the member still points to the returned socket afterwards - confirm that no caller invokes
// finishMirroring() or sendToMirror() again on the same message object.
mirrorToNode->getConnPool()->releaseStreamSocket(mirrorToSock);
writeRespMsg = (WriteLocalFileRespMsg*)respMsg.get();
mirrorWriteRes = writeRespMsg->getValue();
if(likely(mirrorWriteRes == getCount() ) )
return FhgfsOpsErr_SUCCESS; // mirror successfully wrote all of the data
if(mirrorWriteRes >= 0)
{ // mirror only wrote a part of the data
LogContext(logContext).log(Log_WARNING,
"Mirror did not write all of the data (no space left); "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"fileHandle: " + sessionLocalFile->getFileHandleID() );
return FhgfsOpsErr_NOSPACE;
}
// negative values are negated fhgfs error codes
if(mirrorWriteRes == -FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage is restarted without a broken secondary target, so we
report success to a client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
// a crash of the mirror server is only logged here; the error code itself is returned below
if(mirrorWriteRes == -FhgfsOpsErr_STORAGE_SRV_CRASHED)
LogContext(logContext).log(Log_NOTICE, "Potential cache loss for open file handle. "
"(Mirror server crash detected.) "
"FileHandleID: " + sessionLocalFile->getFileHandleID() + "; "
"Mirror: " + mirrorToNode->getNodeIDWithTypeStr() );
// mirror encountered an error
return (FhgfsOpsErr)-mirrorWriteRes; // write response contains negative fhgfs error code
}
catch(SocketException& e)
{
LogContext(logContext).logErr(std::string("SocketException: ") + e.what() );
LogContext(logContext).log(Log_WARNING, "Additional info: "
"mirror node: " + mirrorToNode->getNodeIDWithTypeStr() + "; "
"fileHandle: " + sessionLocalFile->getFileHandleID() );
}
// cleanup after communication error...
// (reached via the gotos above or by falling through after the SocketException handler)
cleanup_commerr:
mirrorToNode->getConnPool()->invalidateStreamSocket(mirrorToSock);
return FhgfsOpsErr_COMMUNICATION;
}
/**
 * @return true if the session check was requested via the SESSION_CHECK header flag and this
 *    message does not carry the BUDDYMIRROR flag (the check is never done for mirror sessions).
 */
template <class Msg, typename WriteState>
bool WriteLocalFileMsgExBase<Msg, WriteState>::doSessionCheck()
{
   if (isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_BUDDYMIRROR) )
      return false; // mirror session => no session check

   return isMsgHeaderFeatureFlagSet(WRITELOCALFILEMSG_FLAG_SESSION_CHECK);
}

View File

@@ -0,0 +1,213 @@
#pragma once
#include <common/net/message/session/rw/WriteLocalFileMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRespMsg.h>
#include <session/SessionLocalFile.h>
#include <common/storage/StorageErrors.h>
#define WRITEMSG_MIRROR_RETRIES_NUM 1
class StorageTarget;
/**
* Contains common data needed by implementations of the network protocol
* that receive data from the client.
*/
struct WriteStateBase
{
const char* logContext;
ssize_t exactStaticRecvSize;
ssize_t recvLength;
int64_t toBeReceived;
off_t writeOffset;
SessionLocalFile* sessionLocalFile;
WriteStateBase(const char* logContext, ssize_t exactStaticRecvSize,
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile)
{
this->logContext = logContext;
this->exactStaticRecvSize = exactStaticRecvSize;
this->toBeReceived = toBeReceived;
this->writeOffset = writeOffset;
this->sessionLocalFile = sessionLocalFile;
recvLength = BEEGFS_MIN(exactStaticRecvSize, toBeReceived);
}
};
template <class Msg, typename WriteState>
class WriteLocalFileMsgExBase : public Msg
{
private:
Socket* mirrorToSock;
unsigned mirrorRetriesLeft;
public:
bool processIncoming(NetMessage::ResponseContext& ctx);
WriteLocalFileMsgExBase() : Msg()
{
mirrorToSock = NULL;
mirrorRetriesLeft = WRITEMSG_MIRROR_RETRIES_NUM;
}
private:
std::pair<bool, int64_t> write(NetMessage::ResponseContext& ctx);
ssize_t doWrite(int fd, char* buf, size_t count, off_t offset, int& outErrno);
FhgfsOpsErr openFile(const StorageTarget& target, SessionLocalFile* sessionLocalFile);
FhgfsOpsErr prepareMirroring(char* buf, size_t bufLen,
SessionLocalFile* sessionLocalFile, StorageTarget& target);
FhgfsOpsErr sendToMirror(const char* buf, size_t bufLen, int64_t offset, int64_t toBeMirrored,
SessionLocalFile* sessionLocalFile);
FhgfsOpsErr finishMirroring(SessionLocalFile* sessionLocalFile, StorageTarget& target);
bool doSessionCheck();
int64_t incrementalRecvAndWriteStateful(NetMessage::ResponseContext& ctx,
SessionLocalFile* sessionLocalFile);
void incrementalRecvPadding(NetMessage::ResponseContext& ctx, int64_t padLen,
SessionLocalFile* sessionLocalFile);
inline ssize_t recvPadding(NetMessage::ResponseContext& ctx, int64_t toBeReceived)
{
return static_cast<Msg&>(*this).recvPadding(ctx, toBeReceived);
}
inline void sendResponse(NetMessage::ResponseContext& ctx, int err)
{
return static_cast<Msg&>(*this).sendResponse(ctx, err);
}
inline bool writeStateInit(WriteState& ws)
{
return static_cast<Msg&>(*this).writeStateInit(ws);
}
inline ssize_t writeStateRecvData(NetMessage::ResponseContext& ctx, WriteState& ws)
{
return static_cast<Msg&>(*this).writeStateRecvData(ctx, ws);
}
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
{
return static_cast<Msg&>(*this).writeStateNext(ws, writeRes);
}
public:
inline bool isMsgValid() const
{
return static_cast<const Msg&>(*this).isMsgValid();
}
inline bool isMsgHeaderFeatureFlagSet(unsigned flag) const
{
return static_cast<const Msg&>(*this).isMsgHeaderFeatureFlagSet(flag);
}
inline unsigned getMsgHeaderUserID() const
{
return static_cast<const Msg&>(*this).getMsgHeaderUserID();
}
inline uint16_t getTargetID() const
{
return static_cast<const Msg&>(*this).getTargetID();
}
inline int64_t getOffset() const
{
return static_cast<const Msg&>(*this).getOffset();
}
inline unsigned getUserID() const
{
return static_cast<const Msg&>(*this).getUserID();
}
inline unsigned getGroupID() const
{
return static_cast<const Msg&>(*this).getGroupID();
}
inline int64_t getCount() const
{
return static_cast<const Msg&>(*this).getCount();
}
inline const char* getFileHandleID()
{
return static_cast<Msg&>(*this).getFileHandleID();
}
inline NumNodeID getClientNumID() const
{
return static_cast<const Msg&>(*this).getClientNumID();
}
inline unsigned getAccessFlags() const
{
return static_cast<const Msg&>(*this).getAccessFlags();
}
inline PathInfo* getPathInfo ()
{
return static_cast<Msg&>(*this).getPathInfo();
}
};
/**
* Implements the recv protocol.
*/
class WriteLocalFileMsgSender : public WriteLocalFileMsg
{
public:
struct WriteState : public WriteStateBase
{
WriteState(const char* logContext, ssize_t exactStaticRecvSize,
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile) :
WriteStateBase(logContext, exactStaticRecvSize, toBeReceived, writeOffset,
sessionLocalFile) {}
};
private:
friend class WriteLocalFileMsgExBase<WriteLocalFileMsgSender, WriteState>;
static const std::string logContextPref;
ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
inline void sendResponse(ResponseContext& ctx, int err)
{
ctx.sendResponse(WriteLocalFileRespMsg(err));
}
inline bool writeStateInit(WriteState& ws)
{
return true;
}
inline ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws)
{
AbstractApp* app = PThread::getCurrentThreadApp();
int connMsgMediumTimeout = app->getCommonConfig()->getConnMsgMediumTimeout();
ws.recvLength = BEEGFS_MIN(ws.exactStaticRecvSize, ws.toBeReceived);
return ctx.getSocket()->recvExactT(ctx.getBuffer(), ws.recvLength, 0, connMsgMediumTimeout);
}
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
{
return 0;
}
};
// write message handler for the plain (socket recv) transport
using WriteLocalFileMsgEx =
   WriteLocalFileMsgExBase<WriteLocalFileMsgSender, WriteLocalFileMsgSender::WriteState>;

View File

@@ -0,0 +1,94 @@
#pragma once
#ifdef BEEGFS_NVFS
#include <common/net/message/session/rw/WriteLocalFileRDMAMsg.h>
#include <common/net/message/session/rw/WriteLocalFileRDMARespMsg.h>
#include <common/components/worker/Worker.h>
#include <session/SessionLocalFile.h>
#include <common/storage/StorageErrors.h>
#include "WriteLocalFileMsgEx.h"
/**
* Implements RDMA read protocol.
*/
class WriteLocalFileRDMAMsgSender : public WriteLocalFileRDMAMsg
{
public:
struct WriteState : public WriteStateBase
{
RdmaInfo* rdma;
uint64_t rBuf;
size_t rLen;
uint64_t rOff;
int64_t recvSize;
WriteState(const char* logContext, ssize_t exactStaticRecvSize,
int64_t toBeReceived, off_t writeOffset, SessionLocalFile* sessionLocalFile) :
WriteStateBase(logContext, exactStaticRecvSize, toBeReceived, writeOffset,
sessionLocalFile)
{
recvSize = toBeReceived;
}
};
private:
friend class WriteLocalFileMsgExBase<WriteLocalFileRDMAMsgSender, WriteState>;
static const std::string logContextPref;
ssize_t recvPadding(ResponseContext& ctx, int64_t toBeReceived);
inline void sendResponse(ResponseContext& ctx, int err)
{
ctx.sendResponse(WriteLocalFileRDMARespMsg(err));
}
inline bool writeStateInit(WriteState& ws)
{
ws.rdma = getRdmaInfo();
if (unlikely(!ws.rdma->next(ws.rBuf, ws.rLen, ws.rOff)))
{
LogContext(ws.logContext).logErr("No entities in RDMA buffers.");
return false;
}
return true;
}
inline ssize_t writeStateRecvData(ResponseContext& ctx, WriteState& ws)
{
// Cannot RDMA anything larger than WORKER_BUFIN_SIZE in a single operation
// because that is the size of the buffer passed in by the Worker.
// TODO: pass around a Buffer with a length instead of unqualified char*.
ws.recvLength = BEEGFS_MIN(
BEEGFS_MIN(
BEEGFS_MIN(ws.exactStaticRecvSize, ws.toBeReceived),
(ssize_t)(ws.rLen - ws.rOff)),
WORKER_BUFIN_SIZE);
return ctx.getSocket()->read(ctx.getBuffer(), ws.recvLength, 0, ws.rBuf + ws.rOff, ws.rdma->key);
}
inline size_t writeStateNext(WriteState& ws, ssize_t writeRes)
{
ws.rOff += writeRes;
if (ws.toBeReceived > 0 && ws.rOff == ws.rLen)
{
if (unlikely(!ws.rdma->next(ws.rBuf, ws.rLen, ws.rOff)))
{
LogContext(ws.logContext).logErr("RDMA buffers expended but not all data received. toBeReceived=" +
StringTk::uint64ToStr(ws.toBeReceived) + "; "
"target: " + StringTk::uintToStr(ws.sessionLocalFile->getTargetID() ) + "; "
"file: " + ws.sessionLocalFile->getFileID() + "; ");
return ws.recvSize - ws.toBeReceived;
}
}
return 0;
}
};
// write message handler for the RDMA transport
using WriteLocalFileRDMAMsgEx =
   WriteLocalFileMsgExBase<WriteLocalFileRDMAMsgSender, WriteLocalFileRDMAMsgSender::WriteState>;
#endif /* BEEGFS_NVFS */

View File

@@ -0,0 +1,20 @@
#include <program/Program.h>
#include <common/net/message/storage/GetHighResStatsRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include "GetHighResStatsMsgEx.h"
/**
 * Answers the request with the stats history that the stats collector returns for the value
 * carried by this message.
 *
 * @return always true.
 */
bool GetHighResStatsMsgEx::processIncoming(ResponseContext& ctx)
{
   const uint64_t sinceMS = getValue();

   // get stats history
   HighResStatsList history;
   Program::getApp()->getStatsCollector()->getStatsSince(sinceMS, history);

   ctx.sendResponse(GetHighResStatsRespMsg(&history) );

   return true;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/GetHighResStatsMsg.h>
class GetHighResStatsMsgEx : public GetHighResStatsMsg
{
public:
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,56 @@
#include <program/Program.h>
#include <common/net/message/storage/StatStoragePathRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include "StatStoragePathMsgEx.h"
/**
 * Answers a stat-storage-path request: gathers the size/inode numbers via statStoragePath(),
 * sends them (together with the result code) in a StatStoragePathRespMsg and updates the
 * per-node operation counter.
 *
 * Note: the response is sent even if statStoragePath() failed; the values are 0 then unless
 * statStoragePath() already filled them in.
 *
 * @param ctx context used to send the response and to identify the peer for op stats.
 * @return always true.
 */
bool StatStoragePathMsgEx::processIncoming(ResponseContext& ctx)
{
   int64_t totalSize = 0;
   int64_t freeSize = 0;
   int64_t totalInodes = 0;
   int64_t freeInodes = 0;

   const FhgfsOpsErr statResult =
      statStoragePath(&totalSize, &freeSize, &totalInodes, &freeInodes);

   ctx.sendResponse(
      StatStoragePathRespMsg(statResult, totalSize, freeSize, totalInodes, freeInodes) );

   // update operation counters
   Program::getApp()->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
      StorageOpCounter_STATSTORAGEPATH, getMsgHeaderUserID() );

   return true;
}
/**
 * Collects size and inode numbers for the storage path of the target named in this message.
 *
 * After a successful StorageTk::statStoragePath() call, StorageTk::statStoragePathOverride() is
 * applied to the free size / free inodes values (manual free space override file).
 *
 * @param outSizeTotal filled by StorageTk::statStoragePath().
 * @param outSizeFree filled by StorageTk::statStoragePath(), possibly replaced by the override.
 * @param outInodesTotal filled by StorageTk::statStoragePath().
 * @param outInodesFree filled by StorageTk::statStoragePath(), possibly replaced by the override.
 * @return FhgfsOpsErr_UNKNOWNTARGET if the target ID is not known on this server,
 *    FhgfsOpsErr_INTERNAL if StorageTk::statStoragePath() fails, FhgfsOpsErr_SUCCESS otherwise.
 */
FhgfsOpsErr StatStoragePathMsgEx::statStoragePath(int64_t* outSizeTotal, int64_t* outSizeFree,
   int64_t* outInodesTotal, int64_t* outInodesFree)
{
   const char* logContext = "StatStoragePathMsg (stat path)";

   App* app = Program::getApp();

   auto* const target = app->getStorageTargets()->getTarget(getTargetID());
   if (!target)
   {
      LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(getTargetID() ) );
      return FhgfsOpsErr_UNKNOWNTARGET;
   }

   const auto& targetPath = target->getPath().str();

   bool statSuccess = StorageTk::statStoragePath(targetPath, outSizeTotal, outSizeFree,
      outInodesTotal, outInodesFree);
   if(unlikely(!statSuccess) )
   { // error
      // (closing parenthesis added; the message used to end with an unbalanced "(SysErr: ...")
      LogContext(logContext).logErr("Unable to statfs() storage path: " + targetPath +
         " (SysErr: " + System::getErrString() + ")");

      return FhgfsOpsErr_INTERNAL;
   }

   // read and use value from manual free space override file (if it exists)
   StorageTk::statStoragePathOverride(targetPath, outSizeFree, outInodesFree);

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,18 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/StatStoragePathMsg.h>
// stat of the path to the storage directory, result is similar to statfs
// (server-side handler for StatStoragePathMsg)
class StatStoragePathMsgEx : public StatStoragePathMsg
{
public:
// Processes the received request; the response is sent through ctx.
virtual bool processIncoming(ResponseContext& ctx);
private:
// Fills the four out-parameters with total/free size and total/free inode numbers of the
// target's storage path and returns the result code that is put into the response.
FhgfsOpsErr statStoragePath(int64_t* outSizeTotal, int64_t* outSizeFree,
int64_t* outInodesTotal, int64_t* outInodesFree);
};

View File

@@ -0,0 +1,432 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/TruncLocalFileRespMsg.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "TruncLocalFileMsgEx.h"
#include <boost/lexical_cast.hpp>
// open(2) flags used when a not-yet-existing chunk file has to be created before it is resized.
// NOTE(review): the name looks like a typo of "...CHUNKOPENFLAGS"; left as is because the
// macro is referenced by this name.
#define TRUNCLOCALFILE_CHUNKOPENLAGS (O_CREAT|O_WRONLY|O_LARGEFILE)
/**
 * Handles a request to truncate (or grow) a single chunk file to the size given in the message.
 *
 * Steps:
 *  - resolve the effective target ID (a buddy group ID is mapped to its primary or secondary
 *    target, depending on the BUDDYMIRROR_SECOND flag)
 *  - look up the target and its chunks/mirror dir FD (includes the consistency state check)
 *  - forward the request to the secondary (if this is a mirrored request to the primary)
 *  - truncate the local chunk file and collect its dynamic attributes
 *  - send a TruncLocalFileRespMsg and update the operation counters
 *
 * Paths on which a GenericResponseMsg was already sent skip the regular response. All other
 * paths end at send_response, so the requestor always receives an answer.
 *
 * @param ctx context used to send responses and to identify the peer for op stats.
 * @return always true.
 */
bool TruncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "TruncChunkFileMsg incoming";

   App* app = Program::getApp();

   uint16_t targetID;
   int targetFD;
   bool chunkLocked = false;
   FhgfsOpsErr clientErrRes;
   DynamicAttribs dynAttribs; // inits storageVersion to 0 (=> initially invalid)
   StorageTarget* target;

   // select the right targetID

   targetID = getTargetID();

   if(isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown group ID
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   { // unknown targetID
      if (isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR))
      { /* buddy mirrored file => fail with GenericResp to make the caller retry.
           mgmt will mark this target as (p)offline in a few moments. */
         ctx.sendResponse(
            GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
         return true;
      }

      LOG(GENERAL, ERR, "Unknown target ID.", targetID);
      clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;

      /* report the error to the requestor. (returning here without a response would leave the
         requestor waiting and would make the assignment above a dead store.) */
      goto send_response;
   }

   { // get targetFD and check consistency state
      bool skipResponse = false;

      targetFD = getTargetFD(*target, ctx, &skipResponse);
      if(unlikely(targetFD == -1) )
      { // failed => consistency state not good
         if(skipResponse)
            goto skip_response; // GenericResponseMsg sent

         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   // forward to secondary (if appropriate)

   clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
   if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
   {
      if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
         goto skip_response; // GenericResponseMsg sent

      goto send_response;
   }

   { // valid targetID
      std::string entryID(getEntryID() );

      // generate path to chunk file...

      Path chunkDirPath;
      std::string chunkFilePathStr;
      const PathInfo *pathInfo = getPathInfo();
      bool hasOrigFeature = pathInfo->hasOrigFeature();

      StorageTk::getChunkDirChunkFilePath(pathInfo, entryID, hasOrigFeature, chunkDirPath,
         chunkFilePathStr);

      // truncate file...

      clientErrRes = truncFile(targetID, targetFD, &chunkDirPath, chunkFilePathStr, entryID,
         hasOrigFeature);

      /* clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFileSize() is special we need to fake
       * the attributes, to inform the metaserver about the new file size with storageVersion!=0 */
      if(clientErrRes == FhgfsOpsErr_SUCCESS ||
         (clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFilesize() ) )
      { // truncation successful
         LOG_DEBUG(logContext, Log_DEBUG, "File truncated: " + chunkFilePathStr);

         // get updated dynamic attribs...

         if(!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_NODYNAMICATTRIBS) )
         {
            if (clientErrRes == FhgfsOpsErr_SUCCESS)
               getDynamicAttribsByPath(targetFD, chunkFilePathStr.c_str(), targetID, entryID,
                  dynAttribs);
            else
            { // clientErrRes == FhgfsOpsErr_PATHNOTEXISTS && !getFileSize()
               getFakeDynAttribs(targetID, entryID, dynAttribs);
            }
         }

         // change to SUCCESS if it was FhgfsOpsErr_PATHNOTEXISTS
         clientErrRes = FhgfsOpsErr_SUCCESS;
      }
   }

send_response:

   if(chunkLocked) // unlock chunk
      app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );

   // send response...
   ctx.sendResponse(
      TruncLocalFileRespMsg(clientErrRes, dynAttribs.filesize, dynAttribs.allocedBlocks,
         dynAttribs.modificationTimeSecs, dynAttribs.lastAccessTimeSecs,
         dynAttribs.storageVersion) );

skip_response:

   // update operation counters
   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
      StorageOpCounter_TRUNCLOCALFILE, getMsgHeaderUserID() );

   return true;
}
/**
 * Picks the directory FD to operate on (mirror dir for buddy mirror requests, chunks dir
 * otherwise) and refuses buddy mirror requests that address a primary whose consistency state
 * is not good.
 *
 * @param target the storage target the request refers to.
 * @param ctx context used to send the refusal (GenericResponseMsg), if any.
 * @param outResponseSent set to true only if a response was sent from within this method, which
 *    only happens together with a return value of -1.
 * @return the selected directory FD, or -1 if the request was refused.
 */
int TruncLocalFileMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   const bool mirrored = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   const auto state = target.getConsistencyState();
   const int dirFD = mirrored ? *target.getMirrorFD() : *target.getChunkFD();

   // good target, unmirrored request or request to the secondary => FD can be used
   const bool targetIsGood = (state == TargetConsistencyState_GOOD);
   if(targetIsGood || !mirrored ||
      isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
      return dirFD;

   // this is a msg to a non-good primary => refuse
   std::string refusalText = "Refusing request. Target consistency is not good. "
      "targetID: " + StringTk::uintToStr(target.getID());

   ctx.sendResponse(
      GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(refusalText)));

   *outResponseSent = true;
   return -1;
}
/**
 * Resizes the chunk file to getFilesize(). If the chunk file does not exist and the requested
 * size is non-zero, the file is created (quota-aware, via the chunk dir store) and then resized.
 *
 * @param targetId used to select the exceeded-quota store for the creation path.
 * @param targetFD directory FD that chunkFilePathStr is resolved against.
 * @param chunkDirPath chunk directory path, passed on to the chunk dir store.
 * @param chunkFilePathStr path of the chunk file.
 * @param entryID passed to chmodV2ChunkDirPath() on the NOTOWNER retry path.
 * @param hasOrigFeature passed through to ChunkStore::openChunkFile().
 * @return FhgfsOpsErr_SUCCESS on success; FhgfsOpsErr_PATHNOTEXISTS if the file does not exist
 *    and the requested size is 0; otherwise the error mapped from errno or the error returned by
 *    openChunkFile(). If both ftruncate() and close() fail, the close() error is returned.
 */
FhgfsOpsErr TruncLocalFileMsgEx::truncFile(uint16_t targetId, int targetFD,
   const Path* chunkDirPath, const std::string& chunkFilePathStr, std::string entryID,
   bool hasOrigFeature)
{
   const char* logContext = "TruncLocalFileMsg incoming";

   App* app = Program::getApp();

   // common case: the chunk file exists and is resized in place
   if(MsgHelperIO::truncateAt(targetFD, chunkFilePathStr.c_str(), getFilesize() ) == 0)
      return FhgfsOpsErr_SUCCESS;

   // distinguish "file/path doesn't exist" from real errors
   const int truncErrCode = errno;

   if(unlikely(truncErrCode != ENOENT) )
   { // real error
      const FhgfsOpsErr truncErr = FhgfsOpsErrTk::fromSysErr(truncErrCode);

      if(truncErr == FhgfsOpsErr_INTERNAL) // only log unhandled errors
         LogContext(logContext).logErr("Unable to truncate file: " + chunkFilePathStr + ". " +
            "SysErr: " + System::getErrString(truncErrCode) );

      return truncErr;
   }

   /* ENOENT => file (and possibly its parent dirs) doesn't exist. that is generally fine, but
      if the file is supposed to have a non-zero size, it has to be created now. */

   if(getFilesize() == 0)
      return FhgfsOpsErr_PATHNOTEXISTS; // nothing to be done

   // create the file...

   const bool quotaRequested = isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_USE_QUOTA);
   const bool quotaEnforced = app->getConfig()->getQuotaEnableEnforcement();

   SessionQuotaInfo quotaInfo(quotaRequested, quotaEnforced, getUserID(), getGroupID());

   const ExceededQuotaStorePtr exceededQuotaStore = app->getExceededQuotaStores()->get(targetId);

   ChunkStore* chunkStore = app->getChunkDirStore();

   int chunkFD;
   int openFlags = TRUNCLOCALFILE_CHUNKOPENLAGS;

   FhgfsOpsErr openRes = chunkStore->openChunkFile(targetFD, chunkDirPath, chunkFilePathStr,
      hasOrigFeature, openFlags, &chunkFD, &quotaInfo, exceededQuotaStore);

   if(unlikely(openRes == FhgfsOpsErr_NOTOWNER && quotaRequested) )
   { // fix up the chunk dir and try once more
      // (result of the chmod call is deliberately not checked; per the original note the callee
      // logs on its own)
      chunkStore->chmodV2ChunkDirPath(targetFD, chunkDirPath, entryID);

      openRes = chunkStore->openChunkFile(
         targetFD, chunkDirPath, chunkFilePathStr, hasOrigFeature, openFlags, &chunkFD,
         &quotaInfo, exceededQuotaStore);
   }

   if(openRes != FhgfsOpsErr_SUCCESS)
   {
      if(openRes == FhgfsOpsErr_INTERNAL) // only log unhandled errors
         LogContext(logContext).logErr("Failed to create chunkFile: " + chunkFilePathStr);

      return openRes;
   }

   // ...and re-size it

   FhgfsOpsErr result = FhgfsOpsErr_SUCCESS;

   if(unlikely(ftruncate(chunkFD, getFilesize() ) == -1) )
   { // error
      result = FhgfsOpsErrTk::fromSysErr(errno);

      if(result == FhgfsOpsErr_INTERNAL) // only log unhandled errors
         LogContext(logContext).logErr(
            "Unable to truncate file (after creation): " + chunkFilePathStr + ". " +
            "Length: " + StringTk::int64ToStr(getFilesize() ) + ". " +
            "SysErr: " + System::getErrString() );
   }

   // the file is closed in any case, also after a failed ftruncate()
   if(unlikely(close(chunkFD) == -1) )
   { // error
      result = FhgfsOpsErrTk::fromSysErr(errno);

      if(result == FhgfsOpsErr_INTERNAL) // only log unhandled errors
         LogContext(logContext).logErr(
            "Unable to close file (after creation/truncation): " + chunkFilePathStr + ". " +
            "Length: " + StringTk::int64ToStr(getFilesize() ) + ". " +
            "SysErr: " + System::getErrString() );
   }

   return result;
}
bool TruncLocalFileMsgEx::getDynamicAttribsByPath(const int dirFD, const char* path,
uint16_t targetID, std::string fileID, DynamicAttribs& outDynAttribs)
{
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
// note: this is locked because we need to get the filesize together with the storageVersion
bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(dirFD, path,
&outDynAttribs.filesize, &outDynAttribs.allocedBlocks, &outDynAttribs.modificationTimeSecs,
&outDynAttribs.lastAccessTimeSecs);
if(getDynAttribsRes)
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
return getDynAttribsRes;
}
/**
* Note: only for fileSize == 0 and if the file does not exist yet
*/
bool TruncLocalFileMsgEx::getFakeDynAttribs(uint16_t targetID, std::string fileID,
DynamicAttribs& outDynAttribs)
{
SyncedStoragePaths* syncedPaths = Program::getApp()->getSyncedStoragePaths();
uint64_t storageVersion = syncedPaths->lockPath(fileID, targetID); // L O C K path
int64_t currentTimeSecs = TimeAbs().getTimeval()->tv_sec;
outDynAttribs.filesize = 0;
outDynAttribs.allocedBlocks = 0;
outDynAttribs.modificationTimeSecs = currentTimeSecs;
outDynAttribs.lastAccessTimeSecs = currentTimeSecs; /* actually not correct, but better than
* 1970 */
outDynAttribs.storageVersion = storageVersion;
syncedPaths->unlockPath(fileID, targetID); // U N L O C K path
return true;
}
/**
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
*
* Nothing is forwarded (and SUCCESS is returned) if the BUDDYMIRROR flag is not set or if the
* BUDDYMIRROR_SECOND flag is already set.
*
* @param target local target; queried for "buddy resync in progress" and marked as "buddy needs
* resync" when the secondary is offline or reports an unknown target.
* @param ctx used to send a GenericResponseMsg if forwarding fails.
* @param outChunkLocked set to true if the chunk was locked here and is still locked on return
* (the caller is then responsible for unlocking it).
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
* case *outChunkLocked==false is guaranteed); _SUCCESS if local processing shall go ahead;
* otherwise the error reported by the secondary.
* @throw SocketException if sending of GenericResponseMsg fails.
*/
FhgfsOpsErr TruncLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked)
{
const char* logContext = "TruncLocalFileMsgEx incoming (forward to secondary)";
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
*outChunkLocked = false;
if(!isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR) ||
isMsgHeaderFeatureFlagSet(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
return FhgfsOpsErr_SUCCESS; // nothing to do
// mirrored chunk should be modified, check if resync is in progress and lock chunk
// (the chunk is only locked while a buddy resync is in progress)
*outChunkLocked = target.getBuddyResyncInProgress();
if(*outChunkLocked)
chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
addMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_TruncLocalFileResp);
// note: getTargetID() is the ID as received in the msg (the buddy group ID for mirrored msgs)
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
// remove the flag that we just added for secondary
// (must happen on every path, because "this" is used for local processing afterwards)
unsetMsgHeaderFeatureFlag(TRUNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);
if(unlikely(
(commRes == FhgfsOpsErr_COMMUNICATION) &&
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
// (a chunk lock taken above stays in place here; *outChunkLocked tells the caller)
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
}
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"error: " + boost::lexical_cast<std::string>(commRes));
// release the chunk lock before reporting the failure, so that _COMMUNICATION always comes
// with *outChunkLocked==false
if(*outChunkLocked)
{ // unlock chunk
chunkLockStore->unlockChunk(target.getID(), getEntryID() );
*outChunkLocked = false;
}
std::string genericRespStr = "Communication with secondary failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
return FhgfsOpsErr_COMMUNICATION;
}
// NOTE(review): unchecked cast; relies on rrArgs having requested NETMSGTYPE_TruncLocalFileResp
TruncLocalFileRespMsg* respMsg = (TruncLocalFileRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr secondaryRes = respMsg->getResult();
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
{
if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage is restarted without a broken secondary target, so we
report success to a client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
if(secondaryRes != FhgfsOpsErr_TOOBIG) // "too big" is a valid error if max filesize exceeded
{
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
boost::lexical_cast<std::string>(secondaryRes) + "; "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
}
// any other secondary error is passed on to the caller (chunk lock, if taken, stays in place)
return secondaryRes;
}
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,37 @@
#pragma once
#include <common/net/message/storage/TruncLocalFileMsg.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/Path.h>
class StorageTarget;
// Server-side handler for TruncLocalFileMsg: resizes a single chunk file and reports the
// resulting dynamic attributes.
class TruncLocalFileMsgEx : public TruncLocalFileMsg
{
private:
// Attribute set that is sent back in the response; everything is zero-initialized.
struct DynamicAttribs
{
DynamicAttribs() : filesize(0), allocedBlocks(0), modificationTimeSecs(0),
lastAccessTimeSecs(0), storageVersion(0) {}
int64_t filesize;
int64_t allocedBlocks; // allocated 512byte blocks (relevant for sparse files)
int64_t modificationTimeSecs;
int64_t lastAccessTimeSecs;
uint64_t storageVersion;
};
public:
// Processes the received request; responses are sent through ctx.
virtual bool processIncoming(ResponseContext& ctx);
private:
// Resizes (and creates, if necessary) the chunk file.
FhgfsOpsErr truncFile(uint16_t targetId, int targetFD, const Path* chunkDirPath,
const std::string& chunkFilePathStr, std::string entryID, bool hasOrigFeature);
// Returns the directory FD to work on or -1 if the request was refused.
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
// Reads the attributes of an existing chunk file into outDynAttribs.
bool getDynamicAttribsByPath(const int dirFD, const char* path, uint16_t targetID,
std::string fileID, DynamicAttribs& outDynAttribs);
// Generates attributes for a non-existing chunk file (size 0).
bool getFakeDynAttribs(uint16_t targetID, std::string fileID, DynamicAttribs& outDynAttribs);
// Forwards a mirrored request from the primary to the secondary.
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked);
};

View File

@@ -0,0 +1,152 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/attribs/GetChunkFileAttribsRespMsg.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "GetChunkFileAttribsMsgEx.h"
/**
* Handles a request for the attributes of a single chunk file: resolves the effective target,
* stats the chunk file (while holding the synced-storage-path lock) and sends size, blocks,
* mtime, atime and storageVersion in a GetChunkFileAttribsRespMsg.
*
* A non-existing chunk file is not an error: the response then carries zeroed attributes and
* storageVersion 0.
*
* @param ctx context used to send responses and to identify the peer for op stats.
* @return always true.
*/
bool GetChunkFileAttribsMsgEx::processIncoming(ResponseContext& ctx)
{
const char* logContext = "GetChunkFileAttribsMsg incoming";
App* app = Program::getApp();
std::string entryID(getEntryID() );
FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
int targetFD;
// zero-initialized, so that the error paths send all-zero attributes
struct stat statbuf{};
uint64_t storageVersion = 0;
// select the right targetID
uint16_t targetID = getTargetID();
if(isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR) )
{ // given targetID refers to a buddy mirror group
MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();
targetID = isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND) ?
mirrorBuddies->getSecondaryTargetID(targetID) :
mirrorBuddies->getPrimaryTargetID(targetID);
// note: only log message here, error handling will happen below through invalid targetFD
// NOTE(review): the next check after this one is the target lookup, not the targetFD check,
// so presumably ID 0 is caught by the "!target" branch - confirm that getTarget(0) fails.
if(unlikely(!targetID) )
LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
StringTk::uintToStr(getTargetID() ) );
}
auto* const target = app->getStorageTargets()->getTarget(targetID);
if (!target)
{
if (isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR))
{ /* buddy mirrored file => fail with GenericResp to make the caller retry.
mgmt will mark this target as (p)offline in a few moments. */
LOG(GENERAL, NOTICE, "Unknown target ID, refusing request.", targetID);
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
// (this early return does not update the operation counters)
return true;
}
LOG(GENERAL, ERR, "Unknown target ID.", targetID);
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
{ // get targetFD and check consistency state
bool skipResponse = false;
targetFD = getTargetFD(*target, ctx, &skipResponse);
if(unlikely(targetFD == -1) )
{ // failed => consistency state not good
memset(&statbuf, 0, sizeof(statbuf) ); // (just to mute clang warning)
if(skipResponse)
goto skip_response; // GenericResponseMsg sent
clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
goto send_response;
}
}
{ // valid targetID
SyncedStoragePaths* syncedPaths = app->getSyncedStoragePaths();
int statErrCode = 0;
std::string chunkPath = StorageTk::getFileChunkPath(getPathInfo(), entryID);
// the stat happens under the path lock; lockPath() returns the storageVersion that is
// reported together with the attributes
uint64_t newStorageVersion = syncedPaths->lockPath(entryID, targetID); // L O C K path
int statRes = fstatat(targetFD, chunkPath.c_str(), &statbuf, 0);
if(statRes)
{ // file not exists or error
// save errno before unlockPath() gets a chance to change it
statErrCode = errno;
}
else
{
// only a successful stat makes the storageVersion valid (!= 0)
storageVersion = newStorageVersion;
}
syncedPaths->unlockPath(entryID, targetID); // U N L O C K path
// note: non-existing file is not an error (storage version is 0, so nothing will be
// updated at the metadata node)
if((statRes == -1) && (statErrCode != ENOENT))
{ // error
clientErrRes = FhgfsOpsErr_INTERNAL;
LogContext(logContext).logErr(
"Unable to stat file: " + chunkPath + ". " + "SysErr: "
+ System::getErrString(statErrCode));
}
}
send_response:
ctx.sendResponse(
GetChunkFileAttribsRespMsg(clientErrRes, statbuf.st_size, statbuf.st_blocks,
statbuf.st_mtime, statbuf.st_atime, storageVersion) );
skip_response:
// update operation counters
app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(),
StorageOpCounter_GETLOCALFILESIZE, getMsgHeaderUserID() );
return true;
}
/**
 * Picks the directory FD to operate on (mirror dir for buddy mirror requests, chunks dir
 * otherwise) and refuses buddy mirror requests that address a primary whose consistency state
 * is not good.
 *
 * @param target the storage target the request refers to.
 * @param ctx context used to send the refusal (GenericResponseMsg), if any.
 * @param outResponseSent set to true only if a response was sent from within this method, which
 *    only happens together with a return value of -1.
 * @return the selected directory FD, or -1 if the request was refused.
 */
int GetChunkFileAttribsMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   const bool mirrored = isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   const auto state = target.getConsistencyState();
   const int dirFD = mirrored ? *target.getMirrorFD() : *target.getChunkFD();

   // good target, unmirrored request or request to the secondary => FD can be used
   const bool targetIsGood = (state == TargetConsistencyState_GOOD);
   if(targetIsGood || !mirrored ||
      isMsgHeaderFeatureFlagSet(GETCHUNKFILEATTRSMSG_FLAG_BUDDYMIRROR_SECOND) )
      return dirFD;

   // this is a msg to a non-good primary => refuse
   std::string refusalText = "Refusing request. Target consistency is not good. "
      "targetID: " + StringTk::uintToStr(target.getID());

   ctx.sendResponse(
      GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(refusalText)));

   *outResponseSent = true;
   return -1;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/net/message/storage/attribs/GetChunkFileAttribsMsg.h>
class StorageTarget;
// Server-side handler for GetChunkFileAttribsMsg: reports the attributes of a single chunk file.
class GetChunkFileAttribsMsgEx : public GetChunkFileAttribsMsg
{
public:
// Processes the received request; responses are sent through ctx.
virtual bool processIncoming(ResponseContext& ctx);
private:
// Returns the directory FD to work on or -1 if the request was refused; *outResponseSent
// tells whether a response was already sent in the latter case.
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
};

View File

@@ -0,0 +1,351 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/attribs/SetLocalAttrRespMsg.h>
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/MessagingTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "SetLocalAttrMsgEx.h"
#include <utime.h>
#include <boost/lexical_cast.hpp>
/**
 * Handles a request to change attributes of a single chunk file.
 *
 * Steps:
 *  - resolve the effective target ID (a buddy group ID is mapped to its primary or secondary
 *    target, depending on the BUDDYMIRROR_SECOND flag)
 *  - look up the target and its chunks/mirror dir FD (includes the consistency state check)
 *  - forward the request to the secondary (if this is a mirrored request to the primary)
 *  - apply atime/mtime changes (under the path lock, collecting the current dynamic attribs)
 *  - apply uid/gid changes (only if the USE_QUOTA flag is set)
 *  - send a SetLocalAttrRespMsg and update the operation counters
 *
 * Paths on which a GenericResponseMsg was already sent skip the regular response. All other
 * paths end at send_response, so the requestor always receives an answer.
 *
 * @param ctx context used to send responses and to identify the peer for op stats.
 * @return always true.
 */
bool SetLocalAttrMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "SetLocalAttrMsgEx incoming";

   App* app = Program::getApp();

   const SettableFileAttribs* attribs = getAttribs();
   int validAttribs = getValidAttribs();

   uint16_t targetID;
   bool chunkLocked = false;
   int targetFD;
   FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;
   DynamicFileAttribs currentDynAttribs(0, 0, 0, 0, 0);
   StorageTarget* target;

   // select the right targetID

   targetID = getTargetID();

   if(isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown group ID
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   { // unknown targetID
      if (isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR))
      { /* buddy mirrored file => fail with GenericResp to make the caller retry.
           mgmt will mark this target as (p)offline in a few moments. */
         ctx.sendResponse(
            GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
         return true;
      }

      LOG(GENERAL, ERR, "Unknown target ID.", targetID);
      clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;

      /* report the error to the requestor. (returning here without a response would leave the
         requestor waiting and would make the assignment above a dead store.) */
      goto send_response;
   }

   { // get targetFD and check consistency state
      bool skipResponse = false;

      targetFD = getTargetFD(*target, ctx, &skipResponse);
      if(unlikely(targetFD == -1) )
      { // failed => consistency state not good
         if(skipResponse)
            goto skip_response; // GenericResponseMsg sent

         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   // forward to secondary (if appropriate)

   clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
   if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
   {
      if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
         goto skip_response; // GenericResponseMsg sent

      goto send_response;
   }

   if(validAttribs & (SETATTR_CHANGE_MODIFICATIONTIME | SETATTR_CHANGE_LASTACCESSTIME) )
   { // we only handle access and modification time updates here
      struct timespec times[2] = {{0, 0}, {0, 0}};

      // a timestamp that is not to be changed is marked with UTIME_OMIT

      if (validAttribs & SETATTR_CHANGE_LASTACCESSTIME)
      {
         times[MsgHelperIO_ATIME_POS].tv_sec = attribs->lastAccessTimeSecs;
         times[MsgHelperIO_ATIME_POS].tv_nsec = 0;
      }
      else
         times[MsgHelperIO_ATIME_POS].tv_nsec = UTIME_OMIT;

      if (validAttribs & SETATTR_CHANGE_MODIFICATIONTIME)
      {
         times[MsgHelperIO_MTIME_POS].tv_sec = attribs->modificationTimeSecs;
         times[MsgHelperIO_MTIME_POS].tv_nsec = 0;
      }
      else
         times[MsgHelperIO_MTIME_POS].tv_nsec = UTIME_OMIT;

      // generate path to chunk file...

      std::string pathStr;

      pathStr = StorageTk::getFileChunkPath(getPathInfo(), getEntryID() );

      // update timestamps...

      // in case of a timestamp update we need extra information on the metadata server, namely
      // a storageVersion and the current dynamic attribs of the chunk
      // => set the new times while holding the lock and return the current attribs and a
      // storageVersion in response later
      uint64_t storageVersion = Program::getApp()->getSyncedStoragePaths()->lockPath(getEntryID(),
         targetID);

      int utimeRes = MsgHelperIO::utimensat(targetFD, pathStr.c_str(), times, 0);

      if (utimeRes == 0)
      {
         bool getDynAttribsRes = StorageTkEx::getDynamicFileAttribs(targetFD, pathStr.c_str(),
            &currentDynAttribs.fileSize, &currentDynAttribs.numBlocks,
            &currentDynAttribs.modificationTimeSecs, &currentDynAttribs.lastAccessTimeSecs);

         // If stat failed (after utimensat worked!), something really bad happened, so the
         // attribs are definitely invalid. Otherwise set storageVersion in dynAttribs
         if (getDynAttribsRes)
            currentDynAttribs.storageVersion = storageVersion;
      }
      else if (errno == ENOENT)
      {
         // Entry doesn't exist. Not an error, but we need to return fake dynamic attributes for
         // the metadata server to calc the values (fake in this sense means, we send the
         // timestamps back that we tried to set, but have real filesize and numBlocks, i.e. 0
         currentDynAttribs.storageVersion = storageVersion;
         currentDynAttribs.fileSize = 0;
         currentDynAttribs.numBlocks = 0;
         currentDynAttribs.modificationTimeSecs = attribs->modificationTimeSecs;
         currentDynAttribs.lastAccessTimeSecs = attribs->lastAccessTimeSecs;
      }
      else
      { // error
         int errCode = errno;

         // log the saved error code (errno itself may change while the message is built)
         LogContext(logContext).logErr("Unable to change file time: " + pathStr + ". "
            "SysErr: " + System::getErrString(errCode));

         clientErrRes = FhgfsOpsErrTk::fromSysErr(errCode);
      }

      Program::getApp()->getSyncedStoragePaths()->unlockPath(getEntryID(), targetID);
   }

   if(isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_USE_QUOTA) &&
      (validAttribs & (SETATTR_CHANGE_USERID | SETATTR_CHANGE_GROUPID) ) )
   { // we only handle UID and GID updates here
      // -1 leaves the respective ID unchanged in fchownat()
      uid_t uid = -1;
      gid_t gid = -1;

      if(validAttribs & SETATTR_CHANGE_USERID)
         uid = attribs->userID;

      if(validAttribs & SETATTR_CHANGE_GROUPID)
         gid = attribs->groupID;

      // generate path to chunk file...

      std::string pathStr;

      pathStr = StorageTk::getFileChunkPath(getPathInfo(), getEntryID() );

      // update UID and GID...

      int chownRes = fchownat(targetFD, pathStr.c_str(), uid, gid, 0);
      if(chownRes == -1)
      { // could be an error
         int errCode = errno;

         if(errCode != ENOENT)
         { // unhandled chown() error
            // log the saved error code (errno itself may change while the message is built)
            LogContext(logContext).logErr("Unable to change file owner: " + pathStr + ". "
               "SysErr: " + System::getErrString(errCode) );

            clientErrRes = FhgfsOpsErrTk::fromSysErr(errCode);
         }
      }
   }

send_response:

   if(chunkLocked) // unlock chunk
      app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );

   ctx.sendResponse(SetLocalAttrRespMsg(clientErrRes, currentDynAttribs));

skip_response:

   // update operation counters...
   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_SETLOCALATTR,
      getMsgHeaderUserID() );

   return true;
}
/**
 * Picks the directory FD to operate on (mirror dir for buddy mirror requests, chunks dir
 * otherwise) and refuses buddy mirror requests that address a primary whose consistency state
 * is not good.
 *
 * @param target the storage target the request refers to.
 * @param ctx context used to send the refusal (GenericResponseMsg), if any.
 * @param outResponseSent set to true only if a response was sent from within this method, which
 *    only happens together with a return value of -1.
 * @return the selected directory FD, or -1 if the request was refused.
 */
int SetLocalAttrMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   const bool mirrored = isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   const auto state = target.getConsistencyState();
   const int dirFD = mirrored ? *target.getMirrorFD() : *target.getChunkFD();

   // good target, unmirrored request or request to the secondary => FD can be used
   const bool targetIsGood = (state == TargetConsistencyState_GOOD);
   if(targetIsGood || !mirrored ||
      isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) )
      return dirFD;

   // this is a msg to a non-good primary => refuse
   std::string refusalText = "Refusing request. Target consistency is not good. "
      "targetID: " + StringTk::uintToStr(target.getID());

   ctx.sendResponse(
      GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(refusalText)));

   *outResponseSent = true;
   return -1;
}
/**
* If this is a buddy mirror msg and we are the primary, forward this msg to secondary.
*
* Nothing is forwarded (and SUCCESS is returned) if the BUDDYMIRROR flag is not set or if the
* BUDDYMIRROR_SECOND flag is already set.
*
* @param target local target; queried for "buddy resync in progress" and marked as "buddy needs
* resync" when the secondary is offline or reports an unknown target.
* @param ctx used to send a GenericResponseMsg if forwarding fails.
* @param outChunkLocked set to true if the chunk was locked here and is still locked on return
* (the caller is then responsible for unlocking it).
* @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
* case *outChunkLocked==false is guaranteed); _SUCCESS if local processing shall go ahead;
* otherwise the error reported by the secondary.
* @throw SocketException if sending of GenericResponseMsg fails.
*/
FhgfsOpsErr SetLocalAttrMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked)
{
const char* logContext = "SetLocalAttrMsg incoming (forward to secondary)";
App* app = Program::getApp();
ChunkLockStore* chunkLockStore = app->getChunkLockStore();
*outChunkLocked = false;
if(!isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR) ||
isMsgHeaderFeatureFlagSet(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND) )
return FhgfsOpsErr_SUCCESS; // nothing to do
// mirrored chunk should be modified, check if resync is in progress and lock chunk
// (the chunk is only locked while a buddy resync is in progress)
*outChunkLocked = target.getBuddyResyncInProgress();
if(*outChunkLocked)
chunkLockStore->lockChunk(target.getID(), getEntryID() ); // lock chunk
// instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
addMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);
RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_SetLocalAttrResp);
// note: getTargetID() is the ID as received in the msg (the buddy group ID for mirrored msgs)
RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);
FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);
// remove the flag that we just added for secondary
// (must happen on every path, because "this" is used for local processing afterwards)
unsetMsgHeaderFeatureFlag(SETLOCALATTRMSG_FLAG_BUDDYMIRROR_SECOND);
if(unlikely(
(commRes == FhgfsOpsErr_COMMUNICATION) &&
(rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
{
LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
// buddy is marked offline, so local msg processing will be done and buddy needs resync
// (a chunk lock taken above stays in place here; *outChunkLocked tells the caller)
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
}
if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
{
LogContext(logContext).log(Log_DEBUG, "Forwarding failed: "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
"error: " + boost::lexical_cast<std::string>(commRes));
// release the chunk lock before reporting the failure, so that _COMMUNICATION always comes
// with *outChunkLocked==false
if(*outChunkLocked)
{ // unlock chunk
chunkLockStore->unlockChunk(target.getID(), getEntryID() );
*outChunkLocked = false;
}
std::string genericRespStr = "Communication with secondary failed. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );
ctx.sendResponse(
GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(genericRespStr)));
return FhgfsOpsErr_COMMUNICATION;
}
// NOTE(review): unchecked cast; relies on rrArgs having requested NETMSGTYPE_SetLocalAttrResp
const auto respMsg = (const SetLocalAttrRespMsg*)rrArgs.outRespMsg.get();
FhgfsOpsErr secondaryRes = respMsg->getResult();
if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
{
if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
{
/* local msg processing shall be done and buddy needs resync
(this is normal when a storage is restarted without a broken secondary target, so we
report success to a client in this case) */
LogContext(logContext).log(Log_DEBUG,
"Secondary reports unknown target error and will need resync. "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
target.setBuddyNeedsResync(true);
return FhgfsOpsErr_SUCCESS;
}
// any other secondary error is logged and passed on to the caller (chunk lock, if taken,
// stays in place)
LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
boost::lexical_cast<std::string>(secondaryRes) + "; "
"mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );
return secondaryRes;
}
return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,19 @@
#pragma once
#include <common/storage/StorageErrors.h>
#include <common/net/message/storage/attribs/SetLocalAttrMsg.h>
class StorageTarget;
/**
 * Extends SetLocalAttrMsg with the processing of incoming messages.
 */
class SetLocalAttrMsgEx : public SetLocalAttrMsg
{
public:
// entry point for handling a received message; responses are sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// returns a file descriptor for the target or -1; *outResponseSent tells whether a response was
// already sent from within the method
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
// forwards a buddy mirror msg from the primary to the secondary; *outChunkLocked tells whether
// the chunk was locked inside and is still locked on return
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked);
};

View File

@@ -0,0 +1,26 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/chunkbalancing/CpChunkPathsRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "CpChunkPathsMsgEx.h"
/**
 * Placeholder handler: logs that the message is not implemented yet and answers with a
 * CpChunkPathsRespMsg carrying FhgfsOpsErr_SUCCESS.
 *
 * @return always true.
 */
bool CpChunkPathsMsgEx::processIncoming(ResponseContext& ctx)
{
   LogContext("CpChunkPathsMsg incoming").logErr(
      "This message is not yet implemented. \n It should relay chunk information from metadata "
      "to storage and trigger copy chunk operation. ");

   FhgfsOpsErr result = FhgfsOpsErr_SUCCESS;

   ctx.sendResponse(CpChunkPathsRespMsg(result));

   return true;
}
/**
 * Stub: acquires ChunkBalanceJobMutex for the duration of the call, but does not create a job.
 *
 * @return always nullptr.
 */
ChunkBalancerJob* CpChunkPathsMsgEx::addChunkBalanceJob()
{
   std::lock_guard<Mutex> jobLock(ChunkBalanceJobMutex);

   return nullptr;
}

View File

@@ -0,0 +1,16 @@
#pragma once
#include <common/net/message/storage/chunkbalancing/CpChunkPathsMsg.h>
class ChunkBalancerJob;
/**
 * Extends CpChunkPathsMsg with the processing of incoming messages.
 */
class CpChunkPathsMsgEx : public CpChunkPathsMsg
{
public:
// entry point for handling a received message; the response is sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// held by addChunkBalanceJob() while it runs
// NOTE(review): this is a per-message member; confirm which shared state it is meant to guard
Mutex ChunkBalanceJobMutex;
ChunkBalancerJob* addChunkBalanceJob();
};

View File

@@ -0,0 +1,57 @@
#include <common/net/message/storage/creating/RmChunkPathsRespMsg.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "RmChunkPathsMsgEx.h"
/**
 * Removes the chunk paths given in the message (relative to the chunks dir or the mirror dir of
 * the target) and reports all paths that could not be removed in a RmChunkPathsRespMsg.
 *
 * A path that does not exist (ENOENT) is not counted as failed. After each removal, removal of
 * the parent directory is attempted via the chunk store.
 *
 * @return always true.
 */
bool RmChunkPathsMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "RmChunkPathsMsg incoming";

   App* app = Program::getApp();
   ChunkStore* chunkStore = app->getChunkDirStore();

   StringList& relativePaths = getRelativePaths();
   StringList failedPaths;

   const uint16_t targetID = getTargetID();

   auto* const target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   { // unknown targetID => every path counts as failed
      LogContext(logContext).logErr("Unknown targetID: " + StringTk::uintToStr(targetID));

      failedPaths = relativePaths;

      ctx.sendResponse(RmChunkPathsRespMsg(&failedPaths) );

      return true;
   }

   // valid targetID => pick the dir that the relative paths refer to
   const int targetFD = isMsgHeaderFeatureFlagSet(RMCHUNKPATHSMSG_FLAG_BUDDYMIRROR)
      ? *target->getMirrorFD()
      : *target->getChunkFD();

   for (const auto& relativePath : relativePaths)
   {
      const int unlinkRes = unlinkat(targetFD, relativePath.c_str(), 0);

      if ( (unlinkRes != 0) && (errno != ENOENT) )
      {
         LogContext(logContext).logErr(
            "Unable to remove entry: " + relativePath + "; error: " + System::getErrString());

         failedPaths.push_back(relativePath);
         continue;
      }

      // entry is gone; it might have been the last one => try to remove the parent directory
      Path parentDirPath(StorageTk::getPathDirname(relativePath));
      chunkStore->rmdirChunkDirPath(targetFD, &parentDirPath);
   }

   ctx.sendResponse(RmChunkPathsRespMsg(&failedPaths) );

   return true;
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <common/net/message/storage/creating/RmChunkPathsMsg.h>
/**
 * Extends RmChunkPathsMsg with the processing of incoming messages.
 */
class RmChunkPathsMsgEx : public RmChunkPathsMsg
{
public:
// entry point for handling a received message; the response is sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,268 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "UnlinkLocalFileMsgEx.h"
#include <boost/lexical_cast.hpp>
/**
 * Unlinks a single chunk file on a local target.
 *
 * Steps: resolve the target ID (for buddy mirror msgs the given ID is a buddy group ID), get the
 * target dir file descriptor (incl. consistency state check), forward to the secondary if
 * appropriate, unlink the chunk file and send an UnlinkLocalFileRespMsg. A chunk file that does
 * not exist (ENOENT) is not treated as an error.
 *
 * On the paths where getTargetFD() or forwardToSecondary() have already sent a
 * GenericResponseMsg, no UnlinkLocalFileRespMsg is sent.
 *
 * @return always true.
 */
bool UnlinkLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "UnlinkChunkFileMsg incoming";

   App* app = Program::getApp();
   ChunkStore* chunkDirStore = app->getChunkDirStore();

   FhgfsOpsErr clientErrRes = FhgfsOpsErr_SUCCESS;

   // note: all locals are declared up here, because the gotos below must not skip initializations
   uint16_t targetID;
   bool chunkLocked = false;
   int targetFD = -1;
   Path chunkDirPath;
   const PathInfo* pathInfo = getPathInfo();
   bool hasOrigFeature = pathInfo->hasOrigFeature();
   int unlinkRes = -1;
   StorageTarget* target;

   // select the right targetID

   targetID = getTargetID();

   if(isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown target
         LogContext(logContext).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getTargetID() ) );
         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   {
      if (isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR))
      { /* buddy mirrored file => fail with GenericResp to make the caller retry.
           mgmt will mark this target as (p)offline in a few moments. */
         ctx.sendResponse(
            GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, "Unknown target ID"));
         return true;
      }

      LOG(GENERAL, ERR, "Unknown targetID.", targetID);
      clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;

      // the error must be reported to the requestor (returning here would leave it without any
      // response)
      goto send_response;
   }

   { // get targetFD and check consistency state
      bool skipResponse = false;

      targetFD = getTargetFD(*target, ctx, &skipResponse);
      if(unlikely(targetFD == -1) )
      { // failed => consistency state not good
         if(skipResponse)
            goto skip_response; // GenericResponseMsg sent

         clientErrRes = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   // forward to secondary (if appropriate)

   clientErrRes = forwardToSecondary(*target, ctx, &chunkLocked);
   if(unlikely(clientErrRes != FhgfsOpsErr_SUCCESS) )
   {
      if(clientErrRes == FhgfsOpsErr_COMMUNICATION)
         goto skip_response; // GenericResponseMsg sent

      goto send_response;
   }

   { // valid targetID

      // generate path to chunk file...

      std::string chunkFilePathStr; // chunkDirPathStr + '/' + entryID

      StorageTk::getChunkDirChunkFilePath(pathInfo, getEntryID(), hasOrigFeature, chunkDirPath,
         chunkFilePathStr);

      unlinkRes = unlinkat(targetFD, chunkFilePathStr.c_str(), 0);
      if( (unlinkRes == -1) && (errno != ENOENT) )
      { // error
         LogContext(logContext).logErr("Unable to unlink file: " + chunkFilePathStr + ". " +
            "SysErr: " + System::getErrString() );

         clientErrRes = FhgfsOpsErr_INTERNAL;
      }
      else
      { // success
         LogContext(logContext).log(Log_DEBUG, "File unlinked: " + chunkFilePathStr);
      }
   }

send_response:

   if(chunkLocked) // unlock chunk
      app->getChunkLockStore()->unlockChunk(targetID, getEntryID() );

   ctx.sendResponse(UnlinkLocalFileRespMsg(clientErrRes) );

skip_response:

   // try to rmdir chunkDirPath (in case this was the last chunkfile in a dir)
   // (unlinkRes is still -1 if the unlink was never attempted, so nothing happens in that case)
   if (!unlinkRes && hasOrigFeature)
      chunkDirStore->rmdirChunkDirPath(targetFD, &chunkDirPath);

   // update operation counters...

   app->getNodeOpStats()->updateNodeOp(ctx.getSocket()->getPeerIP(), StorageOpCounter_UNLINK,
      getMsgHeaderUserID() );

   return true;
}
/**
 * Selects the mirror dir or chunks dir file descriptor of the target and refuses the request if
 * it is a buddy mirror msg for the primary while the target consistency state is not good.
 *
 * @param outResponseSent true if a response was sent from within this method; can only be true if
 *    -1 is returned.
 * @return -1 if consistency state was not good (in which case a special response is sent within
 *    this method), otherwise the file descriptor to chunks dir (or mirror dir).
 */
int UnlinkLocalFileMsgEx::getTargetFD(const StorageTarget& target, ResponseContext& ctx,
   bool* outResponseSent)
{
   const bool mirrored = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR);

   *outResponseSent = false;

   const auto consistencyState = target.getConsistencyState();
   const int targetFD = mirrored ? *target.getMirrorFD() : *target.getChunkFD();

   // a mirrored msg without the "second" flag is addressed to the primary
   const bool addressedToPrimary = mirrored &&
      !isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   if (unlikely(consistencyState != TargetConsistencyState_GOOD) && addressedToPrimary)
   { // this is a msg to a non-good primary
      std::string refusalStr = "Refusing request. Target consistency is not good. "
         "targetID: " + StringTk::uintToStr(target.getID());

      ctx.sendResponse(
         GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR, std::move(refusalStr)));

      *outResponseSent = true;

      return -1;
   }

   return targetFD;
}
/**
 * Forwards this message to the secondary of the buddy group, provided that it is a buddy mirror
 * msg which was not itself already addressed to the secondary.
 *
 * @param target local target; gets marked as "buddy needs resync" when the secondary is offline
 *    or reports an unknown target.
 * @param ctx used for sending a GenericResponseMsg if communication with the secondary fails.
 * @param outChunkLocked set to true if the chunk was locked in here because a buddy resync is in
 *    progress; reset to false (after unlocking) on the _COMMUNICATION error path.
 * @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
 *    case *outChunkLocked==false is guaranteed); the result of the secondary if it reported an
 *    error other than _UNKNOWNTARGET; _SUCCESS in all other cases.
 * @throw SocketException if sending of GenericResponseMsg fails.
 */
FhgfsOpsErr UnlinkLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
   bool* outChunkLocked)
{
   const char* logContext = "UnlinkLocalFileMsg incoming (forward to secondary)";

   App* app = Program::getApp();
   ChunkLockStore* chunkLockStore = app->getChunkLockStore();

   *outChunkLocked = false;

   const bool isMirrorMsg = isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR);
   if (!isMirrorMsg || isMsgHeaderFeatureFlagSet(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
      return FhgfsOpsErr_SUCCESS; // not mirrored or already addressed to the secondary

   // a mirrored chunk is about to be modified => lock it if a resync is running
   if (target.getBuddyResyncInProgress() )
   {
      *outChunkLocked = true;
      chunkLockStore->lockChunk(target.getID(), getEntryID() );
   }

   // "this" is re-sent as-is; the "buddymirror second" flag is only set for the transfer
   addMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_UnlinkLocalFileResp);
   RequestResponseTarget rrTarget(getTargetID(), app->getTargetMapper(), app->getStorageNodes(),
      app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);

   const FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   unsetMsgHeaderFeatureFlag(UNLINKLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   if (unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   {
      if ( (commRes == FhgfsOpsErr_COMMUNICATION) &&
         (rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) )
      { // buddy is marked offline => process locally and remember that buddy needs a resync
         LOG_DEBUG(logContext, Log_DEBUG, std::string("Secondary is offline and will need resync. ") +
            "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

         target.setBuddyNeedsResync(true);

         return FhgfsOpsErr_SUCCESS;
      }

      // forwarding failed although the buddy is not known to be offline
      LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) + "; "
         "error: " + boost::lexical_cast<std::string>(commRes));

      if (*outChunkLocked)
      {
         chunkLockStore->unlockChunk(target.getID(), getEntryID() );
         *outChunkLocked = false;
      }

      std::string genericRespStr = "Communication with secondary failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() );

      ctx.sendResponse(GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR,
         std::move(genericRespStr)));

      return FhgfsOpsErr_COMMUNICATION;
   }

   // communication worked => evaluate what the secondary reported
   UnlinkLocalFileRespMsg* respMsg = (UnlinkLocalFileRespMsg*)rrArgs.outRespMsg.get();
   const FhgfsOpsErr secondaryRes = respMsg->getResult();

   if (secondaryRes == FhgfsOpsErr_SUCCESS)
      return FhgfsOpsErr_SUCCESS;

   if (secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
   {
      /* local msg processing shall be done and buddy needs resync
         (this is normal when a storage is restarted without a broken secondary target, so we
         report success to a client in this case) */
      LogContext(logContext).log(Log_DEBUG,
         "Secondary reports unknown target error and will need resync. "
         "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

      target.setBuddyNeedsResync(true);

      return FhgfsOpsErr_SUCCESS;
   }

   LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
      boost::lexical_cast<std::string>(secondaryRes) + "; "
      "mirror buddy group ID: " + StringTk::uintToStr(getTargetID() ) );

   return secondaryRes;
}

View File

@@ -0,0 +1,17 @@
#pragma once
#include <common/net/message/storage/creating/UnlinkLocalFileMsg.h>
class StorageTarget;
/**
 * Extends UnlinkLocalFileMsg with the processing of incoming messages.
 */
class UnlinkLocalFileMsgEx : public UnlinkLocalFileMsg
{
public:
// entry point for handling a received message; responses are sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// returns the chunks dir (or mirror dir) file descriptor, or -1 if the request was refused;
// *outResponseSent tells whether a response was already sent from within the method
int getTargetFD(const StorageTarget& target, ResponseContext& ctx, bool* outResponseSent);
// forwards a buddy mirror msg from the primary to the secondary; *outChunkLocked tells whether
// the chunk was locked inside and is still locked on return
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx,
bool* outChunkLocked);
};

View File

@@ -0,0 +1,147 @@
#include <program/Program.h>
#include "ListChunkDirIncrementalMsgEx.h"
/**
 * Reads a batch of chunk directory entries as requested by the message and sends the result,
 * entry names, entry types and the new directory offset back in a
 * ListChunkDirIncrementalRespMsg.
 *
 * @return always true.
 */
bool ListChunkDirIncrementalMsgEx::processIncoming(ResponseContext& ctx)
{
   // request parameters
   const uint16_t targetID = getTargetID();
   const bool isMirror = getIsMirror();
   std::string relativeDir = getRelativeDir();
   const int64_t offset = getOffset();
   const unsigned maxOutEntries = getMaxOutEntries();
   const bool onlyFiles = getOnlyFiles();

   // results
   StringList names;
   IntList entryTypes;
   int64_t newOffset{0};

   FhgfsOpsErr result = readChunks(targetID, isMirror, relativeDir, offset, maxOutEntries,
      onlyFiles, names, entryTypes, newOffset);

   ctx.sendResponse(ListChunkDirIncrementalRespMsg(result, &names, &entryTypes, newOffset) );

   return true;
}
/*
 * Reads up to maxOutEntries entries of a directory below the chunks dir (or mirror dir) of a
 * target, starting at the given directory offset.
 *
 * CAUTION: No locking here!
 *
 * @param relativeDir path relative to the chunks/mirror dir; empty means the dir itself.
 * @param offset directory stream position to continue from (passed to seekdir() ).
 * @param maxOutEntries max number of entries to read (filtered directories count as well).
 * @param onlyFiles true to leave directories out of the result lists.
 * @param outNames names of the entries read.
 * @param outEntryTypes DirEntryType of each entry in outNames (as int).
 * @param outNewOffset directory offset behind the last entry that was read; not modified if no
 *    entry was read.
 * @return _UNKNOWNTARGET if the target does not exist; _PATHNOTEXISTS if the directory does not
 *    exist and the message has "ignore not exists" set; _INTERNAL for other open or read errors;
 *    _SUCCESS otherwise.
 */
FhgfsOpsErr ListChunkDirIncrementalMsgEx::readChunks(uint16_t targetID, bool isMirror,
   std::string& relativeDir, int64_t offset, unsigned maxOutEntries, bool onlyFiles,
   StringList& outNames, IntList& outEntryTypes, int64_t &outNewOffset)
{
   App* app = Program::getApp();

   uint64_t numEntries = 0;
   struct dirent* dirEntry = NULL;

   auto* const target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
      return FhgfsOpsErr_UNKNOWNTARGET;

   const int targetFD = isMirror ? *target->getMirrorFD() : *target->getChunkFD();

   int dirFD;

   if (likely(!relativeDir.empty()))
      dirFD = openat(targetFD, relativeDir.c_str(), O_RDONLY);
   else
   {
      dirFD = dup(targetFD);

      // only touch the flags of a valid fd, otherwise the errno of dup() would get lost
      if (dirFD != -1)
         fcntl(dirFD, F_SETFL, O_RDONLY);
   }

   if(dirFD == -1)
   {
      int errCode = errno;

      if((errCode != ENOENT)
         || (!getIgnoreNotExists()))
      {
         LogContext(__func__).logErr(
            "Unable to open chunks directory; targetID: " + StringTk::uintToStr(targetID)
               + "; isMirror: " + StringTk::intToStr((int) isMirror) + "; relativeDir: "
               + relativeDir + ". SysErr: " + System::getErrString(errCode));
      }
      else
      if (errCode == ENOENT)
         return FhgfsOpsErr_PATHNOTEXISTS;

      return FhgfsOpsErr_INTERNAL;
   }

   // on success, the dir handle takes over dirFD (closedir() will close it)
   DIR* dirHandle = fdopendir(dirFD);
   if(!dirHandle)
   {
      int errCode = errno;

      close(dirFD);

      if((errCode != ENOENT)
         || (!getIgnoreNotExists()))
      {
         LogContext(__func__).logErr(
            "Unable to create dir handle; targetID: " + StringTk::uintToStr(targetID)
               + "; isMirror: " + StringTk::intToStr((int) isMirror) + "; relativeDir: "
               + relativeDir + ". SysErr: " + System::getErrString(errCode));
      }
      else
      if (errCode == ENOENT)
         return FhgfsOpsErr_PATHNOTEXISTS;

      return FhgfsOpsErr_INTERNAL;
   }

   // seek to offset
   seekdir(dirHandle, offset); // (seekdir has no return value)

   errno = 0; // recommended by posix (readdir(3p) )

   // the actual entry reading
   for(; (numEntries < maxOutEntries) && (dirEntry = StorageTk::readdirFiltered(dirHandle));
      numEntries++)
   {
      // get the entry type
      DirEntryType entryType;

      if(dirEntry->d_type != DT_UNKNOWN)
         entryType = StorageTk::direntToDirEntryType(dirEntry->d_type);
      else
      { // file system did not provide the type => stat the entry
         struct stat statBuf;

         int statRes = fstatat(dirFD, dirEntry->d_name, &statBuf, 0);
         if(statRes == 0)
            entryType = MetadataTk::posixFileTypeToDirEntryType(statBuf.st_mode);
         else
            entryType = DirEntryType_INVALID;
      }

      if ( (entryType != DirEntryType_DIRECTORY) || (!onlyFiles) )
      {
         outNames.push_back(dirEntry->d_name);
         outEntryTypes.push_back(int(entryType));
      }

      outNewOffset = dirEntry->d_off;

      /* reset before the next read: a failed fstatat() above (e.g. entry removed meanwhile)
         leaves errno set and would otherwise be mistaken for a read error at the end of the
         directory */
      errno = 0;
   }

   int errnoCopy = errno; // copy value before closedir() for readdir() error check below

   closedir(dirHandle);

   // NULL entry with errno set means read error; NULL entry without errno means end of directory
   if(!dirEntry && errnoCopy)
   {
      LogContext(__func__).logErr("Unable to fetch chunk entries. "
         "targetID: " + StringTk::uintToStr(targetID) + "; "
         "isMirror: " + StringTk::intToStr((int) isMirror) + "; "
         "relativeDir: " + relativeDir + "; "
         "SysErr: " + System::getErrString(errnoCopy) );

      return FhgfsOpsErr_INTERNAL;
   }

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <common/net/message/storage/listing/ListChunkDirIncrementalMsg.h>
/**
 * Extends ListChunkDirIncrementalMsg with the processing of incoming messages.
 */
class ListChunkDirIncrementalMsgEx : public ListChunkDirIncrementalMsg
{
public:
// entry point for handling a received message; the response is sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// reads up to maxOutEntries directory entries starting at offset; names, types and the new
// offset are returned through the out parameters
FhgfsOpsErr readChunks(uint16_t targetID, bool isMirror, std::string& relativeDir,
int64_t offset, unsigned maxOutEntries, bool onlyFiles, StringList& outNames,
IntList& outEntryTypes, int64_t &outNewOffset);
};

View File

@@ -0,0 +1,21 @@
#include "GetStorageResyncStatsMsgEx.h"
#include <common/net/message/storage/mirroring/GetStorageResyncStatsRespMsg.h>
#include <program/Program.h>
/**
 * Sends the statistics of the resync job of the requested target. If there is no resync job for
 * the target, default-constructed statistics are sent.
 *
 * @return always true.
 */
bool GetStorageResyncStatsMsgEx::processIncoming(ResponseContext& ctx)
{
   BuddyResyncer* buddyResyncer = Program::getApp()->getBuddyResyncer();

   StorageBuddyResyncJobStatistics jobStats;

   if (BuddyResyncJob* resyncJob = buddyResyncer->getResyncJob(getTargetID() ) )
      resyncJob->getJobStats(jobStats);

   ctx.sendResponse(GetStorageResyncStatsRespMsg(&jobStats) );

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/storage/mirroring/GetStorageResyncStatsMsg.h>
#include <common/storage/StorageErrors.h>
/**
 * Extends GetStorageResyncStatsMsg with the processing of incoming messages.
 */
class GetStorageResyncStatsMsgEx : public GetStorageResyncStatsMsg
{
public:
// entry point for handling a received message; the response is sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,344 @@
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/storage/mirroring/ResyncLocalFileRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <toolkit/StorageTkEx.h>
#include <program/Program.h>
#include "ResyncLocalFileMsgEx.h"
/**
 * Applies one block of resync data to a chunk file of a local target: opens (and creates if
 * necessary) the chunk file, writes the data buffer at the given offset, optionally truncates
 * the file behind the written block and optionally sets mode and owner of the file.
 *
 * With the "chunk balance buddy mirror" flag, the given target ID is a buddy group ID and the
 * msg is also forwarded to the secondary. If forwarding fails, nothing is written locally; if it
 * fails with _COMMUNICATION, the GenericResponseMsg sent by forwardToSecondary() is the only
 * response.
 *
 * Write, truncate, chmod or chown failures (and open failures) set the target state to
 * TargetConsistencyState_BAD.
 *
 * @return always true.
 */
bool ResyncLocalFileMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();
   ChunkStore* chunkStore = app->getChunkDirStore();

   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;

   // note: all locals are declared up here, because the gotos below must not skip initializations
   const char* dataBuf = getDataBuf();
   uint16_t targetID = getResyncToTargetID();
   size_t count = getCount();
   int64_t offset = getOffset();
   std::string relativeChunkPathStr = getRelativePathStr();
   int writeErrno;
   bool writeRes;
   StorageTarget* target;

   int openFlags = O_WRONLY | O_CREAT;
   SessionQuotaInfo quotaInfo(false, false, 0, 0);

   int targetFD;
   int fd;
   FhgfsOpsErr openRes;

   // should only be used for chunk balancing, to sync data to both buddies
   if(isMsgHeaderFeatureFlagSet(RESYNCLOCALFILEMSG_FLAG_CHUNKBALANCE_BUDDYMIRROR) )
   { // given targetID refers to a buddy mirror group
      MirrorBuddyGroupMapper* mirrorBuddies = app->getMirrorBuddyGroupMapper();

      targetID = isMsgHeaderFeatureFlagSet(RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) ?
         mirrorBuddies->getSecondaryTargetID(targetID) :
         mirrorBuddies->getPrimaryTargetID(targetID);

      if(unlikely(!targetID) )
      { // unknown target
         // (targetID was overwritten with 0 above, so log the group ID from the msg)
         LogContext(__func__).logErr("Invalid mirror buddy group ID: " +
            StringTk::uintToStr(getResyncToTargetID() ) );
         retVal = FhgfsOpsErr_UNKNOWNTARGET;
         goto send_response;
      }
   }

   target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   {
      LogContext(__func__).logErr(
         "Error resyncing chunk; Could not open FD; chunkPath: "
            + relativeChunkPathStr);
      retVal = FhgfsOpsErr_PATHNOTEXISTS;
      goto send_response;
   }

   // forward to secondary (if appropriate)

   retVal = forwardToSecondary(*target, ctx);
   if(unlikely(retVal != FhgfsOpsErr_SUCCESS) )
   {
      if(retVal == FhgfsOpsErr_COMMUNICATION)
         return true; // GenericResponseMsg sent, so there must not be a second response

      goto send_response;
   }

   //check if path is relative to buddy mirror dir or chunks dir
   targetFD = isMsgHeaderFeatureFlagSet(RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR)
      ? *target->getMirrorFD()
      : *target->getChunkFD();

   // always truncate when we write the very first block of a file
   if(!offset && !isMsgHeaderFeatureFlagSet (RESYNCLOCALFILEMSG_FLAG_NODATA) )
      openFlags |= O_TRUNC;

   openRes = chunkStore->openChunkFile(targetFD, NULL, relativeChunkPathStr, true,
      openFlags, &fd, &quotaInfo, {});

   if (openRes != FhgfsOpsErr_SUCCESS)
   {
      LogContext(__func__).logErr(
         "Error resyncing chunk; Could not open FD; chunkPath: "
            + relativeChunkPathStr);
      retVal = FhgfsOpsErr_PATHNOTEXISTS;

      target->setState(TargetConsistencyState_BAD);

      goto send_response;
   }

   if(isMsgHeaderFeatureFlagSet (RESYNCLOCALFILEMSG_FLAG_NODATA)) // do not sync actual data
      goto set_attribs;

   if(isMsgHeaderFeatureFlagSet (RESYNCLOCALFILEMSG_CHECK_SPARSE))
      writeRes = doWriteSparse(fd, dataBuf, count, offset, writeErrno);
   else
      writeRes = doWrite(fd, dataBuf, count, offset, writeErrno);

   if(unlikely(!writeRes) )
   { // write error occured (could also be e.g. disk full)
      LogContext(__func__).logErr(
         "Error resyncing chunk; chunkPath: " + relativeChunkPathStr + "; error: "
            + System::getErrString(writeErrno));

      target->setState(TargetConsistencyState_BAD);

      retVal = FhgfsOpsErrTk::fromSysErr(writeErrno);
   }

   if(isMsgHeaderFeatureFlagSet (RESYNCLOCALFILEMSG_FLAG_TRUNC))
   {
      int truncErrno;

      // we trunc after a possible write, so we need to trunc at offset+count
      bool truncRes = doTrunc(fd, offset + count, truncErrno);

      if(!truncRes)
      {
         LogContext(__func__).logErr(
            "Error resyncing chunk; chunkPath: " + relativeChunkPathStr + "; error: "
               + System::getErrString(truncErrno));

         target->setState(TargetConsistencyState_BAD);

         retVal = FhgfsOpsErrTk::fromSysErr(truncErrno);
      }
   }

set_attribs:

   // attribs are only applied if everything before worked
   if(isMsgHeaderFeatureFlagSet(RESYNCLOCALFILEMSG_FLAG_SETATTRIBS) &&
      (retVal == FhgfsOpsErr_SUCCESS))
   {
      SettableFileAttribs* attribs = getChunkAttribs();

      // update mode
      int chmodRes = fchmod(fd, attribs->mode);
      if(chmodRes == -1)
      { // could be an error
         int errCode = errno;

         if(errCode != ENOENT)
         { // unhandled chmod() error
            LogContext(__func__).logErr("Unable to change file mode: " + relativeChunkPathStr
               + ". SysErr: " + System::getErrString());
         }
      }

      // update UID and GID...
      int chownRes = fchown(fd, attribs->userID, attribs->groupID);
      if(chownRes == -1)
      { // could be an error
         int errCode = errno;

         if(errCode != ENOENT)
         { // unhandled chown() error
            LogContext(__func__).logErr( "Unable to change file owner: " + relativeChunkPathStr
               + ". SysErr: " + System::getErrString());
         }
      }

      if((chmodRes == -1) || (chownRes == -1))
      {
         target->setState(TargetConsistencyState_BAD);
         retVal = FhgfsOpsErr_INTERNAL;
      }
   }

   close(fd);

send_response:
   ctx.sendResponse(ResyncLocalFileRespMsg(retVal) );

   return true;
}
/**
 * Writes the whole buffer to the file at the given offset; continues after short writes until
 * everything was written or an error occurred.
 *
 * @param outErrno errno of the failed write; only set if false is returned.
 * @return false on write error, true otherwise.
 */
bool ResyncLocalFileMsgEx::doWrite(int fd, const char* buf, size_t count, off_t offset,
   int& outErrno)
{
   size_t numWritten = 0;

   do
   {
      const ssize_t res =
         MsgHelperIO::pwrite(fd, buf + numWritten, count - numWritten, offset + numWritten);

      if (unlikely(res == -1) )
      {
         outErrno = errno;
         return false;
      }

      numWritten += res;
   } while (numWritten != count);

   return true;
}
/**
 * Writes the buffer to the file at the given offset in blocks of at most
 * RESYNCER_SPARSE_BLOCK_SIZE, but skips blocks that contain only zeros. If the last block is
 * skipped, the file is truncated to offset+count instead.
 *
 * @param outErrno errno of the failed write or truncate; only set if false is returned.
 * @return false on write or truncate error, true otherwise.
 */
bool ResyncLocalFileMsgEx::doWriteSparse(int fd, const char* buf, size_t count, off_t offset,
   int& outErrno)
{
   size_t numDone = 0; // bytes of buf that were either written or skipped
   const char zeroBuf[ RESYNCER_SPARSE_BLOCK_SIZE ] = { 0 };

   do
   {
      const size_t blockLen = BEEGFS_MIN(count - numDone, RESYNCER_SPARSE_BLOCK_SIZE);
      const bool blockIsZero = (memcmp(buf + numDone, zeroBuf, blockLen) == 0);

      if (!blockIsZero)
      { // non-sparse area => write it (a short write is picked up by the next iteration)
         const ssize_t writeRes = MsgHelperIO::pwrite(fd, buf + numDone, blockLen,
            offset + numDone);

         if (unlikely(writeRes == -1) )
         {
            outErrno = errno;
            return false;
         }

         numDone += writeRes;
         continue;
      }

      // sparse area => skip it
      numDone += blockLen;

      if (numDone != count)
         continue;

      // end of buf: we must trunc here because this might be the end of the file
      if (unlikely(ftruncate(fd, offset+count) == -1) )
      {
         outErrno = errno;
         return false;
      }
   } while (numDone != count);

   return true;
}
/**
 * Truncates the file to the given length.
 *
 * @param outErrno errno of the failed ftruncate(); only set if false is returned.
 * @return false if ftruncate() failed, true otherwise.
 */
bool ResyncLocalFileMsgEx::doTrunc(int fd, off_t length, int& outErrno)
{
   if (ftruncate(fd, length) != -1)
      return true;

   outErrno = errno;

   return false;
}
/**
 * If this is a chunk balance buddy mirror msg and we are the primary, forward this msg to
 * secondary.
 *
 * A msg that already carries the "buddymirror second" flag is not forwarded again (otherwise the
 * secondary would send the msg to the secondary once more).
 *
 * @param target local target; gets marked as "buddy needs resync" when the secondary is offline
 *    or reports an unknown target.
 * @param ctx used for sending a GenericResponseMsg if communication with the secondary fails.
 * @return _COMMUNICATION if forwarding to buddy failed and buddy is not marked offline (in which
 *    case a GenericResponseMsg was sent); the result of the secondary if it reported an error
 *    other than _UNKNOWNTARGET; _SUCCESS in all other cases.
 * @throw SocketException if sending of GenericResponseMsg fails.
 */
FhgfsOpsErr ResyncLocalFileMsgEx::forwardToSecondary(StorageTarget& target, ResponseContext& ctx)
{
   const char* logContext = "ResyncLocalFileMsg incoming (forward to secondary)";

   App* app = Program::getApp();

   if(!isMsgHeaderFeatureFlagSet(RESYNCLOCALFILEMSG_FLAG_CHUNKBALANCE_BUDDYMIRROR) ||
      isMsgHeaderFeatureFlagSet(RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND) )
      return FhgfsOpsErr_SUCCESS; // nothing to do

   // instead of creating a new msg object, we just re-use "this" with "buddymirror second" flag
   addMsgHeaderFeatureFlag(RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   RequestResponseArgs rrArgs(NULL, this, NETMSGTYPE_ResyncLocalFileResp);
   RequestResponseTarget rrTarget(getResyncToTargetID(), app->getTargetMapper(), app->getStorageNodes(),
      app->getTargetStateStore(), app->getMirrorBuddyGroupMapper(), true);

   FhgfsOpsErr commRes = MessagingTk::requestResponseTarget(&rrTarget, &rrArgs);

   // remove the flag that we just added for secondary
   unsetMsgHeaderFeatureFlag(RESYNCLOCALFILEMSG_FLAG_BUDDYMIRROR_SECOND);

   if(unlikely(
      (commRes == FhgfsOpsErr_COMMUNICATION) &&
      (rrTarget.outTargetReachabilityState == TargetReachabilityState_OFFLINE) ) )
   {
      LogContext(logContext).log(Log_DEBUG, "Secondary is offline and will need resync. "
         "mirror buddy group ID: " + StringTk::uintToStr(getResyncToTargetID() ));

      // buddy is marked offline, so local msg processing will be done and buddy needs resync
      target.setBuddyNeedsResync(true);

      return FhgfsOpsErr_SUCCESS; // go ahead with local msg processing
   }

   if(unlikely(commRes != FhgfsOpsErr_SUCCESS) )
   {
      LogContext(logContext).log(Log_DEBUG, "Forwarding failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getResyncToTargetID() ) + "; "
         "error: " + std::to_string(commRes));

      std::string genericRespStr = "Communication with secondary failed. "
         "mirror buddy group ID: " + StringTk::uintToStr(getResyncToTargetID() );

      ctx.sendResponse(GenericResponseMsg(GenericRespMsgCode_INDIRECTCOMMERR,
         std::move(genericRespStr)));

      return FhgfsOpsErr_COMMUNICATION;
   }

   ResyncLocalFileRespMsg* respMsg = (ResyncLocalFileRespMsg*)rrArgs.outRespMsg.get();
   FhgfsOpsErr secondaryRes = respMsg->getResult();
   if(unlikely(secondaryRes != FhgfsOpsErr_SUCCESS) )
   {
      if(secondaryRes == FhgfsOpsErr_UNKNOWNTARGET)
      {
         /* local msg processing shall be done and buddy needs resync
            (this is normal when a storage is restarted without a broken secondary target, so we
            report success to a client in this case) */

         LogContext(logContext).log(Log_DEBUG,
            "Secondary reports unknown target error and will need resync. "
            "mirror buddy group ID: " + StringTk::uintToStr(getResyncToTargetID() ) );

         target.setBuddyNeedsResync(true);

         return FhgfsOpsErr_SUCCESS;
      }

      LogContext(logContext).log(Log_NOTICE, std::string("Secondary reported error: ") +
         std::to_string(secondaryRes) + "; "
         "mirror buddy group ID: " + StringTk::uintToStr(getResyncToTargetID()) );

      return secondaryRes;
   }

   return FhgfsOpsErr_SUCCESS;
}

View File

@@ -0,0 +1,17 @@
#pragma once
#include <common/net/message/storage/mirroring/ResyncLocalFileMsg.h>
#include <common/storage/StorageErrors.h>
/**
 * Extends ResyncLocalFileMsg with the processing of incoming messages.
 */
class ResyncLocalFileMsgEx : public ResyncLocalFileMsg
{
public:
// entry point for handling a received message; responses are sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// the helpers below return false on error and provide the errno through outErrno
bool doWrite(int fd, const char* buf, size_t count, off_t offset, int& outErrno);
bool doWriteSparse(int fd, const char* buf, size_t count, off_t offset, int& outErrno);
bool doTrunc(int fd, off_t length, int& outErrno);
// forwards a chunk balance buddy mirror msg to the secondary
FhgfsOpsErr forwardToSecondary(StorageTarget& target, ResponseContext& ctx);
};

View File

@@ -0,0 +1,34 @@
#include "SetLastBuddyCommOverrideMsgEx.h"
#include <common/net/message/storage/mirroring/SetLastBuddyCommOverrideRespMsg.h>
#include <program/Program.h>
/**
 * Sets the "last buddy communication" time of a target to the timestamp from the message and, if
 * requested by the message, aborts the resync job of that target.
 *
 * Responds with _UNKNOWNTARGET if the target does not exist, otherwise with _SUCCESS.
 *
 * @return always true.
 */
bool SetLastBuddyCommOverrideMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();

   const uint16_t targetID = getTargetID();

   const auto target = app->getStorageTargets()->getTarget(targetID);
   if (!target)
   {
      ctx.sendResponse(SetLastBuddyCommOverrideRespMsg(FhgfsOpsErr_UNKNOWNTARGET));
      return true;
   }

   const int64_t timestamp = getTimestamp();

   target->setLastBuddyComm(std::chrono::system_clock::from_time_t(timestamp), true);

   if (getAbortResync() )
   {
      if (BuddyResyncJob* resyncJob = app->getBuddyResyncer()->getResyncJob(targetID) )
         resyncJob->abort();
   }

   ctx.sendResponse(SetLastBuddyCommOverrideRespMsg(FhgfsOpsErr_SUCCESS));

   return true;
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include <common/net/message/storage/mirroring/SetLastBuddyCommOverrideMsg.h>
#include <common/storage/StorageErrors.h>
/**
 * Extends SetLastBuddyCommOverrideMsg with the processing of incoming messages.
 */
class SetLastBuddyCommOverrideMsgEx : public SetLastBuddyCommOverrideMsg
{
public:
// entry point for handling a received message; the response is sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,35 @@
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <program/Program.h>
#include "StorageResyncStartedMsgEx.h"
/**
 * Removes all mirror sessions of the target given in the message and confirms with a
 * StorageResyncStartedRespMsg.
 *
 * @return always true.
 */
bool StorageResyncStartedMsgEx::processIncoming(ResponseContext& ctx)
{
   // the msg value is the ID of the target
   deleteMirrorSessions(getValue() );

   ctx.sendResponse(StorageResyncStartedRespMsg() );

   return true;
}
/**
 * Walks over all sessions of the session store and removes the mirror sessions of the given
 * target from the local file store of each session.
 */
void StorageResyncStartedMsgEx::deleteMirrorSessions(uint16_t targetID)
{
   SessionStore* sessions = Program::getApp()->getSessions();

   NumNodeIDList sessionIDs;
   sessions->getAllSessionIDs(&sessionIDs);

   for (const auto& sessionID : sessionIDs)
   {
      auto session = sessions->referenceSession(sessionID);

      if (session)
         session->getLocalFiles()->removeAllMirrorSessions(targetID);

      // (no session means that it was deleted meanwhile)
   }
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
/**
 * Extends StorageResyncStartedMsg with the processing of incoming messages.
 */
class StorageResyncStartedMsgEx : public StorageResyncStartedMsg
{
public:
// entry point for handling a received message; the response is sent through ctx
virtual bool processIncoming(ResponseContext& ctx);
private:
// removes the mirror sessions of the given target from all sessions
void deleteMirrorSessions(uint16_t targetID);
};

View File

@@ -0,0 +1,81 @@
#include <app/App.h>
#include <common/app/log/LogContext.h>
#include <common/net/message/storage/quota/GetQuotaInfoRespMsg.h>
#include <common/storage/quota/GetQuotaConfig.h>
#include <common/storage/quota/Quota.h>
#include <session/ZfsSession.h>
#include <storage/QuotaBlockDevice.h>
#include <program/Program.h>
#include <toolkit/QuotaTk.h>
#include "GetQuotaInfoMsgEx.h"
/**
 * Handles an incoming GetQuotaInfoMsg.
 *
 * Step 1 selects the quota block devices to query, based on the target selection of the message:
 *  - GETQUOTACONFIG_ALL_TARGETS_ONE_REQUEST: the block devices of all storage targets of this
 *    app; the inode quota support is reported as none/all/some of these block devices.
 *  - GETQUOTACONFIG_ALL_TARGETS_ONE_REQUEST_PER_TARGET and GETQUOTACONFIG_SINGLE_TARGET: only the
 *    block device of the target with the numeric ID from the message (if that target exists);
 *    the inode quota support is taken from that block device.
 *  - any other selection leaves the set of block devices empty.
 *
 * Step 2 collects the quota data for a single ID, an ID range or an ID list, depending on the
 * query type of the message. An empty set of block devices is only logged; it is not turned into
 * a processing error.
 *
 * Step 3 always sends a GetQuotaInfoRespMsg containing the collected quota data (possibly empty)
 * and the determined inode quota support.
 *
 * @param ctx context used to send the response to the requestor
 * @return always true
 */
bool GetQuotaInfoMsgEx::processIncoming(ResponseContext& ctx)
{
   const char* logContext = "GetQuotaInfo (GetQuotaInfoMsg incoming)";

   App* app = Program::getApp();

   QuotaBlockDeviceMap blockDevices;
   QuotaDataList quotaData;
   ZfsSession zfsSession;
   QuotaInodeSupport inodeSupport = QuotaInodeSupport_UNKNOWN;

   // step 1: pick the block devices that have to be queried
   switch (getTargetSelection() )
   {
      case GETQUOTACONFIG_ALL_TARGETS_ONE_REQUEST:
      {
         size_t numInodeQuotaDevices = 0;

         for (const auto& targetEntry : app->getStorageTargets()->getTargets() )
         {
            blockDevices.emplace(targetEntry.first, targetEntry.second->getQuotaBlockDevice() );

            if (targetEntry.second->getQuotaBlockDevice().supportsInodeQuota() )
               ++numInodeQuotaDevices;
         }

         // classify: no device, every device, or only a part of the devices with inode quota
         inodeSupport =
            (numInodeQuotaDevices == 0) ? QuotaInodeSupport_NO_BLOCKDEVICES
            : (numInodeQuotaDevices == blockDevices.size() ) ? QuotaInodeSupport_ALL_BLOCKDEVICES
            : QuotaInodeSupport_SOME_BLOCKDEVICES;
      } break;

      case GETQUOTACONFIG_ALL_TARGETS_ONE_REQUEST_PER_TARGET:
      case GETQUOTACONFIG_SINGLE_TARGET:
      {
         auto* const singleTarget = app->getStorageTargets()->getTarget(getTargetNumID() );

         if (singleTarget)
         {
            blockDevices.emplace(getTargetNumID(), singleTarget->getQuotaBlockDevice() );
            inodeSupport = singleTarget->getQuotaBlockDevice().quotaInodeSupportFromBlockDevice();
         }
      } break;
   }

   // step 2: gather the quota data from the selected block devices
   if (!blockDevices.empty() )
   {
      if (getQueryType() == QUERY_TYPE_SINGLE_ID)
      {
         QuotaTk::appendQuotaForID(getIDRangeStart(), getType(), &blockDevices, &quotaData,
            &zfsSession);
      }
      else if (getQueryType() == QUERY_TYPE_ID_RANGE)
      {
         QuotaTk::requestQuotaForRange(&blockDevices, getIDRangeStart(), getIDRangeEnd(),
            getType(), &quotaData, &zfsSession);
      }
      else if (getQueryType() == QUERY_TYPE_ID_LIST)
      {
         QuotaTk::requestQuotaForList(&blockDevices, getIDList(), getType(), &quotaData,
            &zfsSession);
      }
   }
   else
   {
      /* no quota data available, but message processing is not the right place for error
         handling in this case, so only log it and still answer with an empty list */
      LogContext(logContext).logErr("Error: no quota block devices.");
   }

   // step 3: send response
   ctx.sendResponse(GetQuotaInfoRespMsg(&quotaData, inodeSupport) );

   return true;
}

View File

@@ -0,0 +1,14 @@
#pragma once
#include <common/net/message/storage/quota/GetQuotaInfoMsg.h>
#include <common/Common.h>
/**
 * Server-side extension of GetQuotaInfoMsg. On reception, the quota data for the requested
 * ID(s) is collected from the quota block devices of the selected storage target(s) and sent
 * back in a GetQuotaInfoRespMsg.
 */
class GetQuotaInfoMsgEx : public GetQuotaInfoMsg
{
public:
/**
 * Processes the received message and sends the response.
 *
 * @param ctx context through which the response to the sender is sent
 * @return true (the implementation answers even when no quota block device was found)
 */
virtual bool processIncoming(ResponseContext& ctx);
};

View File

@@ -0,0 +1,56 @@
#include <common/app/log/LogContext.h>
#include <common/net/message/storage/quota/SetExceededQuotaRespMsg.h>
#include "program/Program.h"
#include "SetExceededQuotaMsgEx.h"
/**
 * Handles an incoming SetExceededQuotaMsg.
 *
 * If quota enforcement is enabled in the local config, the storage pool named in the message is
 * looked up and, for every local storage target that is part of this pool, the exceeded quota
 * store of the target is updated with the IDs from the message.
 *
 * The response carries
 *  - FhgfsOpsErr_SUCCESS if the stores were updated (also when no local target is in the pool),
 *  - FhgfsOpsErr_UNKNOWNPOOL if the storage pool is not known here,
 *  - FhgfsOpsErr_INTERNAL if quota enforcement is not enabled in the local config.
 *
 * @param ctx context used to send the response to the requestor
 * @return always true
 */
bool SetExceededQuotaMsgEx::processIncoming(ResponseContext& ctx)
{
   auto* const app = Program::getApp();

   FhgfsOpsErr respCode = FhgfsOpsErr_SUCCESS;

   if (!app->getConfig()->getQuotaEnableEnforcement() )
   {
      // the sender enforces quota, but we do not => config mismatch between the services
      LOG(QUOTA, ERR, "Unable to set exceeded quota IDs. Configuration problem detected. "
         "The management daemon on " + ctx.peerName() + " has quota enforcement enabled, "
         "but not this storage daemon. Fix this configuration problem or quota enforcement "
         "will not work correctly. If quota enforcement settings have changed recently in the "
         "mgmtd configuration, please restart all BeeGFS services.");

      respCode = FhgfsOpsErr_INTERNAL;
   }
   else
   {
      // the storage pool for which quota is exceeded
      StoragePoolPtr exceededPool = app->getStoragePoolStore()->getPool(getStoragePoolId() );

      if (!exceededPool)
      {
         LOG(QUOTA, WARNING, "Couldn't set exceeded quota, "
            "because requested storage pool doesn't exist on storage server.",
            ("storagePoolId", getStoragePoolId()));

         respCode = FhgfsOpsErr_UNKNOWNPOOL;
      }
      else
      {
         // only those of our targets that belong to the pool get the exceeded quota info
         for (const auto& targetEntry : app->getStorageTargets()->getTargets() )
         {
            if (!exceededPool->hasTarget(targetEntry.first) )
               continue;

            app->getExceededQuotaStores()->get(targetEntry.first)->updateExceededQuota(
               getExceededQuotaIDs(), getQuotaDataType(), getExceededType() );
         }
      }
   }

   ctx.sendResponse(SetExceededQuotaRespMsg(respCode) );

   return true;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include <common/net/message/storage/quota/SetExceededQuotaMsg.h>
#include <common/Common.h>
/**
 * Server-side extension of SetExceededQuotaMsg. On reception, the exceeded quota stores of the
 * local storage targets that belong to the storage pool named in the message are updated and a
 * SetExceededQuotaRespMsg with the result code is sent back.
 */
class SetExceededQuotaMsgEx : public SetExceededQuotaMsg
{
public:
/**
 * Processes the received message and sends the response.
 *
 * @param ctx context through which the response to the sender is sent
 * @return true (errors are reported to the sender through the response code only)
 */
virtual bool processIncoming(ResponseContext& ctx);
};