New upstream version 8.1.0
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
#include "GetMetaResyncStatsMsgEx.h"
|
||||
|
||||
#include <common/net/message/storage/mirroring/GetMetaResyncStatsRespMsg.h>
|
||||
#include <components/buddyresyncer/BuddyResyncer.h>
|
||||
#include <program/Program.h>
|
||||
|
||||
/**
 * Replies with the statistics of the buddy resyncer's current job.
 *
 * If the resyncer has no job, default-constructed statistics are sent.
 *
 * @return always true
 */
bool GetMetaResyncStatsMsgEx::processIncoming(ResponseContext& ctx)
{
   // remains default-constructed unless a job exists
   MetaBuddyResyncJobStatistics jobStats;

   BuddyResyncJob* const currentJob = Program::getApp()->getBuddyResyncer()->getResyncJob();
   if (currentJob != nullptr)
      jobStats = currentJob->getJobStats();

   ctx.sendResponse(GetMetaResyncStatsRespMsg(&jobStats));

   return true;
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/storage/mirroring/GetMetaResyncStatsMsg.h>
|
||||
|
||||
class GetMetaResyncStatsMsgEx : public GetMetaResyncStatsMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,376 @@
|
||||
#include "ResyncRawInodesMsgEx.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <common/net/message/storage/mirroring/ResyncRawInodesRespMsg.h>
|
||||
#include <app/App.h>
|
||||
#include <components/buddyresyncer/SyncCandidate.h>
|
||||
#include <net/message/storage/mirroring/SetMetadataMirroringMsgEx.h>
|
||||
#include <net/msghelpers/MsgHelperXAttr.h>
|
||||
#include <program/Program.h>
|
||||
#include <toolkit/XAttrTk.h>
|
||||
|
||||
#include <dirent.h>
|
||||
|
||||
/**
 * Processes the resync stream that follows this message and sends the overall result back.
 *
 * @return true if the stream was processed successfully, false otherwise
 */
bool ResyncRawInodesMsgEx::processIncoming(ResponseContext& ctx)
{
   LOG_DBG(MIRRORING, DEBUG, "", basePath, hasXAttrs, wholeDirectory);

   const FhgfsOpsErr streamResult = resyncStream(ctx);

   // the final response carries the result of the whole stream
   ctx.sendResponse(ResyncRawInodesRespMsg(streamResult));

   const bool succeeded = (streamResult == FhgfsOpsErr_SUCCESS);
   return succeeded;
}
|
||||
|
||||
/**
 * Receives the resync stream packet by packet and applies it below
 * META_BUDDYMIRROR_SUBDIR_NAME / basePath.
 *
 * @return FhgfsOpsErr_SUCCESS if the stream ended cleanly (and, for whole-directory syncs, the
 *    removal of untouched entries succeeded), otherwise the first error encountered
 */
FhgfsOpsErr ResyncRawInodesMsgEx::resyncStream(ResponseContext& ctx)
{
   App* const app = Program::getApp();

   // the primary wants to sync xattrs, so they must be enabled in our config as well
   if (hasXAttrs)
   {
      if (!app->getConfig()->getStoreClientXAttrs())
      {
         LOG(MIRRORING, ERR, "Primary has indicated xattr resync, but xattrs are disabled in config.");
         return FhgfsOpsErr_NOTSUPP;
      }
   }

   const auto& metaRoot = app->getMetaRoot();
   auto* const buddyGroupMapper = app->getMetaBuddyGroupMapper();
   auto* const rootDirInode = app->getRootDir();

   // if the root belongs to the local buddy group but is not flagged as buddy mirrored yet, set
   // up mirroring for the root inode first.
   if (metaRoot.getID().val() == buddyGroupMapper->getLocalGroupID()
      && !rootDirInode->getIsBuddyMirrored())
   {
      const auto setMirrorRes = SetMetadataMirroringMsgEx::setMirroring();
      if (setMirrorRes != FhgfsOpsErr_SUCCESS)
      {
         LOG(MIRRORING, ERR, "Failed to set meta mirroring on the root directory", setMirrorRes);
         return setMirrorRes;
      }
   }

   // a directory must be created up front, because it may never be created otherwise: e.g. the
   // #fSiDs# directory of an empty content directory without orphaned fsids.
   if (wholeDirectory)
   {
      const auto createRes = app->getMetaStore()->beginResyncFor(
         META_BUDDYMIRROR_SUBDIR_NAME / basePath, true);
      if (createRes.first != FhgfsOpsErr_SUCCESS)
      {
         LOG(MIRRORING, ERR, "Failed to create metadata directory.", basePath,
            ("mkRes", createRes.first));
         return createRes.first;
      }
   }

   // FhgfsOpsErr_AGAIN means "packet applied, more to come"; anything else ends the stream
   FhgfsOpsErr packetRes;
   do
   {
      packetRes = resyncSingle(ctx);
   } while (packetRes == FhgfsOpsErr_AGAIN);

   if (packetRes != FhgfsOpsErr_SUCCESS)
      return packetRes;

   if (!wholeDirectory)
      return FhgfsOpsErr_SUCCESS;

   // whole-directory sync: drop every entry the stream did not mention
   return removeUntouchedInodes();
}
|
||||
|
||||
/**
 * Receives one packet of the resync stream, applies it and acknowledges it.
 *
 * A packet is a little-endian 32 bit length followed by that many bytes of payload. A length of
 * zero terminates the stream.
 *
 * @return FhgfsOpsErr_SUCCESS when the terminating packet was read, FhgfsOpsErr_AGAIN when a
 *    packet was applied successfully, otherwise the error that occurred
 */
FhgfsOpsErr ResyncRawInodesMsgEx::resyncSingle(ResponseContext& ctx)
{
   uint32_t payloadSize;

   ctx.getSocket()->recvExact(&payloadSize, sizeof(payloadSize), 0);
   payloadSize = LE_TO_HOST_32(payloadSize);

   // empty packet: end of stream
   if (payloadSize == 0)
      return FhgfsOpsErr_SUCCESS;

   std::unique_ptr<char[]> payload(new char[payloadSize]);

   ctx.getSocket()->recvExact(payload.get(), payloadSize, 0);

   Deserializer des(payload.get(), payloadSize);

   // every packet starts with its type and the path relative to basePath
   MetaSyncFileType packetType;
   std::string relPath;

   des % packetType;
   des % relPath;

   if (!des.good())
   {
      LOG(MIRRORING, ERR, "Received bad data from primary.");
      return FhgfsOpsErr_INTERNAL;
   }

   // remember what the stream touched, removeUntouchedInodes() needs this later
   if (wholeDirectory)
      inodesWritten.push_back(relPath);

   FhgfsOpsErr result;

   if (packetType == MetaSyncFileType::Inode || packetType == MetaSyncFileType::Directory)
   {
      const bool isDirectory = (packetType == MetaSyncFileType::Directory);
      result = resyncInode(ctx, basePath / relPath, des, isDirectory);
   }
   else if (packetType == MetaSyncFileType::Dentry)
      result = resyncDentry(ctx, basePath / relPath, des);
   else
      result = FhgfsOpsErr_INVAL;

   ctx.sendResponse(ResyncRawInodesRespMsg(result));

   // a failed resync returns its result twice: once here as the ACK for the packet, and once more
   // (by the caller) to terminate the stream. mod sync could do without the termination, but bulk
   // resync can't.
   return (result == FhgfsOpsErr_SUCCESS) ? FhgfsOpsErr_AGAIN : result;
}
|
||||
|
||||
/**
 * Applies one inode packet (or an independent dentry, see resyncDentry) to the raw metadata below
 * META_BUDDYMIRROR_SUBDIR_NAME.
 *
 * @param path path of the entry, relative to META_BUDDYMIRROR_SUBDIR_NAME
 * @param data deserializer positioned at the content of the packet
 * @param isDirectory true if the entry is a directory
 * @param recvXAttrs selects the wire format of the content (see below); if false, no xattrs are
 *    read from the socket either
 * @return FhgfsOpsErr_SUCCESS or the error that occurred
 */
FhgfsOpsErr ResyncRawInodesMsgEx::resyncInode(ResponseContext& ctx, const Path& path,
   Deserializer& data, const bool isDirectory, const bool recvXAttrs)
{
   std::map<std::string, std::vector<char>> content;
   bool isDeletion;

   // The 'recvXAttrs' flag decides how the incoming data has to be deserialized.
   //
   // Note: inode data used to be serialized as a std::vector and was switched to a std::map to
   // accommodate the new xattr that stores Remote Storage Targets (RST) info in inodes, so both
   // formats have to be handled (see PR#3905 for more details):
   //  - true (default): inode data with the base meta xattr (META_XATTR_NAME) plus any
   //    user-defined xattrs as key-value pairs in map format.
   //  - false: standalone dentry data that still uses the original vector format for
   //    compatibility, deserialized as a single value.
   if (recvXAttrs)
   {
      data
         % content
         % isDeletion;
   }
   else
   {
      std::vector<char>& baseAttr = content.try_emplace(META_XATTR_NAME).first->second;

      data
         % baseAttr
         % isDeletion;
   }

   if (!data.good())
   {
      LOG(MIRRORING, ERR, "Received bad data from primary.");
      return FhgfsOpsErr_INTERNAL;
   }

   const Path mirroredPath(META_BUDDYMIRROR_SUBDIR_NAME / path);

   if (isDeletion)
   {
      bool removed;

      if (isDirectory)
         removed = StorageTk::removeDirRecursive(mirroredPath.str());
      else
         removed = (unlink(mirroredPath.str().c_str()) == 0);

      // an entry that does not exist is as good as a removed one
      if (!removed && errno != ENOENT)
      {
         LOG(MIRRORING, ERR, "Failed to remove raw meta inode.", path, sysErr);
         return FhgfsOpsErr_INTERNAL;
      }

      return FhgfsOpsErr_SUCCESS;
   }

   auto* const metaStore = Program::getApp()->getMetaStore();

   // whole-directory syncs unlink an existing file before it is written again
   if (wholeDirectory && !isDirectory)
   {
      const auto unlinkRes = metaStore->unlinkRawMetadata(mirroredPath);

      const bool unlinkOk =
         (unlinkRes == FhgfsOpsErr_SUCCESS) || (unlinkRes == FhgfsOpsErr_PATHNOTEXISTS);
      if (!unlinkOk)
      {
         LOG(MIRRORING, ERR, "Could not unlink raw metadata", path, unlinkRes);
         return FhgfsOpsErr_INTERNAL;
      }
   }

   auto inode = metaStore->beginResyncFor(mirroredPath, isDirectory);
   if (inode.first)
      return inode.first;

   // only non-directories get the received content written
   if (!isDirectory)
   {
      for (const auto& [attrName, attrValue] : content)
      {
         const auto setContentRes = inode.second.setContent(
            attrName.c_str(), attrValue.data(), attrValue.size());

         if (setContentRes)
            return setContentRes;
      }
   }

   // user xattrs follow on the socket only if both the message and the caller say so
   if (hasXAttrs && recvXAttrs)
   {
      const auto xattrRes = resyncInodeXAttrs(ctx, inode.second);
      if (xattrRes != FhgfsOpsErr_SUCCESS)
      {
         LOG(MIRRORING, ERR, "Syncing XAttrs failed.", path, xattrRes);
         return xattrRes;
      }
   }

   return FhgfsOpsErr_SUCCESS;
}
|
||||
|
||||
/**
 * Applies one dentry packet.
 *
 * Dentries that do not link to an fsid are handed to resyncInode(). All others are unlinked and,
 * unless the packet is a deletion, recreated as a hard link to their fsid file.
 *
 * @param path path of the dentry, relative to META_BUDDYMIRROR_SUBDIR_NAME
 * @param data deserializer positioned at the content of the packet
 * @return FhgfsOpsErr_SUCCESS or the error that occurred
 */
FhgfsOpsErr ResyncRawInodesMsgEx::resyncDentry(ResponseContext& ctx, const Path& path,
   Deserializer& data)
{
   bool linksToFsID;

   data % linksToFsID;
   if (!data.good())
   {
      LOG(MIRRORING, ERR, "Received bad data from primary.");
      return FhgfsOpsErr_INTERNAL;
   }

   // dentries with independent contents (dir dentries, dentries to non-inlined files) can be
   // treated like inodes for the purpose of resync. don't sync xattrs though, because dentries
   // should never have them. recvXAttrs=false indicates independent dentry data.
   if (!linksToFsID)
      return resyncInode(ctx, path, data, false, false);

   std::string targetID;
   bool isDeletion;

   data % targetID;
   data % isDeletion;

   if (!data.good())
   {
      LOG(MIRRORING, ERR, "Received bad data from primary.");
      return FhgfsOpsErr_INTERNAL;
   }

   const Path mirroredPath(META_BUDDYMIRROR_SUBDIR_NAME / path);

   // the old dentry always goes away, no matter whether we delete or relink
   const FhgfsOpsErr rmRes = Program::getApp()->getMetaStore()->unlinkRawMetadata(mirroredPath);
   switch (rmRes)
   {
      case FhgfsOpsErr_SUCCESS:
      case FhgfsOpsErr_PATHNOTEXISTS:
         break;

      default:
         LOG(MIRRORING, ERR, "Could not unlink old dentry.", path, rmRes);
         return FhgfsOpsErr_INTERNAL;
   }

   if (isDeletion)
      return FhgfsOpsErr_SUCCESS;

   // link the dentry name to the fsid file in the same directory
   const Path idPath = path.dirname() / META_DIRENTRYID_SUB_STR / targetID;

   const std::string linkSource = (META_BUDDYMIRROR_SUBDIR_NAME / idPath).str();
   const std::string linkTarget = mirroredPath.str();

   if (::link(linkSource.c_str(), linkTarget.c_str()) < 0)
   {
      LOG(MIRRORING, ERR, "Could not link dentry to fsid.", path, idPath, sysErr);
      return FhgfsOpsErr_INTERNAL;
   }

   return FhgfsOpsErr_SUCCESS;
}
|
||||
|
||||
/**
 * Reads streamed xattrs from the socket and sets them on the given inode, prefixed with
 * XAttrTk::UserXAttrPrefix. Once the stream has ended, inode.clearUnsetXAttrs() is called.
 *
 * @param inode the inode that receives the xattrs
 * @return the result of inode.clearUnsetXAttrs() if all xattrs were read and set, otherwise the
 *    first error reported by reading or setting an xattr
 */
FhgfsOpsErr ResyncRawInodesMsgEx::resyncInodeXAttrs(ResponseContext& ctx, IncompleteInode& inode)
{
   std::string name;
   std::vector<char> value;

   while (true)
   {
      // FhgfsOpsErr_AGAIN: an xattr was read; FhgfsOpsErr_SUCCESS: end of the xattr stream
      const auto readRes = MsgHelperXAttr::StreamXAttrState::readNextXAttr(ctx.getSocket(), name,
         value);
      if (readRes == FhgfsOpsErr_SUCCESS)
         break;
      else if (readRes != FhgfsOpsErr_AGAIN)
         return readRes;

      // value.data() instead of &value[0]: indexing an empty vector is undefined behaviour, and
      // the received value may be empty
      const auto setRes = inode.setXattr((XAttrTk::UserXAttrPrefix + name).c_str(), value.data(),
         value.size());
      if (setRes != FhgfsOpsErr_SUCCESS)
         return setRes;
   }

   return inode.clearUnsetXAttrs();
}
|
||||
|
||||
/**
 * Removes every entry of META_BUDDYMIRROR_SUBDIR_NAME / basePath whose name is not listed in
 * inodesWritten. Directories are removed recursively.
 *
 * Sorts inodesWritten as a side effect.
 *
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if the directory could not be read or an
 *    entry could not be removed
 */
FhgfsOpsErr ResyncRawInodesMsgEx::removeUntouchedInodes()
{
   // sorted, so that the lookups below can use a binary search
   std::sort(inodesWritten.begin(), inodesWritten.end());

   const Path dirPath(META_BUDDYMIRROR_SUBDIR_NAME / basePath);

   std::unique_ptr<DIR, StorageTk::CloseDirDeleter> dirHandle(::opendir(dirPath.str().c_str()));
   if (!dirHandle)
   {
      LOG(MIRRORING, ERR, "Could not open meta directory.", dirPath, sysErr);
      return FhgfsOpsErr_INTERNAL;
   }

   const int dirFD = ::dirfd(dirHandle.get());
   if (dirFD < 0)
   {
      LOG(MIRRORING, ERR, "Could not get directory fd.", sysErr);
      return FhgfsOpsErr_INTERNAL;
   }

   for (;;)
   {
      struct dirent* found;

#if USE_READDIR_P
      struct dirent entry;
      int err = readdir_r(dirHandle.get(), &entry, &found);
#else
      // readdir() returns NULL for both "end of directory" and "error"; only errno tells them
      // apart, so it has to be reset before the call
      errno = 0;
      found = readdir(dirHandle.get());
      int err = found ? 0 : errno;
#endif
      if (err > 0)
      {
         LOG(MIRRORING, ERR, "readdir() failed.", sysErr(err));
         return FhgfsOpsErr_INTERNAL;
      }

      // end of directory
      if (!found)
         break;

      const char* const entryName = found->d_name;

      if (strcmp(entryName, ".") == 0 || strcmp(entryName, "..") == 0)
         continue;

      // entries that were part of the resync stream must stay
      if (std::binary_search(inodesWritten.begin(), inodesWritten.end(), entryName))
         continue;

      if (::unlinkat(dirFD, entryName, 0) == 0)
         continue;

      // somebody else removed it already
      if (errno == ENOENT)
         continue;

      if (errno != EISDIR)
      {
         LOG(MIRRORING, ERR, "Could not remove file", basePath, found->d_name, sysErr);
         return FhgfsOpsErr_INTERNAL;
      }

      // EISDIR: the entry is a directory and has to be removed recursively
      if (!StorageTk::removeDirRecursive((dirPath / entryName).str()))
      {
         LOG(MIRRORING, ERR, "Could not remove file", found->d_name, sysErr);
         return FhgfsOpsErr_INTERNAL;
      }
   }

   return FhgfsOpsErr_SUCCESS;
}
|
||||
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/NetMessage.h>
|
||||
#include <common/storage/Path.h>
|
||||
#include <storage/IncompleteInode.h>
|
||||
|
||||
/**
 * Message that starts a raw inode resync stream for basePath. The stream itself follows this
 * message on the same socket and is consumed by processIncoming().
 */
class ResyncRawInodesMsgEx : public NetMessageSerdes<ResyncRawInodesMsgEx>
{
   public:
      /**
       * @param basePath path all paths in the stream are relative to
       * @param hasXAttrs true if user xattrs are part of the stream
       * @param wholeDirectory true if basePath is a directory that is synced as a whole; entries
       *    of it that are not part of the stream are removed at the end
       */
      ResyncRawInodesMsgEx(Path basePath, bool hasXAttrs, bool wholeDirectory):
         BaseType(NETMSGTYPE_ResyncRawInodes),
         basePath(std::move(basePath)),
         hasXAttrs(hasXAttrs),
         wholeDirectory(wholeDirectory)
      {}

      /**
       * For deserialization only.
       */
      ResyncRawInodesMsgEx(): BaseType(NETMSGTYPE_ResyncRawInodes) {}

      bool processIncoming(ResponseContext& ctx) override;

      template<typename This, typename Ctx>
      static void serialize(This obj, Ctx& ctx)
      {
         ctx
            % obj->basePath
            % obj->hasXAttrs
            % obj->wholeDirectory;
      }

   private:
      Path basePath;
      // default-initialized so that a message built by the default constructor never holds
      // indeterminate flags
      bool hasXAttrs = false;
      bool wholeDirectory = false;

      // relative paths of all entries received so far (only filled if wholeDirectory is set)
      std::vector<std::string> inodesWritten;

      FhgfsOpsErr resyncStream(ResponseContext& ctx);

      FhgfsOpsErr resyncSingle(ResponseContext& ctx);

      FhgfsOpsErr resyncInode(ResponseContext& ctx, const Path& path, Deserializer& data,
         const bool isDirectory, const bool recvXAttrs = true);
      FhgfsOpsErr resyncDentry(ResponseContext& ctx, const Path& path, Deserializer& data);

      FhgfsOpsErr resyncInodeXAttrs(ResponseContext& ctx, IncompleteInode& inode);

      FhgfsOpsErr removeUntouchedInodes();
};
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
#include "ResyncSessionStoreMsgEx.h"
|
||||
|
||||
#include <app/App.h>
|
||||
#include <program/Program.h>
|
||||
#include <session/SessionStore.h>
|
||||
#include <common/net/message/storage/mirroring/ResyncSessionStoreRespMsg.h>
|
||||
|
||||
/**
 * Receives the serialized session store, then replaces the contents of the local mirrored
 * session store with it.
 *
 * @return false (without sending a response) if the buffer could not be received; true in all
 *    other cases, where the response message carries the result
 */
bool ResyncSessionStoreMsgEx::processIncoming(ResponseContext& ctx)
{
   App* app = Program::getApp();
   Config* config = app->getConfig();

   const FhgfsOpsErr receiveRes = receiveStoreBuf(ctx.getSocket(),
      config->getConnMsgShortTimeout());

   if (receiveRes == FhgfsOpsErr_OUTOFMEM)
   {
      LOG(MIRRORING, ERR, "Failed to allocate receive buffer for session store resync - out of memory.");
      return false;
   }
   else if (receiveRes == FhgfsOpsErr_COMMUNICATION)
   {
      LOG(MIRRORING, ERR, "Failed to receive session store buffer during resync.");
      return false;
   }
   else if (receiveRes != FhgfsOpsErr_SUCCESS)
   {
      // any other failure must not fall through: the local store would be cleared and then
      // filled from a buffer that was never received properly
      LOG(MIRRORING, ERR, "Unexpected error while receiving session store buffer during resync.",
         receiveRes);
      return false;
   }

   auto sessionStoreBuf = getSessionStoreBuf();

   SessionStore* sessionStore = app->getMirroredSessions();

   // the received store replaces the local one completely
   const bool clearRes = sessionStore->clear();
   if (!clearRes)
   {
      LOG(MIRRORING, ERR, "Failed to clear session store.");
      ctx.sendResponse(ResyncSessionStoreRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   const bool deserRes = sessionStore->deserializeFromBuf(sessionStoreBuf.first,
      sessionStoreBuf.second, *app->getMetaStore());
   if (!deserRes)
   {
      LOG(MIRRORING, ERR, "Failed to deserialize session store data from primary.");
      ctx.sendResponse(ResyncSessionStoreRespMsg(FhgfsOpsErr_INTERNAL));
      return true;
   }

   ctx.sendResponse(ResyncSessionStoreRespMsg(FhgfsOpsErr_SUCCESS));
   return true;
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/storage/mirroring/ResyncSessionStoreMsg.h>
|
||||
|
||||
class ResyncSessionStoreMsgEx : public ResyncSessionStoreMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
#include <common/net/message/storage/mirroring/SetMetadataMirroringRespMsg.h>
|
||||
#include <common/toolkit/MetaStorageTk.h>
|
||||
#include <storage/DirInode.h>
|
||||
#include <storage/MetaStore.h>
|
||||
|
||||
#include <program/Program.h>
|
||||
|
||||
#include "SetMetadataMirroringMsgEx.h"
|
||||
|
||||
|
||||
/**
 * Enables metadata mirroring for the root directory, provided that this node is the primary of a
 * buddy group and owns the root directory.
 *
 * @return always true; the outcome is reported in the response message
 */
bool SetMetadataMirroringMsgEx::processIncoming(ResponseContext& ctx)
{
   App* const app = Program::getApp();
   MirrorBuddyGroupMapper* const groupMapper = app->getMetaBuddyGroupMapper();

   // look up the buddy group of this node and its role in that group
   bool localNodeIsPrimary;
   const uint16_t buddyGroupID = groupMapper->getBuddyGroupID(app->getLocalNodeNumID().val(),
      &localNodeIsPrimary);

   FhgfsOpsErr respCode;

   if (buddyGroupID == 0)
   {
      LogContext(__func__).logErr("This node is not part of a buddy group.");
      respCode = FhgfsOpsErr_INTERNAL;
   }
   else if (!localNodeIsPrimary)
   {
      LogContext(__func__).logErr("This node is not the primary root node.");
      respCode = FhgfsOpsErr_INTERNAL;
   }
   else if (app->getLocalNodeNumID() != app->getRootDir()->getOwnerNodeID())
   {
      // the root dir is owned by somebody else
      LogContext(__func__).logErr("This node does not own the root directory.");
      respCode = FhgfsOpsErr_NOTOWNER;
   }
   else
      respCode = setMirroring();

   ctx.sendResponse(SetMetadataMirroringRespMsg(respCode) );

   return true;
}
|
||||
|
||||
/**
 * Turns the root directory into a buddy mirrored directory: moves the root inode and the root
 * directory to their mirrored locations, stores the buddy mirror flag, sets the owner of the
 * root directory to the local buddy group and updates the meta root of the app.
 *
 * Calls are serialized by a function-local mutex. Returns success immediately if the root
 * directory is already flagged as buddy mirrored.
 *
 * NOTE(review): the two late failure paths undo different things (a failed flag store does not
 * move the files back, a failed owner change does not revert the flag) - confirm that this is
 * intended.
 *
 * @return FhgfsOpsErr_SUCCESS, FhgfsOpsErr_SAVEERROR if the mirror flag could not be stored,
 *    FhgfsOpsErr_INTERNAL for all other failures
 */
FhgfsOpsErr SetMetadataMirroringMsgEx::setMirroring()
{
   // this code must never be run by two threads at the same time, which could happen during
   // bulk resync.
   static Mutex setMirrorMtx;
   std::unique_lock<Mutex> mirrorGuard(setMirrorMtx);

   // a thread that had to wait for the mutex may find the work done already
   if (Program::getApp()->getRootDir()->getIsBuddyMirrored())
      return FhgfsOpsErr_SUCCESS;

   App* const app = Program::getApp();
   MirrorBuddyGroupMapper* const groupMapper = app->getMetaBuddyGroupMapper();

   const NumNodeID localNodeNumID = app->getLocalNodeNumID();

   // buddy group of this node (the primary flag is not used here)
   bool localNodeIsPrimary;
   const uint16_t buddyGroupID = groupMapper->getBuddyGroupID(localNodeNumID.val(),
      &localNodeIsPrimary);

   // step 1: move the inode of the root directory to the mirrored dir
   if (moveRootInode(false) != FhgfsOpsErr_SUCCESS)
      return FhgfsOpsErr_INTERNAL;

   // step 2: move the root directory itself to the mirrored dir
   if (moveRootDirectory(false) != FhgfsOpsErr_SUCCESS)
   {
      // undo step 1
      moveRootInode(true);

      return FhgfsOpsErr_INTERNAL;
   }

   // step 3: update the buddy mirror info and write it to disk
   // NOTE: this must happen after the data has been moved, because changing the buddy mirror
   // flag changes the save path inside of the DirInode object
   DirInode* const rootDir = app->getRootDir();

   const FhgfsOpsErr setMirrorRes = rootDir->setAndStoreIsBuddyMirrored(true);
   if (setMirrorRes != FhgfsOpsErr_SUCCESS)
   {
      LOG(MIRRORING, ERR, "Could not set mirror state on root inode", setMirrorRes);

      const FhgfsOpsErr revertSetRes = rootDir->setAndStoreIsBuddyMirrored(false);
      if (revertSetRes != FhgfsOpsErr_SUCCESS)
         LOG(MIRRORING, ERR, "Could not revert mirror setting either, your filesystem is now corrupt",
            revertSetRes);

      return FhgfsOpsErr_SAVEERROR;
   }

   // step 4: the root dir is owned by the buddy group from now on
   if (!rootDir->setOwnerNodeID(NumNodeID(buddyGroupID) ) )
   {
      // undo step 1
      moveRootInode(true);

      // undo step 2
      moveRootDirectory(true);

      return FhgfsOpsErr_INTERNAL;
   }

   // step 5: update the root node in the meta store
   app->getMetaRoot().set(NumNodeID(buddyGroupID), true);

   return FhgfsOpsErr_SUCCESS;
}
|
||||
|
||||
/**
 * Renames the inode file of the root directory from the inodes path to the buddy mirror inodes
 * path, or back.
 *
 * @param revertMove false to move to the buddy mirror location, true to move back
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if rename() failed
 */
FhgfsOpsErr SetMetadataMirroringMsgEx::moveRootInode(bool revertMove)
{
   App* const app = Program::getApp();

   Path plainPath( app->getMetaPath() + "/"
      + MetaStorageTk::getMetaInodePath(app->getInodesPath()->str(),
         META_ROOTDIR_ID_STR));

   Path mirroredPath( app->getMetaPath() + "/"
      + MetaStorageTk::getMetaInodePath(app->getBuddyMirrorInodesPath()->str(),
         META_ROOTDIR_ID_STR));

   // the target location must be prepared only when moving towards the mirrored dir
   if (!revertMove)
      StorageTk::createPathOnDisk(mirroredPath, true);

   const Path& moveFrom = revertMove ? mirroredPath : plainPath;
   const Path& moveTo = revertMove ? plainPath : mirroredPath;

   const int renameRes = rename(moveFrom.str().c_str(), moveTo.str().c_str());
   if (renameRes == 0)
      return FhgfsOpsErr_SUCCESS;

   LogContext(__func__).logErr(
      "Unable to move root inode; error: " + System::getErrString());

   return FhgfsOpsErr_INTERNAL;
}
|
||||
|
||||
/**
 * Renames the dentries directory of the root directory from the dentries path to the buddy
 * mirror dentries path, or back.
 *
 * @param revertMove false to move to the buddy mirror location, true to move back
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if rename() failed
 */
FhgfsOpsErr SetMetadataMirroringMsgEx::moveRootDirectory(bool revertMove)
{
   App* const app = Program::getApp();

   Path plainPath(app->getMetaPath() + "/"
      + MetaStorageTk::getMetaDirEntryPath(app->getDentriesPath()->str(),
         META_ROOTDIR_ID_STR));
   Path mirroredPath(app->getMetaPath() + "/"
      + MetaStorageTk::getMetaDirEntryPath(app->getBuddyMirrorDentriesPath()->str(),
         META_ROOTDIR_ID_STR));

   // the target location must be prepared only when moving towards the mirrored dir
   if (!revertMove)
      StorageTk::createPathOnDisk(mirroredPath, true);

   const Path& moveFrom = revertMove ? mirroredPath : plainPath;
   const Path& moveTo = revertMove ? plainPath : mirroredPath;

   const int renameRes = rename(moveFrom.str().c_str(), moveTo.str().c_str());
   if (renameRes == 0)
      return FhgfsOpsErr_SUCCESS;

   LogContext(__func__).logErr(
      "Unable to move root directory; error: " + System::getErrString());

   return FhgfsOpsErr_INTERNAL;
}
|
||||
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/storage/StorageErrors.h>
|
||||
#include <common/net/message/storage/mirroring/SetMetadataMirroringMsg.h>
|
||||
|
||||
|
||||
class SetMetadataMirroringMsgEx : public SetMetadataMirroringMsg
|
||||
{
|
||||
public:
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
|
||||
static FhgfsOpsErr setMirroring();
|
||||
|
||||
private:
|
||||
static FhgfsOpsErr moveRootInode(bool revertMove);
|
||||
static FhgfsOpsErr moveRootDirectory(bool revertMove);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
#include <common/components/worker/queue/MultiWorkQueue.h>
|
||||
#include <common/net/message/storage/mirroring/StorageResyncStartedRespMsg.h>
|
||||
#include <common/threading/Barrier.h>
|
||||
#include <common/threading/PThread.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
|
||||
#include <program/Program.h>
|
||||
#include <components/worker/BarrierWork.h>
|
||||
|
||||
#include "StorageResyncStartedMsgEx.h"
|
||||
|
||||
/**
 * Prepares this node for an incoming resync: lets all workers finish what they received before
 * this message, flags the root directory as buddy mirrored, clears the mirrored session store
 * and invalidates the loaded mirrored dir inodes.
 *
 * @return false (without a response) if the message is addressed to a different node, true
 *    otherwise
 */
bool StorageResyncStartedMsgEx::processIncoming(ResponseContext& ctx)
{
   App* const app = Program::getApp();

   const NumNodeID localNodeID = app->getLocalNodeNumID();
   const uint16_t targetID = getValue();

   // not meant for us
   if (targetID != localNodeID.val())
      return false;

   // Make sure all workers have processed all messages that were received before this one.
   // This ensures that no mirrored messages are in flight while resync starts.
   pauseWorkers();

   // we may not have received a heartbeat from the mgmtd yet that could have told us that the
   // root inode is mirrored. since un-mirroring the root inode is currently not possible, we may
   // assume that the root inode is very much supposed to be mirrored.
   // we only need this info if we are currently the secondary of the root buddy group, but we
   // can safely set it on all nodes until we add an option to disable meta mirroring for the
   // root inode again.
   app->getRootDir()->setIsBuddyMirroredFlag(true);

   // clear session store here to get rid of all inodes that will go away during resync.
   app->getMirroredSessions()->clear();

   // we can now be sure that no mirrored dir inode is still referenced by an operation:
   // * the session store is cleared, so no references from there
   // * we have received this message, so must be in NeedsResync state
   //   -> no mirrored operations will be addressed to this node
   // * we hold no references ourselves
   //
   // since resync changes mirrored dir inodes, we must invalidate all inodes that are currently
   // loaded (which will be at least root and mdisposal, plus any cached dir inodes) to ensure that
   // future operations will use the correct resynced data
   app->getMetaStore()->invalidateMirroredDirInodes();

   ctx.sendResponse(StorageResyncStartedRespMsg());

   return true;
}
|
||||
|
||||
void StorageResyncStartedMsgEx::pauseWorkers()
|
||||
{
|
||||
App* app = Program::getApp();
|
||||
WorkerList* workers = app->getWorkers();
|
||||
MultiWorkQueue* workQueue = app->getWorkQueue();
|
||||
pthread_t threadID = PThread::getCurrentThreadID();
|
||||
|
||||
// Stop all worker threads except our own
|
||||
Barrier workerBarrier(workers->size());
|
||||
for (WorkerListIter workerIt = workers->begin(); workerIt != workers->end(); ++workerIt)
|
||||
{
|
||||
// don't enqueue it in the worker that processes this message (this would deadlock)
|
||||
if (!PThread::threadIDEquals((*workerIt)->getID(), threadID))
|
||||
{
|
||||
PersonalWorkQueue* personalQ = (*workerIt)->getPersonalWorkQueue();
|
||||
workQueue->addPersonalWork(new BarrierWork(&workerBarrier), personalQ);
|
||||
}
|
||||
}
|
||||
|
||||
// Stall our own worker until all the other threads are blocked.
|
||||
workerBarrier.wait();
|
||||
|
||||
// Continue all workers
|
||||
workerBarrier.wait();
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/net/message/storage/mirroring/StorageResyncStartedMsg.h>
|
||||
|
||||
class StorageResyncStartedMsgEx : public StorageResyncStartedMsg
|
||||
{
|
||||
public:
|
||||
StorageResyncStartedMsgEx() : StorageResyncStartedMsg()
|
||||
{ }
|
||||
|
||||
virtual bool processIncoming(ResponseContext& ctx);
|
||||
|
||||
private:
|
||||
void pauseWorkers();
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user