#include <app/App.h>
#include <app/log/Logger.h>
#include <common/storage/Metadata.h>
#include <filesystem/FhgfsOpsHelper.h>
#include <app/config/Config.h>
#include <common/net/message/storage/lookup/LookupIntentMsg.h>
#include <common/net/message/storage/lookup/LookupIntentRespMsg.h>
#include <common/net/message/storage/creating/MkDirMsg.h>
#include <common/net/message/storage/creating/MkDirRespMsg.h>
#include <common/net/message/storage/creating/MkFileMsg.h>
#include <common/net/message/storage/creating/MkFileRespMsg.h>
#include <common/net/message/storage/creating/RmDirMsg.h>
#include <common/net/message/storage/creating/RmDirRespMsg.h>
#include <common/net/message/storage/creating/HardlinkMsg.h>
#include <common/net/message/storage/creating/HardlinkRespMsg.h>
#include <common/net/message/storage/creating/UnlinkFileMsg.h>
#include <common/net/message/storage/creating/UnlinkFileRespMsg.h>
#include <common/net/message/storage/listing/ListDirFromOffsetMsg.h>
#include <common/net/message/storage/listing/ListDirFromOffsetRespMsg.h>
#include <common/net/message/storage/moving/RenameMsg.h>
#include <common/net/message/storage/moving/RenameRespMsg.h>
#include <common/net/message/storage/attribs/ListXAttrMsg.h>
#include <common/net/message/storage/attribs/ListXAttrRespMsg.h>
#include <common/net/message/storage/attribs/GetXAttrMsg.h>
#include <common/net/message/storage/attribs/GetXAttrRespMsg.h>
#include <common/net/message/storage/attribs/RemoveXAttrMsg.h>
#include <common/net/message/storage/attribs/RemoveXAttrRespMsg.h>
#include <common/net/message/storage/attribs/SetXAttrMsg.h>
#include <common/net/message/storage/attribs/SetXAttrRespMsg.h>
#include <common/net/message/storage/attribs/RefreshEntryInfoMsg.h>
#include <common/net/message/storage/attribs/RefreshEntryInfoRespMsg.h>
#include <common/net/message/storage/attribs/SetAttrMsg.h>
#include <common/net/message/storage/attribs/SetAttrRespMsg.h>
#include <common/net/message/storage/attribs/StatMsg.h>
#include <common/net/message/storage/attribs/StatRespMsg.h>
#include <common/net/message/storage/TruncFileMsg.h>
#include <common/net/message/storage/TruncFileRespMsg.h>
#include <common/net/message/session/BumpFileVersion.h>
#include <common/net/message/session/BumpFileVersionResp.h>
#include <common/net/message/session/GetFileVersionMsg.h>
#include <common/net/message/session/GetFileVersionRespMsg.h>
#include <common/net/message/session/locking/FLockAppendMsg.h>
#include <common/net/message/session/locking/FLockAppendRespMsg.h>
#include <common/net/message/session/locking/FLockEntryMsg.h>
#include <common/net/message/session/locking/FLockEntryRespMsg.h>
#include <common/net/message/session/locking/FLockRangeMsg.h>
#include <common/net/message/session/locking/FLockRangeRespMsg.h>
#include <common/net/message/session/opening/OpenFileMsg.h>
#include <common/net/message/session/opening/OpenFileRespMsg.h>
#include <common/net/message/session/opening/CloseFileMsg.h>
#include <common/net/message/session/opening/CloseFileRespMsg.h>
#include <common/storage/Path.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/SynchronizedCounter.h>
#include <nodes/NodeStoreEx.h>
#include <common/net/message/NetMessage.h>
#include <common/nodes/Node.h>
#include <common/toolkit/MathTk.h>
#include <common/toolkit/MetadataTk.h>
#include <common/FhgfsTypes.h>
#include <common/toolkit/ackstore/AcknowledgmentStore.h>
#include <common/Common.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <toolkit/NoAllocBufferStore.h>
#include <toolkit/FhgfsPage.h>
#include <filesystem/FhgfsOpsPages.h>
#include "FhgfsOpsCommKit.h"
#include "FhgfsOpsCommKitVec.h"

static inline const char* __FhgfsOpsRemoting_rwTypeToString(enum Fhgfs_RWType);

static bool __FhgfsOpsRemoting_writefileVerify(App* app, RemotingIOInfo* ioInfo,
   struct list_head* states, ssize_t* outWritten, unsigned firstTargetIndex,
   unsigned numStripeNodes);
static inline int64_t __FhgfsOpsRemoting_getChunkOffset(int64_t pos, unsigned chunkSize,
   size_t numNodes, size_t stripeNodeIndex);

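/*
 * Worked example for __FhgfsOpsRemoting_getChunkOffset() (illustrative numbers, assuming the
 * usual round-robin striping layout): with chunkSize = 512 KiB and numNodes = 4, file offset
 * pos = 5 MiB lies in chunk number pos / chunkSize = 10, i.e. in stripe set 10 / 4 = 2 on the
 * target with stripeNodeIndex 10 % 4 = 2; the chunk-file-local offset on that target is then
 * stripeSet * chunkSize + (pos % chunkSize) = 2 * 512 KiB + 0 = 1 MiB.
 */
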
struct Fhgfs_RWTypeStrEntry
{
   enum Fhgfs_RWType type;
   const char* typeStr;
};

struct Fhgfs_RWTypeStrEntry const __Fhgfs_RWTypeList[] =
{
   {BEEGFS_RWTYPE_READ, "read vec"},
   {BEEGFS_RWTYPE_WRITE, "write vec"},
};

#define __FHGFSOPS_REMOTING_RW_SIZE \
   ( (sizeof(__Fhgfs_RWTypeList) ) / (sizeof(struct Fhgfs_RWTypeStrEntry) ) )

#define __FHGFSOPS_REMOTING_msgBufCacheName BEEGFS_MODULE_NAME_STR "-pageVecMsgBufs"
#define __FHGFSOPS_REMOTING_msgBufPoolSize 8 // number of reserve (pre-allocated) msgBufs

const ssize_t __FHGFSOPS_REMOTING_MAX_XATTR_VALUE_SIZE = 60*1000;
const ssize_t __FHGFSOPS_REMOTING_MAX_XATTR_NAME_SIZE = 245;

static struct kmem_cache* FhgfsOpsRemoting_msgBufCache = NULL;
static mempool_t* FhgfsOpsRemoting_msgBufPool = NULL;

static mempool_t* writefileStatePool;

bool FhgfsOpsRemoting_initMsgBufCache(void)
{
   FhgfsOpsRemoting_msgBufCache = OsCompat_initKmemCache(__FHGFSOPS_REMOTING_msgBufCacheName,
      BEEGFS_COMMKIT_MSGBUF_SIZE, NULL);

   if(!FhgfsOpsRemoting_msgBufCache)
      goto fail_msgBufCache;

   FhgfsOpsRemoting_msgBufPool = mempool_create_slab_pool(__FHGFSOPS_REMOTING_msgBufPoolSize,
      FhgfsOpsRemoting_msgBufCache);

   if(!FhgfsOpsRemoting_msgBufPool)
      goto fail_msgBufPool;

   writefileStatePool = mempool_create_kmalloc_pool(4, sizeof(struct FileOpVecState) );
   if(!writefileStatePool)
      goto fail_statePool;

   return true;

fail_statePool:
   mempool_destroy(FhgfsOpsRemoting_msgBufPool);
fail_msgBufPool:
   kmem_cache_destroy(FhgfsOpsRemoting_msgBufCache);
fail_msgBufCache:
   return false;
}

void FhgfsOpsRemoting_destroyMsgBufCache(void)
{
   if(writefileStatePool)
      mempool_destroy(writefileStatePool);

   if(FhgfsOpsRemoting_msgBufPool)
      mempool_destroy(FhgfsOpsRemoting_msgBufPool);

   if(FhgfsOpsRemoting_msgBufCache)
      kmem_cache_destroy(FhgfsOpsRemoting_msgBufCache);
}

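/*
 * Allocation note (summary, assuming standard kernel mempool semantics): the pools created
 * above keep a small reserve of pre-allocated elements (__FHGFSOPS_REMOTING_msgBufPoolSize
 * msgBufs resp. 4 write states), so mempool_alloc() with GFP_NOFS may block but effectively
 * does not fail, while GFP_NOWAIT allocations can return NULL. The I/O paths below rely on
 * exactly this split to guarantee forward progress under memory pressure.
 */
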
/**
 * Fhgfs_RWType enum value to human-readable string.
 */
const char* __FhgfsOpsRemoting_rwTypeToString(enum Fhgfs_RWType enumType)
{
   size_t type = (size_t)enumType;

   if (likely(type < __FHGFSOPS_REMOTING_RW_SIZE) )
      return __Fhgfs_RWTypeList[type].typeStr;

   printk_fhgfs(KERN_WARNING, "Unknown rwType given to %s(): %d; (dumping stack...)\n",
      __func__, (int) type);

   dump_stack();

   return "Unknown RW type";
}

static struct RRPeer rrpeer_from_entryinfo(const EntryInfo* entryInfo)
{
   return EntryInfo_getIsBuddyMirrored(entryInfo)
      ? RRPeer_group(entryInfo->owner.group)
      : RRPeer_target(entryInfo->owner.node);
}

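/*
 * Routing note (summary of the helper above): buddy-mirrored entries are addressed by their
 * metadata buddy group ID, while unmirrored entries are addressed by their single owner node.
 * All RequestResponseNode initializers below combine this peer with the meta node store, the
 * target state store and the buddy group mapper, which is what lets the messaging layer resolve
 * a group ID to the currently usable group member.
 */
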
/**
 * Note: Clears FsDirInfo names/types, then sets server offset, adds names/types, sets contents
 * offset and endOfDir in case of RPC success.
 * Note: Also retrieves the optional entry types.
 *
 * @param dirInfo used as input (offsets) and output (contents etc.) parameter
 */
FhgfsOpsErr FhgfsOpsRemoting_listdirFromOffset(const EntryInfo* entryInfo, FsDirInfo* dirInfo,
   unsigned maxOutNames)
{
   FsObjectInfo* fsObjectInfo = (FsObjectInfo*)dirInfo;
   App* app = FsObjectInfo_getApp(fsObjectInfo);

   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (list dir from offset)";

   int64_t serverOffset = FsDirInfo_getServerOffset(dirInfo);
   ListDirFromOffsetMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   ListDirFromOffsetRespMsg* listDirResp;
   FhgfsOpsErr retVal;

   // prepare request
   ListDirFromOffsetMsg_initFromEntryInfo(
      &requestMsg, entryInfo, serverOffset, maxOutNames, false);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg,
      NETMSGTYPE_ListDirFromOffsetResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   listDirResp = (ListDirFromOffsetRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)ListDirFromOffsetRespMsg_getResult(listDirResp);

   if(likely(retVal == FhgfsOpsErr_SUCCESS) )
   {
      UInt8Vec* dirContentsTypes = FsDirInfo_getDirContentsTypes(dirInfo);
      Int64CpyVec* serverOffsets = FsDirInfo_getServerOffsets(dirInfo);
      StrCpyVec* dirContents = FsDirInfo_getDirContents(dirInfo);
      StrCpyVec* dirContentIDs = FsDirInfo_getEntryIDs(dirInfo);
      bool endOfDirReached;

      FsDirInfo_setCurrentContentsPos(dirInfo, 0);

      Int64CpyVec_clear(serverOffsets);
      UInt8Vec_clear(dirContentsTypes);
      StrCpyVec_clear(dirContents);
      StrCpyVec_clear(dirContentIDs);

      ListDirFromOffsetRespMsg_parseEntryTypes(listDirResp, dirContentsTypes);
      ListDirFromOffsetRespMsg_parseNames(listDirResp, dirContents);
      ListDirFromOffsetRespMsg_parseEntryIDs(listDirResp, dirContentIDs);
      ListDirFromOffsetRespMsg_parseServerOffsets(listDirResp, serverOffsets);

      // check for equal vector lengths
      if(unlikely(
         (UInt8Vec_length(dirContentsTypes) != StrCpyVec_length(dirContents) ) ||
         (UInt8Vec_length(dirContentsTypes) != StrCpyVec_length(dirContentIDs) ) ||
         (UInt8Vec_length(dirContentsTypes) != Int64CpyVec_length(serverOffsets) ) ) )
      { // apparently, at least one of the vector allocations failed
         printk_fhgfs(KERN_WARNING,
            "Memory allocation for directory contents retrieval failed.\n");

         Int64CpyVec_clear(serverOffsets);
         UInt8Vec_clear(dirContentsTypes);
         StrCpyVec_clear(dirContents);
         StrCpyVec_clear(dirContentIDs);

         retVal = FhgfsOpsErr_OUTOFMEM;

         goto cleanup_resp_buffers;
      }

      FsDirInfo_setServerOffset(dirInfo, ListDirFromOffsetRespMsg_getNewServerOffset(listDirResp) );

      endOfDirReached = (StrCpyVec_length(dirContents) < maxOutNames);
      FsDirInfo_setEndOfDir(dirInfo, endOfDirReached);
   }
   else
   {
      int logLevel = (retVal == FhgfsOpsErr_PATHNOTEXISTS) ? Log_DEBUG : Log_NOTICE;

      Logger_logFormatted(log, logLevel, logContext, "ListDirResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up

cleanup_resp_buffers:
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

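/*
 * Usage sketch (illustrative; the helper names marked below are hypothetical): a readdir
 * implementation would typically page through a directory until the endOfDir flag is set:
 *
 *    FsDirInfo_setServerOffset(dirInfo, 0);
 *    do
 *    {
 *       FhgfsOpsErr res = FhgfsOpsRemoting_listdirFromOffset(entryInfo, dirInfo, 128);
 *       if(res != FhgfsOpsErr_SUCCESS)
 *          break; // map to errno and bail out
 *
 *       emit_entries_to_vfs(dirInfo); // hypothetical caller-side helper
 *    } while(!fsdirinfo_end_of_dir(dirInfo) ); // hypothetical accessor for the endOfDir flag
 */
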
/**
 * Resolve path to entry owner and stat the entry.
 * Note: This function should *only* be called to stat the root path, as it is the only dir
 * without a parent; for other entries use _statDirect() instead, as the owner info is already
 * available.
 */
FhgfsOpsErr FhgfsOpsRemoting_statRoot(App* app, fhgfs_stat* outFhgfsStat)
{
   FhgfsOpsErr retVal;

   Node* node;
   const char* logContext = "Stat root dir";
   Logger* log = App_getLogger(app);
   NodeOrGroup rootID;

   NodeStoreEx* nodes = App_getMetaNodes(app);

   node = NodeStoreEx_referenceRootNode(nodes, &rootID);
   if(likely(node) )
   {
      const char* parentEntryID = "";
      const char* entryID = META_ROOTDIR_ID_STR;
      const char* fileName = "";
      DirEntryType entryType = DirEntryType_DIRECTORY;

      EntryInfo entryInfo;
      EntryInfo_init(&entryInfo, rootID, parentEntryID, entryID, fileName, entryType, 0);

      retVal = FhgfsOpsRemoting_statDirect(app, &entryInfo, outFhgfsStat);

      // clean-up
      Node_put(node);
   }
   else
   {
      Logger_logErr(log, logContext, "Unable to proceed without a working root metadata node");
      retVal = FhgfsOpsErr_UNKNOWNNODE;
   }

   return retVal;
}

/**
 * Stat directly from entryInfo.
 *
 * Note: Typically you will want to call the wrapper FhgfsOpsRemoting_statDirect() instead of
 * this method.
 *
 * @param outParentNodeID may be NULL if the caller is not interested (default)
 * @param outParentEntryID may be NULL if the caller is not interested (default)
 */
FhgfsOpsErr FhgfsOpsRemoting_statAndGetParentInfo(App* app, const EntryInfo* entryInfo,
   fhgfs_stat* outFhgfsStat, NumNodeID* outParentNodeID, char** outParentEntryID)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (stat)";

   StatMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   StatRespMsg* statResp;
   FhgfsOpsErr retVal;

   // prepare request
   StatMsg_initFromEntryInfo(&requestMsg, entryInfo);

   if (outParentNodeID)
      StatMsg_addParentInfoRequest(&requestMsg);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_StatResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   statResp = (StatRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)StatRespMsg_getResult(statResp);

   if(retVal == FhgfsOpsErr_SUCCESS)
   {
      StatData* statData = StatRespMsg_getStatData(statResp);
      StatData_getOsStat(statData, outFhgfsStat);

      if (outParentNodeID)
      {
         StatRespMsg_getParentInfo(statResp, outParentNodeID, outParentEntryID);
      }
   }
   else
   {
      LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "StatResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
      IGNORE_UNUSED_VARIABLE(log);
      IGNORE_UNUSED_VARIABLE(logContext);
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

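/*
 * Most remoting calls in this file share the same request/response skeleton (sketch; <Msg> and
 * <RespMsg> stand for the concrete message pair of each call):
 *
 *    <Msg>_initFromEntryInfo(&requestMsg, ...);                    // 1) build the request
 *    RequestResponseArgs_prepare(&rrArgs, NULL,
 *       (NetMessage*)&requestMsg, NETMSGTYPE_<RespMsg>);           // 2) declare expected response
 *    requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(
 *       app, &rrNode, &rrArgs);                                    // 3) send (with retries)
 *    retVal = (FhgfsOpsErr)<RespMsg>_getResult(rrArgs.outRespMsg); // 4) decode server result
 *    RequestResponseArgs_freeRespBuffers(&rrArgs, app);            // 5) release response buffers
 *
 * Only steps 1 and 4 differ between the calls; step 3, as its name suggests, retries on
 * communication failures and can be interrupted by pending signals.
 */
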
/**
 * Modify file or dir attributes.
 */
FhgfsOpsErr FhgfsOpsRemoting_setAttr(App* app, const EntryInfo* entryInfo,
   SettableFileAttribs* fhgfsAttr, int validAttribs, const struct FileEvent* event)
{
   Logger* log = App_getLogger(app);
   Config* cfg = App_getConfig(app);
   const char* logContext = "Remoting (set attr)";

   SetAttrMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   SetAttrRespMsg* setAttrResp;
   FhgfsOpsErr retVal;

   // logging
   if(validAttribs & SETATTR_CHANGE_MODE)
   { LOG_DEBUG(log, Log_DEBUG, logContext, "Changing mode"); }

   if(validAttribs & SETATTR_CHANGE_USERID)
   { LOG_DEBUG(log, Log_DEBUG, logContext, "Changing userID"); }

   if(validAttribs & SETATTR_CHANGE_GROUPID)
   { LOG_DEBUG(log, Log_DEBUG, logContext, "Changing groupID"); }

   if(validAttribs & SETATTR_CHANGE_MODIFICATIONTIME)
   { LOG_DEBUG(log, Log_DEBUG, logContext, "Changing modificationTime"); }

   if(validAttribs & SETATTR_CHANGE_LASTACCESSTIME)
   { LOG_DEBUG(log, Log_DEBUG, logContext, "Changing lastAccessTime"); }

   // prepare request
   SetAttrMsg_initFromEntryInfo(&requestMsg, entryInfo, validAttribs, fhgfsAttr, event);

   if (validAttribs & (SETATTR_CHANGE_USERID | SETATTR_CHANGE_GROUPID))
   {
      if(Config_getQuotaEnabled(cfg) )
         NetMessage_addMsgHeaderFeatureFlag((NetMessage*)&requestMsg, SETATTRMSG_FLAG_USE_QUOTA);
   }

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_SetAttrResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   setAttrResp = (SetAttrRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)SetAttrRespMsg_getValue(setAttrResp);

   if(unlikely(retVal != FhgfsOpsErr_SUCCESS) )
   { // error on server
      int logLevel = (retVal == FhgfsOpsErr_PATHNOTEXISTS) ? Log_DEBUG : Log_NOTICE;

      Logger_logFormatted(log, logLevel, logContext, "SetAttrResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

/**
 * @param outEntryInfo attribs set only in case of success (and must then be kfreed by the
 *    caller)
 */
FhgfsOpsErr FhgfsOpsRemoting_mkdir(App* app, const EntryInfo* parentInfo,
   struct CreateInfo* createInfo, EntryInfo* outEntryInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (mkdir)";

   MkDirMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(parentInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   MkDirRespMsg* mkResp;
   FhgfsOpsErr retVal;

   // prepare request
   MkDirMsg_initFromEntryInfo(&requestMsg, parentInfo, createInfo);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_MkDirResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   mkResp = (MkDirRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)MkDirRespMsg_getResult(mkResp);

   if(retVal == FhgfsOpsErr_SUCCESS)
   { // success
      EntryInfo_dup(MkDirRespMsg_getEntryInfo(mkResp), outEntryInfo);
   }
   else
   {
      int logLevel = Log_NOTICE;

      if(retVal == FhgfsOpsErr_EXISTS)
         logLevel = Log_DEBUG; // don't bother user with non-error messages

      Logger_logFormatted(log, logLevel, logContext,
         "MkDirResp ownerID: %u%s parentID: %s name: %s error code: %s",
         EntryInfo_getOwner(parentInfo), EntryInfo_getOwnerFlag(parentInfo), parentInfo->entryID,
         createInfo->entryName, FhgfsOpsErr_toErrString(retVal));
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

FhgfsOpsErr FhgfsOpsRemoting_rmdir(App* app, const EntryInfo* parentInfo, const char* entryName,
   const struct FileEvent* event)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (rmdir)";

   RmDirMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(parentInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   RmDirRespMsg* rmResp;
   FhgfsOpsErr retVal;

   // prepare request
   RmDirMsg_initFromEntryInfo(&requestMsg, parentInfo, entryName, event);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_RmDirResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   rmResp = (RmDirRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)RmDirRespMsg_getValue(rmResp);

   if(retVal != FhgfsOpsErr_SUCCESS)
   {
      int logLevel = Log_NOTICE;

      if( (retVal == FhgfsOpsErr_NOTEMPTY) || (retVal == FhgfsOpsErr_PATHNOTEXISTS) ||
          (retVal == FhgfsOpsErr_INUSE) )
         logLevel = Log_DEBUG; // don't bother user with non-error messages

      Logger_logFormatted(log, logLevel, logContext, "RmDirResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

/**
 * @param outEntryInfo attribs allocated/set only in case of success (and must then be kfreed by
 *    the caller); may be NULL.
 */
FhgfsOpsErr FhgfsOpsRemoting_mkfile(App* app, const EntryInfo* parentInfo,
   struct CreateInfo* createInfo, EntryInfo* outEntryInfo)
{
   return FhgfsOpsRemoting_mkfileWithStripeHints(app, parentInfo, createInfo, 0, 0, outEntryInfo);
}

/**
 * @param numtargets 0 for directory default.
 * @param chunksize 0 for directory default, must be 64K or a multiple of 64K otherwise.
 * @param outEntryInfo attribs allocated/set only in case of success (and must then be kfreed by
 *    the caller); may be NULL.
 */
FhgfsOpsErr FhgfsOpsRemoting_mkfileWithStripeHints(App* app, const EntryInfo* parentInfo,
   struct CreateInfo* createInfo, unsigned numtargets, unsigned chunksize, EntryInfo* outEntryInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (mkfile)";

   MkFileMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(parentInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   MkFileRespMsg* mkResp;
   FhgfsOpsErr retVal;

   // prepare request
   MkFileMsg_initFromEntryInfo(&requestMsg, parentInfo, createInfo);

   if(numtargets || chunksize)
      MkFileMsg_setStripeHints(&requestMsg, numtargets, chunksize);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_MkFileResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   mkResp = (MkFileRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)MkFileRespMsg_getResult(mkResp);

   if(retVal == FhgfsOpsErr_SUCCESS)
   { // success
      const EntryInfo* entryInfo = MkFileRespMsg_getEntryInfo(mkResp);

      if(outEntryInfo)
         EntryInfo_dup(entryInfo, outEntryInfo);
   }
   else
   {
      int logLevel = Log_NOTICE;

      if(retVal == FhgfsOpsErr_EXISTS)
         logLevel = Log_DEBUG; // don't bother user with non-error messages

      Logger_logFormatted(log, logLevel, logContext,
         "MkFileResp: ownerID: %u%s parentID: %s name: %s error code: %s",
         EntryInfo_getOwner(parentInfo), EntryInfo_getOwnerFlag(parentInfo), parentInfo->entryID,
         createInfo->entryName, FhgfsOpsErr_toErrString(retVal));
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

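/*
 * Example (illustrative): to create a file striped across 4 targets with 1 MiB chunks instead
 * of the parent directory defaults, a caller could pass numtargets = 4 and
 * chunksize = 1048576; passing 0 for either value keeps the respective directory default, which
 * is what the plain FhgfsOpsRemoting_mkfile() wrapper above does.
 */
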
FhgfsOpsErr FhgfsOpsRemoting_unlinkfile(App* app, const EntryInfo* parentInfo,
   const char* entryName, const struct FileEvent* event)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (unlink file)";

   UnlinkFileMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(parentInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   UnlinkFileRespMsg* unlinkResp;
   FhgfsOpsErr retVal;

   // prepare request
   UnlinkFileMsg_initFromEntryInfo(&requestMsg, parentInfo, entryName, event);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_UnlinkFileResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   unlinkResp = (UnlinkFileRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)UnlinkFileRespMsg_getValue(unlinkResp);

   if(retVal != FhgfsOpsErr_SUCCESS)
   {
      int logLevel = Log_NOTICE;

      if(retVal == FhgfsOpsErr_PATHNOTEXISTS)
         logLevel = Log_DEBUG; // don't bother user with non-error messages

      Logger_logFormatted(log, logLevel, logContext, "UnlinkFileResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

/**
 * @param ioInfo in and out arg; in case of success: fileHandleID and pathInfo will be set;
 *    pattern will be set if it is NULL; values have to be freed by the caller
 */
FhgfsOpsErr FhgfsOpsRemoting_openfile(const EntryInfo* entryInfo, RemotingIOInfo* ioInfo,
   uint32_t* outVersion, const struct FileEvent* event)
{
   App* app = ioInfo->app;
   Logger* log = App_getLogger(app);
   Config* cfg = App_getConfig(app);
   const char* logContext = "Remoting (open file)";

   const NumNodeID localNodeNumID = Node_getNumID(App_getLocalNode(app) );
   OpenFileMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   OpenFileRespMsg* openResp;
   FhgfsOpsErr retVal;

   // log open flags
   LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext,
      "EntryID: %s access: %s%s%s; extra flags: %s|%s", entryInfo->entryID,
      ioInfo->accessFlags & OPENFILE_ACCESS_READWRITE ? "rw" : "",
      ioInfo->accessFlags & OPENFILE_ACCESS_WRITE ? "w" : "",
      ioInfo->accessFlags & OPENFILE_ACCESS_READ ? "r" : "",
      ioInfo->accessFlags & OPENFILE_ACCESS_APPEND ? "append" : "",
      ioInfo->accessFlags & OPENFILE_ACCESS_TRUNC ? "trunc" : "");

   // prepare request msg
   OpenFileMsg_initFromSession(&requestMsg, localNodeNumID, entryInfo, ioInfo->accessFlags,
      event);

   if(Config_getQuotaEnabled(cfg) )
      NetMessage_addMsgHeaderFeatureFlag((NetMessage*)&requestMsg, OPENFILEMSG_FLAG_USE_QUOTA);

   if(Config_getSysBypassFileAccessCheckOnMeta(cfg))
      NetMessage_addMsgHeaderFeatureFlag((NetMessage*)&requestMsg,
         OPENFILEMSG_FLAG_BYPASS_ACCESS_CHECK);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_OpenFileResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   openResp = (OpenFileRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)OpenFileRespMsg_getResult(openResp);

   if(likely(retVal == FhgfsOpsErr_SUCCESS) )
   { // success => store file details
      const PathInfo* msgPathInfoPtr;

      ioInfo->fileHandleID = StringTk_strDup(OpenFileRespMsg_getFileHandleID(openResp) );

      LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "FileHandleID: %s",
         OpenFileRespMsg_getFileHandleID(openResp) );

      if(!ioInfo->pattern)
      { // inode doesn't have pattern yet => create it from response
         StripePattern* pattern = OpenFileRespMsg_createPattern(openResp);

         // check stripe pattern validity
         if(unlikely(StripePattern_getPatternType(pattern) == STRIPEPATTERN_Invalid) )
         { // invalid pattern
            Logger_logErrFormatted(log, logContext,
               "Received invalid stripe pattern from metadata node: %u%s; FileHandleID: %s",
               EntryInfo_getOwner(entryInfo), EntryInfo_getOwnerFlag(entryInfo),
               OpenFileRespMsg_getFileHandleID(openResp) );

            StripePattern_virtualDestruct(pattern);

            // nevertheless, the file is open now on the MDS => close it
            FhgfsOpsHelper_closefileWithAsyncRetry(entryInfo, ioInfo, NULL);

            kfree(ioInfo->fileHandleID);

            retVal = FhgfsOpsErr_INTERNAL;
         }
         else
            ioInfo->pattern = pattern; // received a valid pattern
      }

      msgPathInfoPtr = OpenFileRespMsg_getPathInfo(openResp);
      PathInfo_update(ioInfo->pathInfo, msgPathInfoPtr);

      if (outVersion)
         *outVersion = openResp->fileVersion;
   }
   else
   { // error on server
      int logLevel = (retVal == FhgfsOpsErr_PATHNOTEXISTS) ? Log_DEBUG : Log_NOTICE;

      Logger_logFormatted(log, logLevel, logContext, "OpenFileResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

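/*
 * Handle lifecycle note (summary of the contract above, judging by the cleanup paths): on
 * success the caller owns ioInfo->fileHandleID (freed with kfree) and, if ioInfo->pattern was
 * NULL on entry, the stripe pattern created from the response (released via
 * StripePattern_virtualDestruct); the handle is paired with a later
 * FhgfsOpsRemoting_closefile() call for the same entryInfo/ioInfo.
 */
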
FhgfsOpsErr FhgfsOpsRemoting_closefile(const EntryInfo* entryInfo, RemotingIOInfo* ioInfo,
   const struct FileEvent* event)
{
   return FhgfsOpsRemoting_closefileEx(entryInfo, ioInfo, true, event);
}

/**
 * Note: You probably want to call FhgfsOpsRemoting_closefile() instead of this method.
 *
 * @param allowRetries usually true, only callers like InternodeSyncer might use false
 *    here to make sure they don't block on communication retries too long.
 */
FhgfsOpsErr FhgfsOpsRemoting_closefileEx(const EntryInfo* entryInfo, RemotingIOInfo* ioInfo,
   bool allowRetries, const struct FileEvent* event)
{
   App* app = ioInfo->app;
   Config* cfg = App_getConfig(app);

   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (close file)";

   const NumNodeID localNodeNumID = Node_getNumID(App_getLocalNode(app) );
   const int maxUsedTargetIndex = AtomicInt_read(ioInfo->maxUsedTargetIndex);
   CloseFileMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   CloseFileRespMsg* closeResp;
   FhgfsOpsErr retVal;

   // prepare request
   CloseFileMsg_initFromSession(&requestMsg, localNodeNumID, ioInfo->fileHandleID, entryInfo,
      maxUsedTargetIndex, event);

   if(Config_getTuneEarlyCloseResponse(cfg) ) // (note: linux doesn't return release() res to apps)
      NetMessage_addMsgHeaderFeatureFlag( (NetMessage*)&requestMsg,
         CLOSEFILEMSG_FLAG_EARLYRESPONSE);

   if(ioInfo->needsAppendLockCleanup && *ioInfo->needsAppendLockCleanup)
      NetMessage_addMsgHeaderFeatureFlag( (NetMessage*)&requestMsg,
         CLOSEFILEMSG_FLAG_CANCELAPPENDLOCKS);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_CloseFileResp);

   // connect & communicate
   if(allowRetries)
   { // normal case
      requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);
   }
   else
   { // caller doesn't want retries => just use the requestResponse method without retries...
      requestRes = MessagingTk_requestResponseNode(app, &rrNode, &rrArgs);
   }

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   closeResp = (CloseFileRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)CloseFileRespMsg_getValue(closeResp);

   if(likely(retVal == FhgfsOpsErr_SUCCESS) )
   { // success
      LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "FileHandleID: %s", ioInfo->fileHandleID);
   }
   else
   { // error
      Logger_logFormatted(log, Log_NOTICE, logContext, "CloseFileResp error: %s (FileHandleID: %s)",
         FhgfsOpsErr_toErrString(retVal), ioInfo->fileHandleID);
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

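/*
 * Note on CLOSEFILEMSG_FLAG_EARLYRESPONSE (see the tuneEarlyCloseResponse check above): the
 * server may then acknowledge the close before it has been fully processed, which shortens
 * release() latency; since Linux does not propagate release() results to applications anyway
 * (as the in-line note above says), no error information is lost for the user.
 */
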
/**
 * This is the common code base of _flockEntryEx, _flockRangeEx and _flockAppendEx, which asks
 * for a lock and resends the lock request every few seconds.
 *
 * @param requestMsg initialized lock request msg (will be sent to the given owner), must be
 *    cleaned up by the caller.
 * @param respMsgType must be derived from SimpleIntMsg; the contained result will be interpreted
 *    as FhgfsOpsErr_...
 * @param allowRetries usually true, only callers like InternodeSyncer might use false
 *    here to make sure they don't block on communication retries too long.
 */
FhgfsOpsErr __FhgfsOpsRemoting_flockGenericEx(NetMessage* requestMsg, unsigned respMsgType,
   NodeOrGroup owner, bool isBuddyMirrored, App* app, const char* fileHandleID, int lockTypeFlags,
   char* lockAckID, bool allowRetries, RWLock* eiRLock)
{
   const int resendIntervalMS = 5000;

   const char* logContext = "Remoting (generic lock)";

   Logger* log = App_getLogger(app);
   AcknowledgmentStore* ackStore = App_getAckStore(app);

   WaitAck lockWaitAck;
   WaitAckMap waitAcks;
   WaitAckMap receivedAcks;
   WaitAckNotification ackNotifier;

   FhgfsOpsErr requestRes;
   SimpleIntMsg* flockResp;
   FhgfsOpsErr retVal;

   IGNORE_UNUSED_DEBUG_VARIABLE(log);
   IGNORE_UNUSED_DEBUG_VARIABLE(logContext);

   // register lock ack wait...

   WaitAck_init(&lockWaitAck, lockAckID, NULL);
   WaitAckMap_init(&waitAcks);
   WaitAckMap_init(&receivedAcks);
   WaitAckNotification_init(&ackNotifier);

   WaitAckMap_insert(&waitAcks, lockAckID, &lockWaitAck);

   AcknowledgmentStore_registerWaitAcks(ackStore, &waitAcks, &receivedAcks, &ackNotifier);

   do
   {
      RequestResponseNode rrNode = {
         .peer = isBuddyMirrored
            ? RRPeer_group(owner.group)
            : RRPeer_target(owner.node),
         .nodeStore = app->metaNodes,
         .targetStates = app->metaStateStore,
         .mirrorBuddies = app->metaBuddyGroupMapper
      };
      RequestResponseArgs rrArgs;

      // prepare request
      RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)requestMsg, respMsgType);

      // connect & communicate
      if(allowRetries)
      { // normal case
         requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);
      }
      else
      { // caller doesn't want retries => just use the requestResponse tool without retries...
         requestRes = MessagingTk_requestResponseNode(app, &rrNode, &rrArgs);
      }

      if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
      { // clean-up
         retVal = requestRes;
         break;
      }

      // store result
      flockResp = (SimpleIntMsg*)rrArgs.outRespMsg;
      retVal = (FhgfsOpsErr)SimpleIntMsg_getValue(flockResp);

      // cleanup response
      // (note: cleanup is done here to not block the buffer while we're waiting for delayed ack)
      RequestResponseArgs_freeRespBuffers(&rrArgs, app);

      // handle result

      if( (retVal == FhgfsOpsErr_WOULDBLOCK) && !(lockTypeFlags & ENTRYLOCKTYPE_NOWAIT) )
      {
         // not immediately granted and caller wants to wait for the lock

         bool waitRes;

         if (eiRLock)
            RWLock_readUnlock(eiRLock);

         waitRes = AcknowledgmentStore_waitForAckCompletion(
            ackStore, &waitAcks, &ackNotifier, resendIntervalMS);

         if (eiRLock)
            RWLock_readLock(eiRLock);

         if(waitRes)
         { // received lock grant
            LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "received async lock grant. "
               "(handleID: %s)", fileHandleID);

            retVal = FhgfsOpsErr_SUCCESS;
         }
         else
         if (!waitRes && !signal_pending(current))
         { // no grant received yet => bail out, so the caller can resend the request
            LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "no grant yet. bailing to caller... "
               "(handleID: %s)", fileHandleID);

            break;
         }
         else
         { // signal pending => cancel waiting
            LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "canceling wait due to pending signal "
               "(handleID: %s)", fileHandleID);

            retVal = FhgfsOpsErr_INTERRUPTED;
         }
      }

      if(retVal == FhgfsOpsErr_SUCCESS)
      { // success
         LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "received grant (handleID: %s)",
            fileHandleID);
      }
      else
      { // error
         LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext,
            "FLockResp error: %s (FileHandleID: %s)",
            FhgfsOpsErr_toErrString(retVal), fileHandleID);
      }
   } while (0);

   /* note: if we were interrupted after sending a lock request and user allowed waiting, we have
      one last chance now to get our ack after unregistering. (this check is also important to avoid
      race conditions because we still send acks to lock grants as long as we're registered.) */

   AcknowledgmentStore_unregisterWaitAcks(ackStore, &waitAcks);

   /* now that we're unregistered, we can safely check whether we received the grant after the
      interrupt and before we unregistered ourselves (see corresponding note above) */

   if( (retVal == FhgfsOpsErr_INTERRUPTED) && WaitAckMap_length(&receivedAcks) )
   { // good, we received the grant just in time
      retVal = FhgfsOpsErr_SUCCESS;
   }

   WaitAckNotification_uninit(&ackNotifier);
   WaitAckMap_uninit(&receivedAcks);
   WaitAckMap_uninit(&waitAcks);

   return retVal;
}

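/*
 * Lock-ack protocol sketch (summary of the flow above): the client registers a waiting ack,
 * sends the lock request, and if the server answers WOULDBLOCK it waits up to resendIntervalMS
 * for an asynchronous grant ack. On timeout it returns WOULDBLOCK to the caller, whose do/while
 * loop below resends the request; on a pending signal it unregisters and makes one final check
 * for a grant that may have raced with the interruption.
 */
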
/**
 * Note: You probably want to call FhgfsOpsRemoting_flockAppend() instead of this method.
 *
 * @param clientFD will typically be 0, because we request append locks for the whole client (not
 *    for individual FDs).
 * @param ownerPID just informative (value is stored on MDS, but not used)
 * @param allowRetries usually true, only callers like InternodeSyncer might use false
 *    here to make sure they don't block on communication retries too long.
 */
FhgfsOpsErr FhgfsOpsRemoting_flockAppendEx(const EntryInfo* entryInfo, RWLock* eiRLock, App* app,
   const char* fileHandleID, int64_t clientFD, int ownerPID, int lockTypeFlags, bool allowRetries)
{
   const char* logContext = "Remoting (flock append)";

   Logger* log = App_getLogger(app);
   AtomicInt* atomicCounter = App_getLockAckAtomicCounter(app);
   const NumNodeID localNodeNumID = Node_getNumID(App_getLocalNode(app) );
   char* lockAckID;
   FLockAppendMsg requestMsg;
   FhgfsOpsErr lockRes;
   NodeString alias;
   Node_copyAlias(App_getLocalNode(app), &alias);

   // (note: lockAckID must be _globally_ unique)
   lockAckID = StringTk_kasprintf("%X-%s-alck", AtomicInt_incAndRead(atomicCounter), alias.buf);
   if(unlikely(!lockAckID) )
   { // out of mem
      Logger_logErrFormatted(log, logContext, "Unable to proceed - out of memory");
      return FhgfsOpsErr_INTERNAL;
   }

   do
   {
      FLockAppendMsg_initFromSession(&requestMsg, localNodeNumID, fileHandleID, entryInfo,
         clientFD, ownerPID, lockTypeFlags, lockAckID);

      lockRes = __FhgfsOpsRemoting_flockGenericEx( (NetMessage*)&requestMsg,
         NETMSGTYPE_FLockAppendResp, entryInfo->owner, EntryInfo_getIsBuddyMirrored(entryInfo),
         app, fileHandleID, lockTypeFlags, lockAckID, allowRetries, eiRLock);
   } while (lockRes == FhgfsOpsErr_WOULDBLOCK && !(lockTypeFlags & ENTRYLOCKTYPE_NOWAIT));

   // cleanup
   kfree(lockAckID);

   return lockRes;
}

/**
 * Note: You probably want to call FhgfsOpsRemoting_flockEntry() instead of this method.
 *
 * @param allowRetries usually true, only callers like InternodeSyncer might use false
 *    here to make sure they don't block on communication retries too long.
 */
FhgfsOpsErr FhgfsOpsRemoting_flockEntryEx(const EntryInfo* entryInfo, RWLock* eiRLock, App* app,
   const char* fileHandleID, int64_t clientFD, int ownerPID, int lockTypeFlags, bool allowRetries)
{
   const char* logContext = "Remoting (flock entry)";

   Logger* log = App_getLogger(app);
   AtomicInt* atomicCounter = App_getLockAckAtomicCounter(app);
   const NumNodeID localNodeNumID = Node_getNumID(App_getLocalNode(app) );
   char* lockAckID;

   FLockEntryMsg requestMsg;
   FhgfsOpsErr lockRes;
   NodeString alias;
   Node_copyAlias(App_getLocalNode(app), &alias);

   // (note: lockAckID must be _globally_ unique)
   lockAckID = StringTk_kasprintf("%X-%s-elck", AtomicInt_incAndRead(atomicCounter), alias.buf);
   if(unlikely(!lockAckID) )
   { // out of mem
      Logger_logErrFormatted(log, logContext, "Unable to proceed - out of memory");
      return FhgfsOpsErr_INTERNAL;
   }

   do
   {
      FLockEntryMsg_initFromSession(&requestMsg, localNodeNumID, fileHandleID, entryInfo,
         clientFD, ownerPID, lockTypeFlags, lockAckID);

      lockRes = __FhgfsOpsRemoting_flockGenericEx((NetMessage*) &requestMsg,
         NETMSGTYPE_FLockEntryResp, entryInfo->owner, EntryInfo_getIsBuddyMirrored(entryInfo), app,
         fileHandleID, lockTypeFlags, lockAckID, allowRetries, eiRLock);
   } while (lockRes == FhgfsOpsErr_WOULDBLOCK && !(lockTypeFlags & ENTRYLOCKTYPE_NOWAIT));

   // cleanup
   kfree(lockAckID);

   return lockRes;
}

/**
 * Note: You probably want to call FhgfsOpsRemoting_flockRange() instead of this method.
 *
 * @param allowRetries usually true, only callers like InternodeSyncer might use false
 *    here to make sure they don't block on communication retries too long.
 */
FhgfsOpsErr FhgfsOpsRemoting_flockRangeEx(const EntryInfo* entryInfo, RWLock* eiRLock,
   App* app, const char* fileHandleID, int ownerPID, int lockTypeFlags, uint64_t start, uint64_t end,
   bool allowRetries)
{
   const char* logContext = "Remoting (flock range)";

   Logger* log = App_getLogger(app);
   AtomicInt* atomicCounter = App_getLockAckAtomicCounter(app);
   const NumNodeID localNodeNumID = Node_getNumID(App_getLocalNode(app) );
   char* lockAckID;

   FLockRangeMsg requestMsg;
   FhgfsOpsErr lockRes;
   NodeString alias;
   Node_copyAlias(App_getLocalNode(app), &alias);

   // (note: lockAckID must be _globally_ unique)
   lockAckID = StringTk_kasprintf("%X-%s-rlck", AtomicInt_incAndRead(atomicCounter), alias.buf);
   if(unlikely(!lockAckID) )
   { // out of mem
      Logger_logErrFormatted(log, logContext, "Unable to proceed - out of memory");
      return FhgfsOpsErr_INTERNAL;
   }

   do
   {
      FLockRangeMsg_initFromSession(&requestMsg, localNodeNumID, fileHandleID, entryInfo,
         ownerPID, lockTypeFlags, start, end, lockAckID);

      lockRes = __FhgfsOpsRemoting_flockGenericEx((NetMessage*) &requestMsg,
         NETMSGTYPE_FLockRangeResp, entryInfo->owner, EntryInfo_getIsBuddyMirrored(entryInfo), app,
         fileHandleID, lockTypeFlags, lockAckID, allowRetries, eiRLock);
   } while (lockRes == FhgfsOpsErr_WOULDBLOCK && !(lockTypeFlags & ENTRYLOCKTYPE_NOWAIT));

   // cleanup
   kfree(lockAckID);

   return lockRes;
}

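/*
 * Example lockAckID (illustrative values): with counter value 0x2A and node alias "node01", the
 * append/entry/range variants above generate "2A-node01-alck", "2A-node01-elck" and
 * "2A-node01-rlck" respectively; combining a per-client counter with the node alias is what
 * makes these IDs globally unique across clients, as the in-line notes require.
 */
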
/**
 * Check if there have been errors during _writefile().
 *
 * @param states list of per-target write states of this stripe set.
 * @param outWritten number of successfully written bytes or negative fhgfs error code.
 * @param firstTargetIndex the index of the first stripe target (i.e. target index of states[0]).
 * @param numStripeNodes count of stripe targets.
 * @return true if all was fine, false if there was an error.
 *
 * Note: ioInfo->firstWriteDone is a bit mask of the targets; the bit of a target is set here if
 *    a chunk was (at least partially) written to that target (for server cache loss detection).
 */
static bool __FhgfsOpsRemoting_writefileVerify(App* app, RemotingIOInfo* ioInfo,
   struct list_head* states, ssize_t* outWritten, unsigned firstTargetIndex,
   unsigned numStripeNodes)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (write file)";

   unsigned i = 0;
   struct FileOpVecState* vstate;

   *outWritten = 0;

   list_for_each_entry(vstate, states, base.base.targetInfoList)
   {
      FileOpState* state = &vstate->base;
      /* note: we abort on the first error that comes up to return the number of successfully
         written bytes till this error. (with a stripe count > 1 there might be data successfully
         written to following targets, but this info cannot be returned to the user.) */

      if(likely(state->base.nodeResult >= 0) )
      { // some data was written => update BitStore with first write done
        // (note: negative results are error codes and must not be counted here)
         unsigned currentTargetIndex = (firstTargetIndex + i) % numStripeNodes;

         BitStore_setBit(ioInfo->firstWriteDone, currentTargetIndex, true);
         *outWritten += state->base.nodeResult;
         i += 1;
      }

      if(likely(state->expectedNodeResult == state->base.nodeResult) )
         continue;

      // result was other than we expected.

      if(state->base.nodeResult >= 0)
      { // node wrote only a part of the data (probably disk full => makes no sense to go on)
         Logger_logFormatted(log, Log_NOTICE, logContext,
            "Problem storage targetID: %hu; "
            "Msg: Node wrote only %lld bytes (expected %lld bytes); FileHandle: %s",
            state->base.targetID, state->base.nodeResult, state->expectedNodeResult,
            ioInfo->fileHandleID);
      }
      else
      { // error occurred
         FhgfsOpsErr nodeError = -(state->base.nodeResult);

         if(nodeError == FhgfsOpsErr_INTERRUPTED) // this is normal on ctrl+c (=> no logErr() )
            Logger_logFormatted(log, Log_DEBUG, logContext,
               "Storage targetID: %hu; Msg: %s; FileHandle: %s",
               state->base.targetID, FhgfsOpsErr_toErrString(nodeError), ioInfo->fileHandleID);
         else
            Logger_logErrFormatted(log, logContext,
               "Error storage targetID: %hu; Msg: %s; FileHandle: %s",
               state->base.targetID, FhgfsOpsErr_toErrString(nodeError), ioInfo->fileHandleID);

         *outWritten = state->base.nodeResult;
      }

      return false;
   } // end of for-loop (result verification)

   return true;
}

static void writefile_nextIter(CommKitContext* context, FileOpState* state)
{
   struct FileOpVecState* vecState = container_of(state, struct FileOpVecState, base);

   state->data = vecState->data;
}

/**
 * Single-threaded parallel file write.
 * Works for mirrored and unmirrored files. In case of a mirrored file, the mirror data will be
 * forwarded by the servers.
 *
 * @return number of bytes written or negative fhgfs error code
 */
ssize_t FhgfsOpsRemoting_writefileVec(struct iov_iter* iter, loff_t offset,
   RemotingIOInfo* ioInfo, bool serializeWrites)
{
   App* app = ioInfo->app;

   bool verifyRes;
   ssize_t verifyValue;
   ssize_t retVal = iov_iter_count(iter);
   size_t toBeWritten = iov_iter_count(iter);
   loff_t currentOffset = offset;
   struct iov_iter iterCopy = *iter;

   StripePattern* pattern = ioInfo->pattern;
   unsigned chunkSize = StripePattern_getChunkSize(pattern);
   UInt16Vec* targetIDs = pattern->getStripeTargetIDs(pattern);
   unsigned numStripeTargets = UInt16Vec_length(targetIDs);
   int maxUsedTargetIndex = AtomicInt_read(ioInfo->maxUsedTargetIndex);

   __FhgfsOpsRemoting_logDebugIOCall(__func__, iov_iter_count(iter), offset, ioInfo, NULL);

#ifdef BEEGFS_NVFS
   ioInfo->nvfs = RdmaInfo_acquireNVFS();
#endif
   while(toBeWritten)
   {
      unsigned currentTargetIndex = pattern->getStripeTargetIndex(pattern, currentOffset);
      unsigned firstTargetIndex = currentTargetIndex;
      unsigned numWorks = 0;
      size_t expectedWritten = 0;
      LIST_HEAD(statesList);

      /* stripeset-loop: loop over one stripe set (using dynamically determined stripe target
         indices). */
      while(toBeWritten && (numWorks < numStripeTargets) )
      {
         size_t currentChunkSize =
            StripePattern_getChunkEnd(pattern, currentOffset) - currentOffset + 1;
         loff_t currentNodeLocalOffset = __FhgfsOpsRemoting_getChunkOffset(
            currentOffset, chunkSize, numStripeTargets, currentTargetIndex);
         struct iov_iter chunkIter;

         struct FileOpVecState* state = mempool_alloc(writefileStatePool,
            list_empty(&statesList) ? GFP_NOFS : GFP_NOWAIT);

         if (!state)
            break;

         maxUsedTargetIndex = MAX(maxUsedTargetIndex, (int)currentTargetIndex);

         chunkIter = iterCopy;
         iov_iter_truncate(&chunkIter, currentChunkSize);

         // prepare the state information
         FhgfsOpsCommKit_initFileOpState(
            &state->base,
            currentNodeLocalOffset,
            iov_iter_count(&chunkIter),
            UInt16Vec_at(targetIDs, currentTargetIndex) );

         state->base.firstWriteDoneForTarget =
            BitStore_getBit(ioInfo->firstWriteDone, currentTargetIndex);

         state->data = chunkIter;

         list_add_tail(&state->base.base.targetInfoList, &statesList);

         // (note on buddy mirroring: server-side mirroring is default, so nothing to do here)

         App_incNumRemoteWrites(app);

         // prepare for next loop
         {
            size_t count = iov_iter_count(&chunkIter);
            currentOffset += count;
            toBeWritten -= count;
            expectedWritten += count;
            numWorks++;
            iov_iter_advance(&iterCopy, count);
            currentTargetIndex = (currentTargetIndex + 1) % numStripeTargets;
         }

         if(serializeWrites)
            break;
      }

      // communicate with the nodes
      FhgfsOpsCommKit_writefileV2bCommunicate(app, ioInfo, &statesList, writefile_nextIter, NULL);

      // verify results
      verifyRes = __FhgfsOpsRemoting_writefileVerify(app, ioInfo, &statesList, &verifyValue,
         firstTargetIndex, numStripeTargets);

      while (!list_empty(&statesList) )
      {
         struct FileOpVecState* state = list_first_entry(&statesList, struct FileOpVecState,
            base.base.targetInfoList);

         list_del(&state->base.base.targetInfoList);
         mempool_free(state, writefileStatePool);
      }

      if(unlikely(!verifyRes) )
      {
         retVal = verifyValue;
         break;
      }
   } // end of while-loop (write out data)

   AtomicInt_max(ioInfo->maxUsedTargetIndex, maxUsedTargetIndex);
   if (retVal > 0)
      iov_iter_advance(iter, retVal);

#ifdef BEEGFS_NVFS
   if (ioInfo->nvfs)
   {
      RdmaInfo_releaseNVFS();
      ioInfo->nvfs = false;
   }
#endif
   return retVal;
}

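/*
 * Worked example (illustrative numbers): with 3 stripe targets and chunkSize = 512 KiB, a 2 MiB
 * write at offset 0 is split into one stripe set of three 512 KiB requests (targets 0..2, handed
 * to FhgfsOpsCommKit_writefileV2bCommunicate as one batch) followed by a second stripe set with
 * a single 512 KiB request to target 0 at chunk-file-local offset 512 KiB; with
 * serializeWrites == true each stripe set contains only one request at a time.
 */
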
/**
 * Write/read a vector (array) of pages to/from the storage servers.
 *
 * @return number of bytes read/written or negative fhgfs error code
 */
ssize_t FhgfsOpsRemoting_rwChunkPageVec(FhgfsChunkPageVec *pageVec, RemotingIOInfo* ioInfo,
   Fhgfs_RWType rwType)
{
   App* app = ioInfo->app;
   Logger* log = App_getLogger(app);

   bool needReadWriteHandlePages = false; // needs to be done on error in this method

   loff_t offset = FhgfsChunkPageVec_getFirstPageFileOffset(pageVec);

   const char* logContext = "Remoting read/write vec";
   const char* rwTypeStr = __FhgfsOpsRemoting_rwTypeToString(rwType);

   StripePattern* pattern = ioInfo->pattern;
   unsigned chunkSize = StripePattern_getChunkSize(pattern);
   size_t chunkPages = RemotingIOInfo_getNumPagesPerStripe(ioInfo);
   unsigned numPages = FhgfsChunkPageVec_getSize(pageVec);
   unsigned pageIdx = 0; // page index

   UInt16Vec* targetIDs = pattern->getStripeTargetIDs(pattern);
   unsigned numStripeNodes = UInt16Vec_length(targetIDs);
   int maxUsedTargetIndex = AtomicInt_read(ioInfo->maxUsedTargetIndex);

   unsigned targetIndex = pattern->getStripeTargetIndex(pattern, offset);

   loff_t chunkOffset = __FhgfsOpsRemoting_getChunkOffset(
      offset, chunkSize, numStripeNodes, targetIndex);

   ssize_t retVal;

   char* msgBuf;
   FhgfsCommKitVec state;

   // sanity check
   if (numPages > chunkPages)
   {
      Logger_logErrFormatted(log, logContext, "Bug: %s: numPages (%u) > numChunkPages (%lu)!",
         rwTypeStr, numPages, chunkPages);

      needReadWriteHandlePages = true;
      retVal = -EINVAL;
      goto out;
   }

#ifdef LOG_DEBUG_MESSAGES
   {
      ssize_t supposedSize = FhgfsChunkPageVec_getDataSize(pageVec);
      __FhgfsOpsRemoting_logDebugIOCall(__func__, supposedSize, offset, ioInfo, rwTypeStr);
   }
#endif

   msgBuf = mempool_alloc(FhgfsOpsRemoting_msgBufPool, GFP_NOFS);
   if (unlikely(!msgBuf) )
   { // pool allocs must not fail!
      printk_fhgfs(KERN_WARNING, "kernel bug (%s): mempool_alloc failed\n", __func__);

      needReadWriteHandlePages = true;
      retVal = -ENOMEM;
      goto outNoAlloc;
   }

   /* in debug mode we use individual allocations instead of a single big buffer, as we can detect
      array out of bounds for those (if the proper kernel options are set) */

   maxUsedTargetIndex = MAX(maxUsedTargetIndex, (int)targetIndex);

   // prepare the state information
   state = FhgfsOpsCommKitVec_assignRWfileState(
      pageVec, pageIdx, numPages, chunkOffset, UInt16Vec_at(targetIDs, targetIndex), msgBuf);

   FhgfsOpsCommKitVec_setRWFileStateFirstWriteDone(
      BitStore_getBit(ioInfo->firstWriteDone, targetIndex), &state);

   if (rwType == BEEGFS_RWTYPE_WRITE)
   {
      // (note on buddy mirroring: server-side mirroring is default, so nothing to do here)

      App_incNumRemoteWrites(app);
   }
   else
   {
      App_incNumRemoteReads(app);
   }

   retVal = FhgfsOpsCommKitVec_rwFileCommunicate(app, ioInfo, &state, rwType);

   if(retVal > 0)
   {
      if (rwType == BEEGFS_RWTYPE_WRITE)
         BitStore_setBit(ioInfo->firstWriteDone, targetIndex, true);
   }
   else if (retVal == -FhgfsOpsErr_COMMUNICATION)
   {
      // commkit has done no communication at all, and thus hasn't touched any pages. it is our
      // responsibility to end pending io with an error now.
      needReadWriteHandlePages = true;
   }

   LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "fileHandleID: %s rwType %s: sum-result %ld",
      ioInfo->fileHandleID, rwTypeStr, retVal);
   IGNORE_UNUSED_VARIABLE(log);
   IGNORE_UNUSED_VARIABLE(logContext);
   IGNORE_UNUSED_VARIABLE(rwTypeStr);

   mempool_free(msgBuf, FhgfsOpsRemoting_msgBufPool);

outNoAlloc:
   AtomicInt_max(ioInfo->maxUsedTargetIndex, maxUsedTargetIndex);

out:
   if (unlikely(needReadWriteHandlePages) )
   {
      if (rwType == BEEGFS_RWTYPE_WRITE)
         FhgfsChunkPageVec_iterateAllHandleWritePages(pageVec, -EAGAIN);
      else
         FhgfsChunkPageVec_iterateAllHandleReadErr(pageVec);
   }

   return retVal;
}

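/*
 * Note (summary of the sanity check above): unlike the iterator-based read/write paths, this
 * page-vec path apparently handles at most one chunk per call (numPages must not exceed the
 * pages per stripe chunk), so a single target, msgBuf and state suffice; the mempool-backed
 * msgBuf is what keeps the call usable under memory pressure in writeback context.
 */
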
static void readfile_nextIter(CommKitContext* context, FileOpState* state)
{
   struct FileOpVecState* vecState = container_of(state, struct FileOpVecState, base);

   state->data = vecState->data;
}

ssize_t FhgfsOpsRemoting_readfileVec(struct iov_iter* iter, size_t toBeRead, loff_t offset,
|
|
RemotingIOInfo* ioInfo, FhgfsInode* fhgfsInode)
|
|
{
|
|
App* app = ioInfo->app;
|
|
|
|
ssize_t retVal = 0;
|
|
ssize_t errnum = 0;
|
|
BeeGFS_ReadSink readsink = {0};
|
|
loff_t currentOffset = offset;
|
|
|
|
StripePattern* pattern = ioInfo->pattern;
|
|
unsigned chunkSize = StripePattern_getChunkSize(pattern);
|
|
UInt16Vec* targetIDs = pattern->getStripeTargetIDs(pattern);
|
|
unsigned numStripeNodes = UInt16Vec_length(targetIDs);
|
|
const char* fileHandleID = ioInfo->fileHandleID;
|
|
int maxUsedTargetIndex = AtomicInt_read(ioInfo->maxUsedTargetIndex);
|
|
size_t stripeSetSize = (size_t) chunkSize * numStripeNodes;
|
|
|
|
__FhgfsOpsRemoting_logDebugIOCall(__func__, iov_iter_count(iter), offset, ioInfo, NULL);
|
|
|
|
#ifdef BEEGFS_NVFS
|
|
ioInfo->nvfs = RdmaInfo_acquireNVFS();
|
|
#endif
|
|
while(toBeRead && !errnum)
|
|
{
|
|
unsigned currentTargetIndex = pattern->getStripeTargetIndex(pattern, currentOffset);
|
|
LIST_HEAD(stateList);
|
|
struct FileOpVecState* state;
|
|
ssize_t bytesReadThisRound = 0;
|
|
struct iov_iter stripeSetIter;
|
|
size_t maxReadSize = min_t(size_t, stripeSetSize, toBeRead);
|
|
|
|
beegfs_readsink_reserve(&readsink, iter, maxReadSize);
|
|
stripeSetIter = readsink.sanitized_iter;
|
|
|
|
// stripeset-loop: loop over one stripe set (using dynamically determined stripe node
|
|
// indices).
|
|
|
|
for(unsigned numWorks = 0;
|
|
(numWorks < numStripeNodes
|
|
&& toBeRead && iov_iter_count(&stripeSetIter));
|
|
++ numWorks)
|
|
{
|
|
size_t currentChunkSize =
|
|
StripePattern_getChunkEnd(pattern, currentOffset) - currentOffset + 1;
|
|
size_t currentReadSize = MIN(currentChunkSize, toBeRead);
|
|
loff_t currentNodeLocalOffset = __FhgfsOpsRemoting_getChunkOffset(
|
|
currentOffset, chunkSize, numStripeNodes, currentTargetIndex);
|
|
struct iov_iter chunkIter;
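
         /* note: only the first state of a stripe set may block in the allocator (GFP_NOFS);
            later states use GFP_NOWAIT, so on allocation failure the loop breaks and the
            partially built set is still submitted below instead of stalling the whole read. */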
         state = kmalloc(sizeof(*state), list_empty(&stateList) ? GFP_NOFS : GFP_NOWAIT);
         if (!state)
            break;

         maxUsedTargetIndex = MAX(maxUsedTargetIndex, (int)currentTargetIndex);

         chunkIter = stripeSetIter;
         iov_iter_truncate(&chunkIter, currentChunkSize);

         // prepare the state information
         FhgfsOpsCommKit_initFileOpState(
            &state->base,
            currentNodeLocalOffset,
            iov_iter_count(&chunkIter),
            UInt16Vec_at(targetIDs, currentTargetIndex) );

         state->base.firstWriteDoneForTarget =
            BitStore_getBit(ioInfo->firstWriteDone, currentTargetIndex);

         state->data = chunkIter;

         list_add_tail(&state->base.base.targetInfoList, &stateList);

         // use secondary buddy mirror for odd inode numbers
         if( (StripePattern_getPatternType(pattern) == STRIPEPATTERN_BuddyMirror) )
            state->base.base.useBuddyMirrorSecond =
               fhgfsInode ? (fhgfsInode->vfs_inode.i_ino & 1) : false;

         App_incNumRemoteReads(app);

         // prepare for next loop
         currentOffset += currentReadSize;
         toBeRead -= currentReadSize;

         currentTargetIndex = (currentTargetIndex + 1) % numStripeNodes;

         iov_iter_advance(&stripeSetIter, iov_iter_count(&chunkIter));
      }

      if(list_empty(&stateList) )
      {
         errnum = -FhgfsOpsErr_OUTOFMEM;
         break;
      }

      // communicate with the nodes
      FhgfsOpsCommKit_readfileV2bCommunicate(app, ioInfo, &stateList, readfile_nextIter, NULL);

      // verify results
      list_for_each_entry(state, &stateList, base.base.targetInfoList)
      {
         if(state->base.base.nodeResult == state->base.expectedNodeResult)
         {
            bytesReadThisRound += state->base.base.nodeResult;
            continue;
         }

         // result not as expected => check cause: end-of-file or error condition

         if(state->base.base.nodeResult >= 0)
         { // we have an end of file here (but some data might have been read)
            bytesReadThisRound += state->base.base.nodeResult;
         }
         else
         { // error occurred
            FhgfsOpsErr nodeError = -(state->base.base.nodeResult);

            Logger* log = App_getLogger(app);
            const char* logContext = "Remoting (read file)";

            if(nodeError == FhgfsOpsErr_INTERRUPTED) // normal on ctrl+c (=> no logErr() )
               Logger_logFormatted(log, Log_DEBUG, logContext,
                  "Storage targetID: %hu; Msg: %s; FileHandle: %s",
                  state->base.base.targetID, FhgfsOpsErr_toErrString(nodeError), fileHandleID);
            else
               Logger_logErrFormatted(log, logContext,
                  "Error storage targetID: %hu; Msg: %s; FileHandle: %s",
                  state->base.base.targetID, FhgfsOpsErr_toErrString(nodeError), fileHandleID);

            errnum = state->base.base.nodeResult;
         }

         toBeRead = 0; /* abort the read here due to incomplete result/error */
         break;
      } // end of results verification for-loop

      while (!list_empty(&stateList) )
      {
         struct FileOpVecState* state = list_first_entry(&stateList, struct FileOpVecState,
            base.base.targetInfoList);

         list_del(&state->base.base.targetInfoList);
         kfree(state);
      }

      beegfs_readsink_release(&readsink);

      retVal += bytesReadThisRound;
      iov_iter_advance(iter, bytesReadThisRound);
   } // end of while(toBeRead)

   AtomicInt_max(ioInfo->maxUsedTargetIndex, maxUsedTargetIndex);

   beegfs_readsink_release(&readsink); // make sure it's released even if we left the loop early

#ifdef BEEGFS_NVFS
   if (ioInfo->nvfs)
   {
      RdmaInfo_releaseNVFS();
      ioInfo->nvfs = false;
   }
#endif

   return retVal ? retVal : errnum;
}
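
/* note: the metadata operations below all follow the same request/response pattern: init a
 * typed request message, describe the owning meta node (or buddy group) via a
 * RequestResponseNode, send it through MessagingTk_requestResponseNodeRetryAutoIntr() (which,
 * judging by its name, retries automatically and handles interruption), then decode the typed
 * response value and free the response buffers. */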

FhgfsOpsErr FhgfsOpsRemoting_rename(App* app, const char* oldName, unsigned oldLen,
   DirEntryType entryType, const EntryInfo* fromDirInfo, const char* newName, unsigned newLen,
   const EntryInfo* toDirInfo, const struct FileEvent* event)
{
   FhgfsOpsErr retVal;

   RenameMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(fromDirInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   RenameRespMsg* renameResp;

   // prepare request
   RenameMsg_initFromEntryInfo(&requestMsg, oldName, oldLen, entryType, fromDirInfo, newName,
      newLen, toDirInfo, event);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_RenameResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   renameResp = (RenameRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)RenameRespMsg_getValue(renameResp);

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

FhgfsOpsErr FhgfsOpsRemoting_truncfile(App* app, const EntryInfo* entryInfo, loff_t size,
   const struct FileEvent* event)
{
   Logger* log = App_getLogger(app);
   Config* cfg = App_getConfig(app);
   const char* logContext = "Remoting (trunc file)";

   TruncFileMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   TruncFileRespMsg* truncResp;
   FhgfsOpsErr retVal;

   // prepare request
   TruncFileMsg_initFromEntryInfo(&requestMsg, size, entryInfo, event);

   if(Config_getQuotaEnabled(cfg) )
      NetMessage_addMsgHeaderFeatureFlag((NetMessage*)&requestMsg, TRUNCFILEMSG_FLAG_USE_QUOTA);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_TruncFileResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   truncResp = (TruncFileRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)TruncFileRespMsg_getValue(truncResp);

   if(unlikely(retVal != FhgfsOpsErr_SUCCESS && retVal != FhgfsOpsErr_TOOBIG) )
   { // error on server
      int logLevel = (retVal == FhgfsOpsErr_PATHNOTEXISTS) ? Log_DEBUG : Log_NOTICE;

      Logger_logFormatted(log, logLevel, logContext, "TruncFileResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

FhgfsOpsErr FhgfsOpsRemoting_fsyncfile(RemotingIOInfo* ioInfo, bool forceRemoteFlush,
   bool checkSession, bool doSyncOnClose)
{
   const char* logContext = "Remoting (fsync file)";

   App* app = ioInfo->app;
   Logger* log = App_getLogger(app);

   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;

   StripePattern* pattern = ioInfo->pattern;
   UInt16Vec* targetIDs = pattern->getStripeTargetIDs(pattern);
   ssize_t numStripeTargets = UInt16Vec_length(targetIDs);
   ssize_t numMirrorTargets = (StripePattern_getPatternType(pattern) == STRIPEPATTERN_BuddyMirror)
      ? numStripeTargets
      : 0;

   int i;

   struct FsyncContext context = {
      .userID = FhgfsCommon_getCurrentUserID(),
      .forceRemoteFlush = forceRemoteFlush,
      .checkSession = checkSession,
      .doSyncOnClose = doSyncOnClose,
   };

   INIT_LIST_HEAD(&context.states);
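
   /* build one fsync state per stripe target; for buddy-mirrored patterns, a second pass
      (i >= numStripeTargets) adds another state per target with useBuddyMirrorSecond set, so
      that both buddies of each mirror group get flushed. */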
   for(i = 0; i < numStripeTargets + numMirrorTargets; i++)
   {
      struct FsyncState* state = kzalloc(sizeof(*state), GFP_NOFS);
      uint16_t nodeIdx = i < numStripeTargets ? i : i - numStripeTargets;

      if(!state)
      {
         Logger_logErr(log, logContext, "Memory allocation failed.");
         retVal = FhgfsOpsErr_OUTOFMEM;
         goto state_failed;
      }

      FhgfsOpsCommKit_initFsyncState(&context, state, UInt16Vec_at(targetIDs, nodeIdx) );
      state->base.useBuddyMirrorSecond = i >= numStripeTargets;
   }

   FhgfsOpsCommKit_fsyncCommunicate(app, ioInfo, &context);

state_failed:
   while(!list_empty(&context.states) )
   {
      struct FsyncState* state = list_first_entry(&context.states, struct FsyncState,
         base.targetInfoList);

      list_del(&state->base.targetInfoList);
      if(state->base.nodeResult != FhgfsOpsErr_SUCCESS && retVal == FhgfsOpsErr_SUCCESS)
      {
         retVal = -state->base.nodeResult;
         Logger_logFormatted(log, Log_WARNING, logContext,
            "Error storage target: %hu; Msg: %s", state->base.selectedTargetID,
            FhgfsOpsErr_toErrString(retVal) );
      }

      kfree(state);
   }

   return retVal;
}

/**
 * @param ignoreErrors true if success should be returned even if some (or all) known targets
 *    returned errors (in which case outSizeTotal/outSizeFree values will only show numbers from
 *    targets without errors); local errors (e.g. failed mem alloc) will not be ignored.
 * @return FhgfsOpsErr_UNKNOWNTARGET if no targets are known.
 */
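/* usage sketch (hypothetical caller, e.g. for statfs(2)-style totals; error handling omitted):
 *
 *    int64_t sizeTotal, sizeFree;
 *    FhgfsOpsErr res = FhgfsOpsRemoting_statStoragePath(app, true, &sizeTotal, &sizeFree);
 *    // on FhgfsOpsErr_SUCCESS, sizeTotal/sizeFree hold the sums over all reachable targets
 */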
FhgfsOpsErr FhgfsOpsRemoting_statStoragePath(App* app, bool ignoreErrors, int64_t* outSizeTotal,
   int64_t* outSizeFree)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (stat storage targets)";

   TargetMapper* targetMapper = App_getTargetMapper(app);

   FhgfsOpsErr retVal = FhgfsOpsErr_OUTOFMEM;

   UInt16List targetIDs;
   UInt16ListIter targetsIter;
   unsigned numTargets;

   struct list_head targetStates;
   struct StatStorageState* state;

   *outSizeTotal = 0;
   *outSizeFree = 0;

   UInt16List_init(&targetIDs);
   INIT_LIST_HEAD(&targetStates);

   TargetMapper_getTargetIDs(targetMapper, &targetIDs);
   numTargets = UInt16List_length(&targetIDs);

   if(unlikely(!numTargets) )
   { // we treat no known storage servers as an error
      UInt16List_uninit(&targetIDs);
      Logger_logFormatted(log, Log_CRITICAL, logContext, "No storage targets known.");
      return FhgfsOpsErr_UNKNOWNTARGET;
   }

   UInt16ListIter_init(&targetsIter, &targetIDs);
   while(!UInt16ListIter_end(&targetsIter) )
   {
      struct StatStorageState* state = kmalloc(sizeof(*state), GFP_NOFS);

      if(!state)
         goto fail_state;

      FhgfsOpsCommKit_initStatStorageState(&targetStates, state,
         UInt16ListIter_value(&targetsIter) );

      UInt16ListIter_next(&targetsIter);
   }

   retVal = FhgfsOpsErr_SUCCESS;
   FhgfsOpsCommKit_statStorageCommunicate(app, &targetStates);

   list_for_each_entry(state, &targetStates, base.targetInfoList)
   {
      if(state->base.nodeResult != FhgfsOpsErr_SUCCESS)
      { // something went wrong with this target
         LogLevel logLevel = ignoreErrors ? Log_DEBUG : Log_WARNING;

         Logger_logFormatted(log, logLevel, logContext,
            "Error target (storage): %hu; Msg: %s",
            state->base.selectedTargetID, FhgfsOpsErr_toErrString(-state->base.nodeResult));

         if(!ignoreErrors)
         {
            retVal = -state->base.nodeResult;
            break;
         }
      }
      else
      { // we got data from this target => add up
         *outSizeTotal += state->totalSize;
         *outSizeFree += state->totalFree;
      }
   }

fail_state:
   while(!list_empty(&targetStates) )
   {
      struct StatStorageState* state = list_first_entry(&targetStates, struct StatStorageState,
         base.targetInfoList);

      list_del(&state->base.targetInfoList);
      kfree(state);
   }

   UInt16List_uninit(&targetIDs);

   return retVal;
}

/**
 * @param size Size of the buffer pointed to by outValue.
 * @param outSize Size of the extended attribute list; may be larger than the buffer, in which
 *    case the list is not read in full.
 */
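/* usage sketch (hypothetical caller; the same probe-then-fetch pattern applies to
 * FhgfsOpsRemoting_getXAttr() below):
 *
 *    ssize_t listSize = 0;
 *    // pass outValue=NULL to query only the required list size ...
 *    FhgfsOpsErr res = FhgfsOpsRemoting_listXAttr(app, entryInfo, NULL, 0, &listSize);
 *    // ... then call again with a buffer of at least listSize bytes
 */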
FhgfsOpsErr FhgfsOpsRemoting_listXAttr(App* app, const EntryInfo* entryInfo, char* outValue,
   size_t size, ssize_t* outSize)
{
   ListXAttrMsg requestMsg;
   FhgfsOpsErr requestRes;

   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;

   ListXAttrRespMsg* listXAttrResp;
   char* xAttrList;
   int xAttrListSize;
   int listXAttrReturnCode;

   // prepare request msg
   ListXAttrMsg_initFromEntryInfoAndSize(&requestMsg, entryInfo, size);

   RequestResponseArgs_prepare(&rrArgs, NULL, &requestMsg.netMessage, NETMSGTYPE_ListXAttrResp);

   // connect & communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean up
      goto cleanup_request;
   }

   // handle result
   listXAttrResp = (ListXAttrRespMsg*)rrArgs.outRespMsg;

   listXAttrReturnCode = ListXAttrRespMsg_getReturnCode(listXAttrResp);
   xAttrList = ListXAttrRespMsg_getValueBuf(listXAttrResp);
   xAttrListSize = ListXAttrRespMsg_getSize(listXAttrResp);

   if(listXAttrReturnCode != FhgfsOpsErr_SUCCESS)
   {
      requestRes = listXAttrReturnCode;
   }
   else if(outValue)
   { // outValue given => copy the xattr list into the provided buffer
      if(xAttrListSize <= size)
      {
         memcpy(outValue, xAttrList, xAttrListSize);
         *outSize = xAttrListSize;
      }
      else // provided buffer is too small => error
      {
         requestRes = FhgfsOpsErr_RANGE;
      }
   }
   else
   { // outValue == NULL => just return the size
      *outSize = xAttrListSize;
   }

   // clean up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return requestRes;
}

extern FhgfsOpsErr FhgfsOpsRemoting_removeXAttr(App* app, const EntryInfo* entryInfo,
   const char* name)
{
   RemoveXAttrMsg requestMsg;
   FhgfsOpsErr requestRes;

   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;

   RemoveXAttrRespMsg* removeXAttrResp;

   // prepare request msg
   RemoveXAttrMsg_initFromEntryInfoAndName(&requestMsg, entryInfo, name);

   RequestResponseArgs_prepare(&rrArgs, NULL, &requestMsg.netMessage, NETMSGTYPE_RemoveXAttrResp);

   // connect & communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean up
      goto cleanup_request;
   }

   // handle result
   removeXAttrResp = (RemoveXAttrRespMsg*)rrArgs.outRespMsg;

   requestRes = RemoveXAttrRespMsg_getValue(removeXAttrResp);

   // clean up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return requestRes;
}

/**
 * @param name Null-terminated string containing the name of the xattr.
 * @param value Buffer containing the new contents of the xattr.
 * @param size Size of the value buffer.
 * @param flags Flags as documented for the setxattr syscall (XATTR_CREATE, XATTR_REPLACE).
 */
extern FhgfsOpsErr FhgfsOpsRemoting_setXAttr(App* app, const EntryInfo* entryInfo, const char* name,
   const char* value, const size_t size, int flags)
{
   SetXAttrMsg requestMsg;
   FhgfsOpsErr requestRes;

   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;

   SetXAttrRespMsg* setXAttrResp;

   // attribute name too long (once the server-side prefix is added)?
   if (strlen(name) > __FHGFSOPS_REMOTING_MAX_XATTR_NAME_SIZE)
      return FhgfsOpsErr_RANGE;

   // attribute value too large to fit into a single NetMsg?
   if (size > __FHGFSOPS_REMOTING_MAX_XATTR_VALUE_SIZE)
      return FhgfsOpsErr_TOOLONG;

   // prepare request msg
   SetXAttrMsg_initFromEntryInfoNameValueAndSize(&requestMsg, entryInfo, name, value, size, flags);

   RequestResponseArgs_prepare(&rrArgs, NULL, &requestMsg.netMessage, NETMSGTYPE_SetXAttrResp);

   // connect & communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean up
      goto cleanup_request;
   }

   // handle result
   setXAttrResp = (SetXAttrRespMsg*)rrArgs.outRespMsg;

   requestRes = SetXAttrRespMsg_getValue(setXAttrResp);

   // clean up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return requestRes;
}

/**
 * @param name Null-terminated string containing the name of the xattr.
 * @param outValue Pointer to the buffer to store the value in.
 * @param size Size of the buffer pointed to by outValue.
 * @param outSize Size of the extended attribute value; may be larger than the buffer, in which
 *    case the value is not read in full.
 */
extern FhgfsOpsErr FhgfsOpsRemoting_getXAttr(App* app, const EntryInfo* entryInfo, const char* name,
   void* outValue, size_t size, ssize_t* outSize)
{
   GetXAttrMsg requestMsg;
   FhgfsOpsErr requestRes;

   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;

   GetXAttrRespMsg* getXAttrResp;
   char* xAttrBuf;
   int xAttrSize;
   int getXAttrReturnCode;

   // prepare request msg
   GetXAttrMsg_initFromEntryInfoNameAndSize(&requestMsg, entryInfo, name, size);

   RequestResponseArgs_prepare(&rrArgs, NULL, &requestMsg.netMessage, NETMSGTYPE_GetXAttrResp);

   // connect & communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean up
      goto cleanup_request;
   }

   // handle result
   getXAttrResp = (GetXAttrRespMsg*)rrArgs.outRespMsg;

   getXAttrReturnCode = GetXAttrRespMsg_getReturnCode(getXAttrResp);
   xAttrBuf = GetXAttrRespMsg_getValueBuf(getXAttrResp);
   xAttrSize = GetXAttrRespMsg_getSize(getXAttrResp);

   if(getXAttrReturnCode != FhgfsOpsErr_SUCCESS)
   {
      requestRes = getXAttrReturnCode;
   }
   else if(outValue)
   { // outValue given => copy the xattr value into the provided buffer
      if(xAttrSize <= size)
      {
         memcpy(outValue, xAttrBuf, xAttrSize);
         *outSize = xAttrSize;
      }
      else // provided buffer is too small => error
      {
         requestRes = FhgfsOpsErr_RANGE;
      }
   }
   else
   { // outValue == NULL => just return the size
      *outSize = xAttrSize;
   }

   // clean up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return requestRes;
}

/**
 * Lookup or create a file and stat it in a single remote request.
 *
 * @param outInfo outInfo::outEntryInfo attribs are set only in case of success (and must then
 *    be kfreed by the caller).
 * @return success if we got the basic outInfo::outEntryInfo; this means either the file existed
 *    and O_EXCL wasn't specified, or the file didn't exist and was created.
 */
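/* note: the lookup message bundles several intents into a single metadata round trip: a stat
 * intent is always added; a create intent is added only if inInfo->createInfo is set (plus an
 * exclusive-create flag for O_EXCL); an open intent is added only if inInfo->openInfo is set. */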
FhgfsOpsErr FhgfsOpsRemoting_lookupIntent(App* app,
   const LookupIntentInfoIn* inInfo, LookupIntentInfoOut* outInfo)
{
   Config* cfg = App_getConfig(app);

   FhgfsOpsErr retVal;

   const CreateInfo* createInfo = inInfo->createInfo;
   const OpenInfo* openInfo = inInfo->openInfo;

   LookupIntentMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(inInfo->parentEntryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   LookupIntentRespMsg* lookupResp;

   // prepare request
   if (inInfo->entryInfoPtr)
   { // EntryInfo already available, so a revalidate intent (lookup-by-id/entryInfo)
      LookupIntentMsg_initFromEntryInfo(&requestMsg, inInfo->parentEntryInfo, inInfo->entryName,
         inInfo->entryInfoPtr, inInfo->metaVersion);
   }
   else
   { // no EntryInfo available, we need to lookup-by-name
      LookupIntentMsg_initFromName(&requestMsg, inInfo->parentEntryInfo, inInfo->entryName);
   }

   // always add a stat-intent
   LookupIntentMsg_addIntentStat(&requestMsg);

   if(createInfo)
   {
      LookupIntentMsg_addIntentCreate(&requestMsg, createInfo->userID, createInfo->groupID,
         createInfo->mode, createInfo->umask, createInfo->preferredStorageTargets,
         createInfo->fileEvent);

      if(inInfo->isExclusiveCreate)
         LookupIntentMsg_addIntentCreateExclusive(&requestMsg);
   }

   if(openInfo)
   {
      const NumNodeID localNodeNumID = Node_getNumID(App_getLocalNode(app) );
      LookupIntentMsg_addIntentOpen(&requestMsg, localNodeNumID, openInfo->accessFlags);
   }

   if(Config_getQuotaEnabled(cfg) )
      NetMessage_addMsgHeaderFeatureFlag((NetMessage*)&requestMsg, LOOKUPINTENTMSG_FLAG_USE_QUOTA);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg,
      NETMSGTYPE_LookupIntentResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // communication error
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   lookupResp = (LookupIntentRespMsg*)rrArgs.outRespMsg;

   LookupIntentInfoOut_initFromRespMsg(outInfo, lookupResp);

   // if we got here, the entry lookup or creation was successful
   retVal = FhgfsOpsErr_SUCCESS;

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}


#ifdef LOG_DEBUG_MESSAGES
void __FhgfsOpsRemoting_logDebugIOCall(const char* logContext, size_t size, loff_t offset,
   RemotingIOInfo* ioInfo, const char* rwTypeStr)
{
   App* app = ioInfo->app;
   Logger* log = App_getLogger(app);

   if (rwTypeStr)
      Logger_logFormatted(log, Log_DEBUG, logContext,
         "rwType: %s; size: %lld; offset: %lld; fileHandleID: %s openFlags: %d",
         rwTypeStr, (long long)size, (long long)offset, ioInfo->fileHandleID, ioInfo->accessFlags);
   else
      Logger_logFormatted(log, Log_DEBUG, logContext,
         "size: %lld; offset: %lld; fileHandleID: %s",
         (long long)size, (long long)offset, ioInfo->fileHandleID);
}
#endif // LOG_DEBUG_MESSAGES


/**
 * Compute the chunk-file offset on a storage server from a given user file position.
 *
 * Note: Make sure that the given stripeNodeIndex is really correct for the given pos.
 */
static int64_t __FhgfsOpsRemoting_getChunkOffset(int64_t pos, unsigned chunkSize, size_t numNodes,
   size_t stripeNodeIndex)
{
   /* the code below is an optimization (wrt division and modulo) of the following three lines:
      int64_t posModChunkSize = (pos % chunkSize);
      int64_t stripeSetStart = pos - posModChunkSize - (stripeNodeIndex*chunkSize);
      return ( (stripeSetStart / numNodes) + posModChunkSize); */
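
   /* worked example (chunkSize=1024, numNodes=4): pos=10000 lies in global chunk 9
      (10000/1024), which the round-robin layout maps to stripeNodeIndex 1 (9 % 4) as that
      target's third local chunk (global chunks 1, 5, 9). so: posModChunkSize = 784,
      stripeSetStart = 10000 - 784 - 1*1024 = 8192, and the chunk-file offset is
      8192/4 + 784 = 2832 (= 2*1024 + 784). */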

   /* note: "& (chunkSize-1)" only works as a "% chunkSize" replacement because chunkSize is
      required to be a power of two */

   int64_t posModChunkSize = pos & (chunkSize-1);
   int64_t stripeSetStart = pos - posModChunkSize - (stripeNodeIndex*chunkSize);

   int64_t stripeSetStartDivNumNodes;

   // note: if numNodes is a power of two, we can do bit shifting instead of division

   if(MathTk_isPowerOfTwo(numNodes) )
   { // quick path => bit shifting
      stripeSetStartDivNumNodes = stripeSetStart >> MathTk_log2Int32(numNodes);
   }
   else
   { // slow path => division
      // note: do_div(n64, base32) assigns the result to n64 and returns the remainder!
      // (we need do_div to enable this division on 32bit archs)

      stripeSetStartDivNumNodes = stripeSetStart; // will be changed by do_div()
      do_div(stripeSetStartDivNumNodes, (unsigned)numNodes);
   }

   return (stripeSetStartDivNumNodes + posModChunkSize);
}

FhgfsOpsErr FhgfsOpsRemoting_hardlink(App* app, const char* fromName, unsigned fromLen,
   const EntryInfo* fromInfo, const EntryInfo* fromDirInfo, const char* toName, unsigned toLen,
   const EntryInfo* toDirInfo, const struct FileEvent* event)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (hardlink)";

   HardlinkMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(toDirInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   HardlinkRespMsg* hardlinkResp;
   FhgfsOpsErr retVal;

   // prepare request
   HardlinkMsg_initFromEntryInfo(&requestMsg, fromDirInfo, fromName, fromLen, fromInfo, toDirInfo,
      toName, toLen, event);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg, NETMSGTYPE_HardlinkResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   hardlinkResp = (HardlinkRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)HardlinkRespMsg_getValue(hardlinkResp);

   if(retVal != FhgfsOpsErr_SUCCESS)
   { // error (on success there is nothing to be done here)
      int logLevel = Log_NOTICE;

      if( (retVal == FhgfsOpsErr_PATHNOTEXISTS) || (retVal == FhgfsOpsErr_INUSE) ||
          (retVal == FhgfsOpsErr_EXISTS) )
         logLevel = Log_DEBUG; // don't bother the user with non-error messages

      Logger_logFormatted(log, logLevel, logContext, "HardlinkResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}


/**
 * Refresh the entry (metadata update).
 */
FhgfsOpsErr FhgfsOpsRemoting_refreshEntry(App* app, const EntryInfo* entryInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "Remoting (refresh Entry)";

   RefreshEntryInfoMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   RefreshEntryInfoRespMsg* refreshResp;
   FhgfsOpsErr retVal;

   // prepare request
   RefreshEntryInfoMsg_initFromEntryInfo(&requestMsg, entryInfo);

   RequestResponseArgs_prepare(&rrArgs, NULL, (NetMessage*)&requestMsg,
      NETMSGTYPE_RefreshEntryInfoResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   refreshResp = (RefreshEntryInfoRespMsg*)rrArgs.outRespMsg;
   retVal = (FhgfsOpsErr)RefreshEntryInfoRespMsg_getResult(refreshResp);

   if(retVal != FhgfsOpsErr_SUCCESS)
   {
      LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "RefreshEntryInfoResp error code: %s",
         FhgfsOpsErr_toErrString(retVal) );
      IGNORE_UNUSED_VARIABLE(log);
      IGNORE_UNUSED_VARIABLE(logContext);
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

FhgfsOpsErr FhgfsOpsRemoting_bumpFileVersion(App* app, const EntryInfo* entryInfo,
   bool persistent, const struct FileEvent* event)
{
   struct BumpFileVersionMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   struct BumpFileVersionRespMsg* bumpResp;
   FhgfsOpsErr retVal;

   // prepare request
   BumpFileVersionMsg_init(&requestMsg, entryInfo, persistent, event);

   RequestResponseArgs_prepare(&rrArgs, NULL, &requestMsg.netMessage,
      NETMSGTYPE_BumpFileVersionResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if (requestRes != FhgfsOpsErr_SUCCESS)
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   bumpResp = (struct BumpFileVersionRespMsg*) rrArgs.outRespMsg;
   retVal = bumpResp->base.value;

   if (retVal != FhgfsOpsErr_SUCCESS)
   {
      LOG_DEBUG_FORMATTED(app->logger, Log_DEBUG, __func__, "BumpFileVersion error code: %s",
         FhgfsOpsErr_toErrString(retVal));
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}

FhgfsOpsErr FhgfsOpsRemoting_getFileVersion(App* app, const EntryInfo* entryInfo,
   uint32_t* outVersion)
{
   struct GetFileVersionMsg requestMsg;
   RequestResponseNode rrNode = {
      .peer = rrpeer_from_entryinfo(entryInfo),
      .nodeStore = app->metaNodes,
      .targetStates = app->metaStateStore,
      .mirrorBuddies = app->metaBuddyGroupMapper
   };
   RequestResponseArgs rrArgs;
   FhgfsOpsErr requestRes;
   struct GetFileVersionRespMsg* getResp;
   FhgfsOpsErr retVal;

   // prepare request
   GetFileVersionMsg_init(&requestMsg, entryInfo);

   RequestResponseArgs_prepare(&rrArgs, NULL, &requestMsg.netMessage,
      NETMSGTYPE_GetFileVersionResp);

   // communicate
   requestRes = MessagingTk_requestResponseNodeRetryAutoIntr(app, &rrNode, &rrArgs);

   if (requestRes != FhgfsOpsErr_SUCCESS)
   { // clean-up
      retVal = requestRes;
      goto cleanup_request;
   }

   // handle result
   getResp = (struct GetFileVersionRespMsg*) rrArgs.outRespMsg;
   retVal = getResp->result;
   *outVersion = getResp->version;

   if (retVal != FhgfsOpsErr_SUCCESS)
   {
      LOG_DEBUG_FORMATTED(app->logger, Log_DEBUG, __func__, "GetFileVersion error code: %s",
         FhgfsOpsErr_toErrString(retVal));
   }

   // clean-up
   RequestResponseArgs_freeRespBuffers(&rrArgs, app);

cleanup_request:
   return retVal;
}