2025-08-10 01:34:16 +02:00

952 lines
33 KiB
C

#include <common/Common.h> // (placed up here for LINUX_VERSION_CODE definition)
/**
* NFS export is probably not worth the backport efforts for kernels before 2.6.29
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
#include <common/storage/Metadata.h>
#include <common/toolkit/StringTk.h>
#include <filesystem/FhgfsInode.h>
#include <filesystem/FhgfsOpsInode.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <os/OsTypeConversion.h>
#include "FhgfsOpsExport.h"
#include "FhgfsOpsHelper.h"
#include "FhgfsOpsFile.h"
#include "FsDirInfo.h"
#include "FhgfsOpsDir.h"
/**
 * Operations for NFS export (and open_by_handle).
 *
 * This table hooks the handlers implemented in this file into the kernel's export_operations
 * interface.
 */
const struct export_operations fhgfs_export_ops =
{
   .encode_fh = FhgfsOpsExport_encodeNfsFileHandle, // inode => file handle buffer
   .fh_to_dentry = FhgfsOpsExport_nfsFileHandleToDentry, // file handle => dentry of the entry
   .fh_to_parent = FhgfsOpsExport_nfsFileHandleToParent, // file handle => dentry of the parent dir
   .get_parent = FhgfsOpsExport_getParentDentry, // child dentry => parent dir dentry
   .get_name = FhgfsOpsExport_getName, // find the name of a child inside a dir
};
// placed here to make it possible to inline it (compiler decision)
// (both helpers are file-local and are defined further down in this file)
static int __FhgfsOpsExport_encodeNfsFileHandleV3(struct inode* inode, __u32* file_handle_buf,
   int* max_len, const EntryInfo* parentInfo);
static bool __FhgfsOpsExport_iterateDirFindName(struct dentry* dentry, const char* entryID,
   char* outName);
/**
 * Single-byte handle type identifier that will be returned by _encodeNfsFileHandle and later be
 * used by _nfsFileHandleToDentry to decode the handle.
 *
 * note: in-tree file systems use "enum fid_type" in linux/exportfs.h; this is our own version of
 * it to let _nfsFileHandleToDentry know which FhgfsNfsFileHandle structure version (V1, V2 or V3)
 * it has to decode.
 * note: according to linux/exportfs.h, "the filesystem must not use the value '0' or '0xff'" as
 * valid types; 0xff means given handle buffer size is too small.
 */
enum FhgfsNfsHandleType
{
   FhgfsNfsHandle_STANDARD_V1 = 0xf1, /* some arbitrary number that doesn't conflict with others
                                         in linux/exportfs.h (though that wouldn't be a real
                                         conflict) to identify our standard valid handle. */
   FhgfsNfsHandle_STANDARD_V2 = 0xf2, // Adds the parentOwnerNodeID
   FhgfsNfsHandle_STANDARD_V3 = 0xf3, /* Adds isBuddyMirrored */
   FhgfsNfsHandle_INVALID = 0xfe, /* special meaning: error occurred, invalid handle
                                     (this is only a hint for _nfsFileHandleToDentry,
                                     because _encodeNfsFileHandle has no way to return an
                                     error to the calling kernel code). */
   FhgfsNfsHandle_BUFTOOSMALL = 0xff, /* special meaning for callers: given handle buffer
                                         was too small */
};
typedef enum FhgfsNfsHandleType FhgfsNfsHandleType;
/**
 * Encode a file handle (typically for NFS) that can later be used to lookup an inode via
 * _nfsFileHandleToDentry().
 *
 * Two variants exist, selected by KERNEL_HAS_ENCODE_FH_INODE; both only determine the (optional)
 * parent EntryInfo and then delegate to __FhgfsOpsExport_encodeNfsFileHandleV3().
 *
 * @param max_len file_handle_buf array length (=> length in 4-byte words), will be set to actually
 *    used or desired length.
 * @param parent_inode/connectable if set, this means we should try to create a connectable file
 *    handle, so that we can later do a parent dir lookup from it.
 *
 * @return FhgfsNfsHandleType_...
 */
#ifndef KERNEL_HAS_ENCODE_FH_INODE
int FhgfsOpsExport_encodeNfsFileHandle(struct dentry* dentry, __u32* file_handle_buf, int* max_len,
   int connectable)
{
   struct inode* inode = dentry->d_inode;
   struct inode* parent_inode = dentry->d_parent->d_inode;

   const EntryInfo* parentInfo = NULL; // stays NULL for a non-connectable handle

   if (connectable)
      parentInfo = FhgfsInode_getEntryInfo(BEEGFS_INODE(parent_inode) );

   return __FhgfsOpsExport_encodeNfsFileHandleV3(inode, file_handle_buf, max_len, parentInfo);
}
#else // KERNEL_HAS_ENCODE_FH_INODE
int FhgfsOpsExport_encodeNfsFileHandle(struct inode* inode, __u32* file_handle_buf, int* max_len,
   struct inode* parent_inode)
{
   const EntryInfo* parentInfo = NULL; // stays NULL if no parent inode was given

   if (parent_inode)
      parentInfo = FhgfsInode_getEntryInfo(BEEGFS_INODE(parent_inode) );

   return __FhgfsOpsExport_encodeNfsFileHandleV3(inode, file_handle_buf, max_len, parentInfo);
}
#endif // KERNEL_HAS_ENCODE_FH_INODE
/**
 * Serialize the identity of an inode into a V3 NFS file handle.
 *
 * The entryID and parentEntryID strings of the inode's EntryInfo are parsed into their numeric
 * components, because a string-based serialization would use too much handle space.
 *
 * The complete caller-provided buffer is zeroed before any field is written, so that neither the
 * trailing space behind the handle nor a handle that ends up as FhgfsNfsHandle_INVALID carries
 * stale memory contents back to the caller.
 *
 * @param file_handle_buf buffer that receives the handle
 * @param max_len in: length of file_handle_buf in 4-byte words; out: length (in 4-byte words)
 *    that the V3 handle needs
 * @param parentInfo EntryInfo of the parent dir for connectable handles; may be NULL, in which
 *    case parentOwnerNodeID is set to 0
 *
 * @return FhgfsNfsHandle_STANDARD_V3 on success, FhgfsNfsHandle_BUFTOOSMALL if the given buffer
 *    cannot hold the handle, FhgfsNfsHandle_INVALID if one of the entry IDs could not be parsed
 */
int __FhgfsOpsExport_encodeNfsFileHandleV3(struct inode* inode, __u32* file_handle_buf,
   int* max_len, const EntryInfo* parentInfo)
{
   FhgfsNfsHandleType retVal = FhgfsNfsHandle_STANDARD_V3;

   FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
   struct FhgfsNfsFileHandleV3* fhgfsNfsHandle = (void*)file_handle_buf;

   const size_t fhgfsNfsHandleLen = sizeof(struct FhgfsNfsFileHandleV3);
   const size_t givenHandleByteLength = (*max_len) * sizeof(__u32);

   const EntryInfo* entryInfo;
   bool parseParentIDRes;
   bool parseEntryIDRes;

   *max_len = (fhgfsNfsHandleLen + sizeof(__u32)-1) / sizeof(__u32); /* set desired/used max_len for
      caller (4-byte words; "+sizeof(u32)-1" rounds up) */

   // check whether given buf length is enough for our handle
   if(givenHandleByteLength < fhgfsNfsHandleLen)
      return FhgfsNfsHandle_BUFTOOSMALL;

   /* zero the whole given buffer up front: covers the space between the real handle length and
      the given length as well as the error paths below, which leave the handle incomplete */
   memset(file_handle_buf, 0, givenHandleByteLength);

   // get entryInfo and serialize it in a special small format
   // (normal string-based serialization would use too much buffer space)

   FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo

   entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);

   parseParentIDRes = __FhgfsOpsExport_parseEntryIDForNfsHandle(entryInfo->parentEntryID,
      &fhgfsNfsHandle->parentEntryIDCounter, &fhgfsNfsHandle->parentEntryIDTimestamp,
      &fhgfsNfsHandle->parentEntryIDNodeID);
   if(unlikely(!parseParentIDRes) )
   { // parsing failed (but we have no real way to return an error)
      retVal = FhgfsNfsHandle_INVALID;
      goto cleanup;
   }

   parseEntryIDRes = __FhgfsOpsExport_parseEntryIDForNfsHandle(entryInfo->entryID,
      &fhgfsNfsHandle->entryIDCounter, &fhgfsNfsHandle->entryIDTimestamp,
      &fhgfsNfsHandle->entryIDNodeID);
   if(unlikely(!parseEntryIDRes) )
   { // parsing failed (but we have no real way to return an error)
      retVal = FhgfsNfsHandle_INVALID;
      goto cleanup;
   }

   fhgfsNfsHandle->ownerNodeID = entryInfo->owner.node;
   fhgfsNfsHandle->entryType = entryInfo->entryType;
   fhgfsNfsHandle->isBuddyMirrored = EntryInfo_getIsBuddyMirrored(entryInfo);

   if (parentInfo)
   {
      // NOTE: Does not need to be locked, as it is not a char* value
      fhgfsNfsHandle->parentOwnerNodeID = parentInfo->owner.node;
   }
   else
      fhgfsNfsHandle->parentOwnerNodeID = (NumNodeID){0};

cleanup:
   FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo

   return retVal;
}
/**
* Lookup an inode based on a file handle that was previously created via _encodeNfsFileHandle().
*
* @param fid file handle buffer
* @param fh_len fid buffer length in 4-byte words
* @param fileid_type FhgfsNfsHandleType_... (as returned by _encodeNfsFileHandle).
*/
struct dentry* FhgfsOpsExport_nfsFileHandleToDentry(struct super_block *sb, struct fid *fid,
int fh_len, int fileid_type)
{
App* app = FhgfsOps_getApp(sb);
const char* logContext = "NFS-handle-to-dentry";
FhgfsNfsFileHandleV2 fhgfsNfsHandleV2;
struct FhgfsNfsFileHandleV3 fhgfsNfsHandleV3;
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle = (struct FhgfsNfsFileHandleV3*) fid->raw;
FhgfsNfsHandleType handleType = (FhgfsNfsHandleType)fileid_type;
void* rawHandle = fid->raw;
size_t fhgfsNfsHandleLen;
size_t givenHandleByteLength = fh_len * sizeof(__u32); // fh_len is in 4-byte words
static bool isFirstCall = true; // true if this method is called for the first time
FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, NULL, logContext);
if (unlikely(isFirstCall) )
{
/* Our getattr functions assumes it is called after a lookup-intent and therefore does not
* validate the inode by default. However, this assumption is not true for NFS, and as result
* inode updates are never detected. NFS even caches readdir results and without inode updates
* even deleted entries are not detected. So we need to disable the getattr optimization for
* nfs exports.
*/
Config* cfg = App_getConfig(app);
Logger* log = App_getLogger(app);
Config_setTuneRefreshOnGetAttr(cfg);
Logger_logFormatted(log, Log_DEBUG, logContext,
"nfs export detected: auto enabling refresh-on-getattr");
isFirstCall = false;
}
// check if this handle is valid
if (unlikely(handleType != FhgfsNfsHandle_STANDARD_V1 &&
handleType != FhgfsNfsHandle_STANDARD_V2 &&
handleType != FhgfsNfsHandle_STANDARD_V3))
{
printk_fhgfs_debug(KERN_INFO, "%s: Called with invalid handle type: 0x%x\n",
__func__, fileid_type);
return NULL;
}
if (likely(handleType == FhgfsNfsHandle_STANDARD_V3))
fhgfsNfsHandleLen = sizeof(struct FhgfsNfsFileHandleV3);
else if (handleType == FhgfsNfsHandle_STANDARD_V2)
fhgfsNfsHandleLen = sizeof(FhgfsNfsFileHandleV2);
else
fhgfsNfsHandleLen = sizeof(FhgfsNfsFileHandleV1);
// check if given handle length is valid
if(unlikely(givenHandleByteLength < fhgfsNfsHandleLen) )
{
printk_fhgfs_debug(KERN_INFO, "%s: Called with too small handle length: "
"%d bytes; (handle type: 0x%x)\n",
__func__, (int)givenHandleByteLength, fileid_type);
return NULL;
}
if (unlikely(handleType == FhgfsNfsHandle_STANDARD_V1))
{ // old handle, generated before the update that creates V2 handles, convert V1 to V2
FhgfsNfsFileHandleV1* handleV1 = (FhgfsNfsFileHandleV1*) rawHandle;
fhgfsNfsHandleV2.entryIDCounter = handleV1->entryIDCounter;
fhgfsNfsHandleV2.entryIDNodeID = handleV1->entryIDNodeID;
fhgfsNfsHandleV2.entryIDTimestamp = handleV1->entryIDTimestamp;
fhgfsNfsHandleV2.ownerNodeID = handleV1->ownerNodeID;
fhgfsNfsHandleV2.parentEntryIDCounter = handleV1->parentEntryIDCounter;
fhgfsNfsHandleV2.parentEntryIDNodeID = handleV1->parentEntryIDNodeID;
fhgfsNfsHandleV2.parentEntryIDTimestamp = handleV1->parentEntryIDTimestamp;
fhgfsNfsHandleV2.entryType = handleV1->entryType;
fhgfsNfsHandleV2.parentOwnerNodeID = 0; // only available in V2 handles
rawHandle = &fhgfsNfsHandleV2;
handleType = FhgfsNfsHandle_STANDARD_V2;
}
if (unlikely(handleType == FhgfsNfsHandle_STANDARD_V2))
{
FhgfsNfsFileHandleV2* handleV2 = (FhgfsNfsFileHandleV2*) rawHandle;
fhgfsNfsHandleV3.entryIDCounter = handleV2->entryIDCounter;
fhgfsNfsHandleV3.entryIDNodeID.value = handleV2->entryIDNodeID;
fhgfsNfsHandleV3.entryIDTimestamp = handleV2->entryIDTimestamp;
fhgfsNfsHandleV3.ownerNodeID.value = handleV2->ownerNodeID;
fhgfsNfsHandleV3.parentEntryIDCounter = handleV2->parentEntryIDCounter;
fhgfsNfsHandleV3.parentEntryIDNodeID.value = handleV2->parentEntryIDNodeID;
fhgfsNfsHandleV3.parentEntryIDTimestamp = handleV2->parentEntryIDTimestamp;
fhgfsNfsHandleV3.entryType = handleV2->entryType;
fhgfsNfsHandleV3.parentOwnerNodeID.value = handleV2->parentOwnerNodeID;
fhgfsNfsHandleV3.isBuddyMirrored = 0;
fhgfsNfsHandle = &fhgfsNfsHandleV3;
handleType = FhgfsNfsHandle_STANDARD_V3;
}
return __FhgfsOpsExport_lookupDentryFromNfsHandle(sb, fhgfsNfsHandle, false);
}
/**
* Lookup an inode based on a file handle that was previously created via _encodeNfsFileHandle().
*
* @param fid file handle buffer
* @param fh_len fid buffer length in 4-byte words
* @param fileid_type FhgfsNfsHandleType_... (as returned by _encodeNfsFileHandle).
*
* Note: Always a V2 handle
*/
struct dentry* FhgfsOpsExport_nfsFileHandleToParent(struct super_block *sb, struct fid *fid,
int fh_len, int fileid_type)
{
App* app = FhgfsOps_getApp(sb);
const char* logContext = "NFS-handle-to-parent";
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle = (struct FhgfsNfsFileHandleV3*)fid->raw;
FhgfsNfsHandleType handleType = (FhgfsNfsHandleType)fileid_type;
size_t fhgfsNfsHandleLen = sizeof(struct FhgfsNfsFileHandleV3);
size_t givenHandleByteLength = fh_len * sizeof(__u32); // fh_len is in 4-byte words
FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, NULL, logContext);
// check if this handle is valid
if (unlikely(handleType != FhgfsNfsHandle_STANDARD_V2
&& handleType != FhgfsNfsHandle_STANDARD_V3))
{ // V1 handles didn't include the parentOwnerNodeID
#ifdef BEEGFS_DEBUG
if (handleType != FhgfsNfsHandle_STANDARD_V1)
printk_fhgfs_debug(KERN_INFO, "%s: Called with invalid handle type: 0x%x\n",
__func__, fileid_type);
#endif
return NULL;
}
// check if given handle length is valid
if(unlikely(givenHandleByteLength < fhgfsNfsHandleLen) )
{
printk_fhgfs_debug(KERN_INFO, "%s: Called with too small handle length: "
"%d bytes; (handle type: 0x%x)\n",
__func__, (int)givenHandleByteLength, fileid_type);
return NULL;
}
return __FhgfsOpsExport_lookupDentryFromNfsHandle(sb, fhgfsNfsHandle, true);
}
/**
* Check whether a dentry/inode for the nfs handle exists in the local cache and try server lookup
* otherwise.
*
* @param isParent if true the parent will be looked up.
*/
struct dentry* __FhgfsOpsExport_lookupDentryFromNfsHandle(struct super_block *sb,
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle, bool lookupParent)
{
bool entryRes;
char* entryID = NULL;
size_t entryIDLen;
char* parentEntryID = NULL;
App* app = FhgfsOps_getApp(sb);
Logger* log = App_getLogger(app);
const char* logContext =
lookupParent ? "NFS-decode-handle-to-parent-dentry" : "NFS-decode-handle-to-dentry";
ino_t inodeHash; // (simply the inode number for us)
struct inode* inode;
FhgfsInodeComparisonInfo comparisonInfo;
EntryInfo entryInfo;
struct dentry* resDentry;
// generate entryID string
if (!lookupParent)
entryRes = __FhgfsOpsExport_entryIDFromNfsHandle(fhgfsNfsHandle->entryIDCounter,
fhgfsNfsHandle->entryIDTimestamp, fhgfsNfsHandle->entryIDNodeID, &entryID);
else
entryRes = __FhgfsOpsExport_entryIDFromNfsHandle(fhgfsNfsHandle->parentEntryIDCounter,
fhgfsNfsHandle->parentEntryIDTimestamp, fhgfsNfsHandle->parentEntryIDNodeID, &entryID);
if(unlikely(!entryRes) )
goto err_cleanup_entryids;
entryIDLen = strlen(entryID);
if (strncmp(entryID, META_ROOTDIR_ID_STR, entryIDLen) == 0)
inodeHash = BEEGFS_INODE_ROOT_INO;
else
inodeHash = FhgfsInode_generateInodeID(sb, entryID, entryIDLen);
comparisonInfo.inodeHash = inodeHash;
comparisonInfo.entryID = entryID;
inode = ilookup5(sb, inodeHash, __FhgfsOps_compareInodeID,
&comparisonInfo); // (ilookup5 calls iget() on match)
if(!inode)
{ // not found in cache => try to get it from mds
App* app = FhgfsOps_getApp(sb);
fhgfs_stat fhgfsStat;
struct kstat kstat;
FhgfsOpsErr statRes = FhgfsOpsErr_SUCCESS;
NumNodeID ownerNodeID;
FhgfsIsizeHints iSizeHints;
NumNodeID parentNodeID;
char* statParentEntryID;
unsigned int metaVersion;
if (!lookupParent)
{
// generate parentEntryID string
bool parentRes = __FhgfsOpsExport_entryIDFromNfsHandle(
fhgfsNfsHandle->parentEntryIDCounter, fhgfsNfsHandle->parentEntryIDTimestamp,
fhgfsNfsHandle->parentEntryIDNodeID, &parentEntryID);
if(unlikely(!parentRes) )
goto err_cleanup_entryids;
ownerNodeID = fhgfsNfsHandle->ownerNodeID;
}
else
{ // parentDir
ownerNodeID = fhgfsNfsHandle->parentOwnerNodeID;
/* Note: Needs to be special value to tell fhgfs-meta it is unknown!
* Must not be empty, as this would tell fhgfs-meta it is the root ID. */
parentEntryID = StringTk_strDup(EntryInfo_PARENT_ID_UNKNOWN);
}
FhgfsInode_initIsizeHints(NULL, &iSizeHints);
// init entry info
if (fhgfsNfsHandle->isBuddyMirrored)
EntryInfo_init(&entryInfo, NodeOrGroup_fromGroup(ownerNodeID.value), parentEntryID,
entryID, StringTk_strDup("<nfs_fh>"), fhgfsNfsHandle->entryType, 0);
else
EntryInfo_init(&entryInfo, NodeOrGroup_fromNode(ownerNodeID), parentEntryID, entryID,
StringTk_strDup("<nfs_fh>"), fhgfsNfsHandle->entryType, 0);
// communicate
statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &entryInfo, &fhgfsStat,
&parentNodeID, &statParentEntryID);
// the lookup of parent information may have failed. this can happen if the entry info we
// tried to stat describes a directory inode that is mirrored, but whose parent directory is
// not mirrored. similarly, an unmirrored directory with a mirrored parent directory can fail
// here. try again with the other choice of mirroring flag.
if (lookupParent && statRes == FhgfsOpsErr_PATHNOTEXISTS)
{
entryInfo.featureFlags ^= ENTRYINFO_FEATURE_BUDDYMIRRORED;
statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &entryInfo, &fhgfsStat,
&parentNodeID, &statParentEntryID);
}
if(statRes != FhgfsOpsErr_SUCCESS)
goto err_cleanup_entryinfo;
// entry found => create inode
metaVersion = fhgfsStat.metaVersion;
OsTypeConv_kstatFhgfsToOs(&fhgfsStat, &kstat);
kstat.ino = inodeHash;
inode = __FhgfsOps_newInode(sb, &kstat, 0, &entryInfo, &iSizeHints, metaVersion);
if (likely(inode) )
{
if (parentNodeID.value)
{
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
FhgfsInode_setParentNodeID(fhgfsInode, parentNodeID);
}
if (statParentEntryID)
{ // update the parentEntryID
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
// make absolute sure we use the right entryInfo
FhgfsInode_entryInfoWriteLock(fhgfsInode); // L O C K
EntryInfo_updateSetParentEntryID(&fhgfsInode->entryInfo, statParentEntryID);
FhgfsInode_entryInfoWriteUnlock(fhgfsInode); // U N L O C K
}
}
}
else
SAFE_KFREE(entryID); // ilookup5 found an existing inode, free the comparison entryID
if (unlikely(!inode) )
goto err_cleanup_entryinfo;
// (d_obtain_alias can also handle pointer error codes and NULL)
resDentry = d_obtain_alias(inode);
if (resDentry && !IS_ERR(resDentry) )
{
#ifndef KERNEL_HAS_S_D_OP
resDentry->d_op = &fhgfs_dentry_ops;
#endif // KERNEL_HAS_S_D_OP
if (unlikely(Logger_getLogLevel(log) >= Log_SPAM) )
{
FhgfsInode* fhgfsInode = BEEGFS_INODE(resDentry->d_inode);
const EntryInfo* entryInfo;
FhgfsInode_entryInfoReadLock(fhgfsInode); // L O C K fhgfsInode
entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);
Logger_logFormatted(log, Log_SPAM, logContext, "result dentry inode id: %s",
entryInfo->entryID);
FhgfsInode_entryInfoReadUnlock(fhgfsInode); // U N L O C K fhgfsInode
}
}
return resDentry;
err_cleanup_entryids:
SAFE_KFREE(parentEntryID);
SAFE_KFREE(entryID);
return ERR_PTR(-ESTALE);
err_cleanup_entryinfo:
EntryInfo_uninit(&entryInfo);
return ERR_PTR(-ESTALE);
}
/**
 * Split an entryID string ("<counter>-<timestamp>-<nodeID>", all hex) into its three components.
 *
 * Note: META_ROOTDIR_ID_STR is a special case (all three components are set to 0).
 *
 * @param entryID string to parse
 * @param outCounter receives the first component
 * @param outTimestamp receives the second component
 * @param outNodeID receives the third component
 *
 * @return false on error (i.e. if not exactly three components were found)
 */
bool __FhgfsOpsExport_parseEntryIDForNfsHandle(const char* entryID, uint32_t* outCounter,
   uint32_t* outTimestamp, NumNodeID* outNodeID)
{
   const int expectedMatches = 3; // a valid entryID string yields exactly 3 sscanf conversions

   uint32_t parsedNodeID; // tmp target for sscanf %X; wrapped into a NumNodeID afterwards
   int numMatches;

   if(strcmp(META_ROOTDIR_ID_STR, entryID) == 0)
   { // special case: this is the root ID, which doesn't have the usual three components
      *outNodeID = (NumNodeID){0};
      *outTimestamp = 0;
      *outCounter = 0;

      return true;
   }

   numMatches = sscanf(entryID, "%X-%X-%X", outCounter, outTimestamp, &parsedNodeID);

   if(unlikely(numMatches != expectedMatches) )
   { // parsing failed
      printk_fhgfs_debug(KERN_INFO, "%s: Parsing of entryID failed. entryID: %s\n",
         __func__, entryID);

      return false;
   }

   *outNodeID = (NumNodeID){parsedNodeID};

   return true;
}
/**
 * Build an entryID string from the three numeric components stored in an NFS handle.
 *
 * If all three components are 0, the result is a copy of META_ROOTDIR_ID_STR; otherwise the
 * components are printed as "<counter>-<timestamp>-<nodeID>" in hex.
 *
 * @param outEntryID will be kmalloced on success and needs to be kfree'd by the caller
 * @return false on error
 */
bool __FhgfsOpsExport_entryIDFromNfsHandle(uint32_t counter, uint32_t timestamp,
   NumNodeID nodeID, char** outEntryID)
{
   if(counter || timestamp || nodeID.value)
   { // normal case: at least one component is set
      *outEntryID = StringTk_kasprintf("%X-%X-%X", counter, timestamp, (uint32_t)nodeID.value);
   }
   else
   { // special case: root ID
      *outEntryID = StringTk_strDup(META_ROOTDIR_ID_STR);
   }

   // (just in case kmalloc failed)
   if(*outEntryID == NULL)
      return false;

   return true;
}
/**
* getParentDentry - export_operations->get_parent function
*
* Get the directory dentry (and inode) that has childDentry
*
* Note: We do not lock childDentry's EntryInfo here, as childDentry does not have a connected
* path yet, so childDentry's EntryInfo also cannot change.
*/
struct dentry* FhgfsOpsExport_getParentDentry(struct dentry* childDentry)
{
int retVal = -ESTALE;
struct super_block* superBlock = childDentry->d_sb;
App* app = FhgfsOps_getApp(superBlock);
Logger* log = App_getLogger(app);
const char* logContext = "Export_getParentDentry";
struct inode* parentInode;
struct inode* childInode = childDentry->d_inode;
FhgfsInode* fhgfsChildInode = BEEGFS_INODE(childInode);
EntryInfo childEntryInfoCopy;
FhgfsInodeComparisonInfo comparisonInfo;
size_t parentIDLen;
const char* parentEntryID;
struct dentry* parentDentry = NULL;
FhgfsInode_entryInfoReadLock(fhgfsChildInode); // L O C K childInode
EntryInfo_dup(FhgfsInode_getEntryInfo(fhgfsChildInode), &childEntryInfoCopy);
FhgfsInode_entryInfoReadUnlock(fhgfsChildInode); // U N L O C K childInode
parentEntryID = EntryInfo_getParentEntryID(&childEntryInfoCopy);
/* NOTE: IS_ROOT() would not work, as any childDentry is not connected to its parent
* and IS_ROOT() would *always* be true here. */
if (strlen(parentEntryID) == 0)
{
/* This points to a bug, as we should never be called for the root dentry and so this means
* either root was not correctly identified or setting the parentEntryID failed */
Logger_logErrFormatted(log, Log_ERR, logContext,
"Bug: Root does not have parentEntryID set!");
retVal = -EINVAL;
goto outErr;
}
parentIDLen = strlen(parentEntryID);
comparisonInfo.entryID = parentEntryID;
if (strncmp(parentEntryID, META_ROOTDIR_ID_STR, parentIDLen) == 0)
comparisonInfo.inodeHash = BEEGFS_INODE_ROOT_INO; // root inode
else
comparisonInfo.inodeHash = FhgfsInode_generateInodeID(superBlock,
parentEntryID, strlen(parentEntryID) );
Logger_logFormatted(log, Log_SPAM, logContext, "Find inode for ID: %s inodeHash: %lu",
comparisonInfo.entryID, comparisonInfo.inodeHash);
parentInode = ilookup5(superBlock, comparisonInfo.inodeHash, __FhgfsOps_compareInodeID,
&comparisonInfo); // (ilookup5 calls iget() on match)
if(!parentInode)
{ // not found in cache => try to get it from mds
fhgfs_stat fhgfsStat;
struct kstat kstat;
FhgfsOpsErr statRes = FhgfsOpsErr_SUCCESS;
const char* fileName = StringTk_strDup("<nfs_fh>");
EntryInfo parentInfo;
unsigned int metaVersion;
NumNodeID parentNodeID = (NumNodeID){0};
char* parentEntryID = NULL;
NumNodeID parentOwnerNodeID;
FhgfsIsizeHints iSizeHints;
/* Note: Needs to be (any) special value to tell fhgfs-meta it is unknown!
* Must not be empty, as this would tell fhgfs-meta it is the root ID. */
char* grandParentID = StringTk_strDup(EntryInfo_PARENT_ID_UNKNOWN);
parentOwnerNodeID = FhgfsInode_getParentNodeID(fhgfsChildInode);
if (parentOwnerNodeID.value == 0)
{ /* Hmm, so we don't know the real ownerNodeId, we try the childs ID, but if that fails
* with FhgfsOpsErr_NOTOWNER, we would need to cycle through all meta-targets, which
* is too slow and we therefore don't do that, but hope the client can
* recover -ESTALE itself */
parentOwnerNodeID = childEntryInfoCopy.owner.node;
}
// generate parentEntryID string
if (EntryInfo_getIsBuddyMirrored(&childEntryInfoCopy))
EntryInfo_init(&parentInfo, NodeOrGroup_fromGroup(parentOwnerNodeID.value),
grandParentID, StringTk_strDup(comparisonInfo.entryID), fileName,
DirEntryType_DIRECTORY, 0);
else
EntryInfo_init(&parentInfo, NodeOrGroup_fromNode(parentOwnerNodeID), grandParentID,
StringTk_strDup(comparisonInfo.entryID), fileName, DirEntryType_DIRECTORY, 0);
// communicate
FhgfsInode_initIsizeHints(NULL, &iSizeHints);
statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &parentInfo, &fhgfsStat,
&parentNodeID, &parentEntryID);
// the lookup of parent information may have failed. this can happen if the entry info we
// tried to stat describes a directory inode that is mirrored, but whose parent directory
// is not mirrored. similarly, an unmirrored directory with a mirrored parent directory
// can fail here. try again with the other choice of mirroring flag.
if (statRes == FhgfsOpsErr_PATHNOTEXISTS)
{
parentInfo.featureFlags ^= ENTRYINFO_FEATURE_BUDDYMIRRORED;
statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &parentInfo, &fhgfsStat,
&parentNodeID, &parentEntryID);
}
if(statRes != FhgfsOpsErr_SUCCESS)
{
EntryInfo_uninit(&parentInfo);
goto outErr;
}
// entry found => create inode
metaVersion = fhgfsStat.metaVersion;
OsTypeConv_kstatFhgfsToOs(&fhgfsStat, &kstat);
kstat.ino = comparisonInfo.inodeHash;
parentInode = __FhgfsOps_newInodeWithParentID(superBlock, &kstat, 0, &parentInfo,
parentNodeID, &iSizeHints, metaVersion);
if (likely(parentInode) && parentEntryID)
{ // update the parentEntryID
FhgfsInode* parentFhgfsInode = BEEGFS_INODE(parentInode);
// make absolute sure we use the right entryInfo
FhgfsInode_entryInfoWriteLock(parentFhgfsInode); // L O C K
EntryInfo_updateSetParentEntryID(&parentFhgfsInode->entryInfo, parentEntryID);
FhgfsInode_entryInfoWriteUnlock(parentFhgfsInode); // U N L O C K
}
}
// (d_obtain_alias can also handle pointer error codes and NULL)
parentDentry = d_obtain_alias(parentInode);
if (parentDentry && !IS_ERR(parentDentry) )
{
#ifndef KERNEL_HAS_S_D_OP
parentDentry->d_op = &fhgfs_dentry_ops;
#endif // KERNEL_HAS_S_D_OP
}
// printk(KERN_INFO "%s: d_obtain_alias(inode) res: %ld\n", __func__, IS_ERR(parentDentry) );
EntryInfo_uninit(&childEntryInfoCopy);
return parentDentry;
outErr:
EntryInfo_uninit(&childEntryInfoCopy);
return ERR_PTR(retVal);
}
/**
 * getName - export_operations->get_name function
 *
 * Searches the parent directory for an entry with the same entryID as the child and copies the
 * name of that entry to outName.
 *
 * @param dirDentry the directory in which to find a name
 * @param outName a pointer to a %NAME_MAX+1 char buffer to store the name
 * @param child the dentry for the child directory.
 *
 * @return 0 on success; -ENOTDIR if dirDentry has no inode or is not a directory; -EINVAL if the
 *    dir inode has no file operations; -ESTALE if no matching entry was found
 */
int FhgfsOpsExport_getName(struct dentry* dirDentry, char *outName, struct dentry *child)
{
   struct inode* dirInode = dirDentry->d_inode;
   struct inode* childInode = child->d_inode;
   FhgfsInode* fhgfsChildInode = BEEGFS_INODE(childInode);

   EntryInfo childInfoSnapshot;
   bool nameFound;

   if (!dirInode || !S_ISDIR(dirInode->i_mode))
      return -ENOTDIR;

   if (!dirInode->i_fop)
      return -EINVAL;

   // take a private copy of the child's EntryInfo, so that the lock is only held briefly
   FhgfsInode_entryInfoReadLock(fhgfsChildInode); // LOCK childInode
   EntryInfo_dup(FhgfsInode_getEntryInfo(fhgfsChildInode), &childInfoSnapshot);
   FhgfsInode_entryInfoReadUnlock(fhgfsChildInode); // UNLOCK childInode

   nameFound = __FhgfsOpsExport_iterateDirFindName(dirDentry,
      EntryInfo_getEntryID(&childInfoSnapshot), outName);

   EntryInfo_uninit(&childInfoSnapshot);

   return nameFound ? 0 : -ESTALE;
}
/**
* Find the name of an entry with the given entryID in the directory dirDentry.
*
* Note: This uses a rather slow client-side readdir() to find the entry.
* Maybe we should add another NetMsg and do it directly on the server!
*/
bool __FhgfsOpsExport_iterateDirFindName(struct dentry* dirDentry, const char* entryID,
char* outName)
{
struct super_block* superBlock = dirDentry->d_sb;
App* app = FhgfsOps_getApp(superBlock);
Logger* log = App_getLogger(app);
const char* logContext = "FhgfsOpsExport_readdirFindName";
bool retVal = false;
struct inode* dirInode = dirDentry->d_inode;
FhgfsInode* fhgfsDirInode = BEEGFS_INODE(dirInode);
size_t contentsPos = 0;
size_t contentsLength;
StrCpyVec* dirContents;
StrCpyVec* dirContentIDs;
EntryInfo dirEntryInfoCopy;
FsDirInfo dirInfo;
FsDirInfo_init(&dirInfo, app);
dirContents = FsDirInfo_getDirContents(&dirInfo);
dirContentIDs = FsDirInfo_getEntryIDs(&dirInfo);
FhgfsInode_entryInfoReadLock(fhgfsDirInode); // L O C K EntryInfo
EntryInfo_dup(FhgfsInode_getEntryInfo(fhgfsDirInode), &dirEntryInfoCopy);
FhgfsInode_entryInfoReadUnlock(fhgfsDirInode); // U N L O C K EntryInfo
if(unlikely(Logger_getLogLevel(log) >= 5) )
{
const EntryInfo* dirInfo = FhgfsInode_getEntryInfo(fhgfsDirInode);
struct inode* inode = dirDentry->d_inode;
FhgfsOpsHelper_logOpMsg(Log_SPAM, app, dirDentry, inode, logContext,
"dir-id: %s searchID: %s", dirInfo->entryID, entryID);
}
for( ; ; ) // loop as long as we didn't find entryID and as long as the dir has entries
{
int refreshRes;
char* currentName;
const char* currentEntryID;
refreshRes = FhgfsOpsHelper_refreshDirInfoIncremental(app,
&dirEntryInfoCopy, &dirInfo, false);
if(unlikely(refreshRes) )
{ // error occurred
break;
}
contentsPos = FsDirInfo_getCurrentContentsPos(&dirInfo);
contentsLength = StrCpyVec_length(dirContents);
#if 0
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext,
"contentsPos: %lld/%lld, endOfDir: %s",
(long long)contentsPos, (long long)contentsLength,
FsDirInfo_getEndOfDir(&dirInfo) ? "yes" : "no");
#endif
// refreshDirInfoInc guarantees that we either have a valid range for current offset
// or that contentsLength is empty
if(!contentsLength)
{ // end of dir
LOG_DEBUG(log, Log_SPAM, logContext, "reached end of dir");
break;
}
currentName = StrCpyVec_at(dirContents, contentsPos);
currentEntryID = StrCpyVec_at(dirContentIDs, contentsPos);
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext,
"searchID: %s current dir-entry: %s entryID: %s ",
entryID, currentName, currentEntryID);
if(!strcmp(currentEntryID, entryID) )
{ // match found
// note: out name buf is guaranteed to be NAME_MAX+1 according to <linux/exportfs.h>
StringTk_strncpyTerminated(outName, currentName, NAME_MAX+1);
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "Found childName: %s", outName);
retVal = true;
break;
}
FsDirInfo_setCurrentContentsPos(&dirInfo, contentsPos + 1);
} // end of for-loop
// clean-up
FsDirInfo_uninit((FsObjectInfo*) &dirInfo);
EntryInfo_uninit(&dirEntryInfoCopy);
return retVal;
}
#endif // LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)