2929 lines
91 KiB
C
2929 lines
91 KiB
C
#include <app/log/Logger.h>
|
|
#include <app/App.h>
|
|
#include <app/config/Config.h>
|
|
#include <common/net/message/storage/lookup/LookupIntentRespMsg.h> // response flags
|
|
#include <common/toolkit/vector/StrCpyVec.h>
|
|
#include <common/storage/StorageErrors.h>
|
|
#include <common/toolkit/StringTk.h>
|
|
#include <common/toolkit/MetadataTk.h>
|
|
#include <os/OsCompat.h>
|
|
#include <os/OsTypeConversion.h>
|
|
#include "FhgfsOpsSuper.h"
|
|
#include "FhgfsOpsDir.h"
|
|
#include "FhgfsOpsInode.h"
|
|
#include "FhgfsOpsFile.h"
|
|
#include "FhgfsOpsFileNative.h"
|
|
#include "FhgfsOpsHelper.h"
|
|
|
|
#include <linux/namei.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/file.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/posix_acl.h>
|
|
#include <linux/xattr.h>
|
|
|
|
|
|
/* Slab cache handle, initially unset. Judging by its name it backs the allocation of FhgfsInode
   objects; the code that creates and uses it is not part of this section (TODO confirm). */
static struct kmem_cache* FhgfsInodeCache = NULL;
|
|
|
|
/* Name string for the inode cache: the module name string with an "_inode_cache" suffix. */
#define FhgfsOpsInode_CACHE_NAME BEEGFS_MODULE_NAME_STR "_inode_cache"
|
|
|
|
|
|
/* Forward declaration; the function is defined after FhgfsOps_setattr(), which calls it. */
static void FhgfsOps_newAttrToInode(struct iattr* iAttr, struct inode* outInode);
|
|
|
|
/**
 * Refresh the attributes of an inode if one of the refresh conditions applies.
 *
 * A refresh is done when
 *  - the inode carries the root inode number and the superblock is not flagged as root-inited yet,
 *  - or the attribute cache of the inode is reported invalid and whenCacheInvalid is set,
 *  - or force is set.
 *
 * After a successful refresh of the root inode, the superblock is flagged as root-inited.
 *
 * @param whenCacheInvalid refresh if FhgfsInode_isCacheValid() reports an invalid cache.
 * @param withFileSize handed on (negated) as last argument of __FhgfsOps_doRefreshInode().
 * @param force refresh regardless of the other conditions.
 * @return 0 if no refresh was needed or the refresh succeeded; otherwise the non-zero result of
 * __FhgfsOps_doRefreshInode().
 */
static __always_inline int maybeRefreshInode(struct inode* inode, bool whenCacheInvalid,
   bool withFileSize, bool force)
{
   struct super_block* sb = inode->i_sb;
   App* app = FhgfsOps_getApp(sb);
   Config* cfg = app->cfg;

   FhgfsIsizeHints iSizeHints;
   bool needsRefresh;
   int refreshRes;

   // (conditions are checked in this order on purpose: root init, cache validity, force)
   if(unlikely(!FhgfsOps_getIsRootInited(sb) && (inode->i_ino == BEEGFS_INODE_ROOT_INO) ) )
      needsRefresh = true;
   else
   if(!FhgfsInode_isCacheValid(BEEGFS_INODE(inode), inode->i_mode, cfg) && whenCacheInvalid)
      needsRefresh = true;
   else
      needsRefresh = force;

   if(!needsRefresh)
      return 0;

   refreshRes = __FhgfsOps_doRefreshInode(app, inode, NULL, &iSizeHints, !withFileSize);
   if(refreshRes)
      return refreshRes;

   if(inode->i_ino == BEEGFS_INODE_ROOT_INO)
      FhgfsOps_setIsRootInited(sb, true);

   return 0;
}
|
|
|
|
|
|
/**
|
|
* Find out whether a directory entry exists. Add the dentry and create/associate the
|
|
* corresponding inode in case it does exist.
|
|
*
|
|
* Note: Newer version, superseding the old _lookup() with more efficient messaging based on
|
|
* combined intent requests.
|
|
*
|
|
* @param dentry the thing that we are looking for.
|
|
* @param flags LOOKUP_...
|
|
* @return NULL if we're using the given new dentry, or pointer to another dentry if we
|
|
* find out that it already existed (e.g. from a NFS handle), or ERR_PTR(x) with x being a negative
|
|
* linux error code.
|
|
*/
|
|
#ifndef KERNEL_HAS_ATOMIC_OPEN
|
|
struct dentry* FhgfsOps_lookupIntent(struct inode* parentDir, struct dentry* dentry,
|
|
struct nameidata* nameidata)
|
|
#else
|
|
struct dentry* FhgfsOps_lookupIntent(struct inode* parentDir, struct dentry* dentry,
|
|
unsigned flags)
|
|
#endif // KERNEL_HAS_ATOMIC_OPEN
|
|
{
|
|
App* app = FhgfsOps_getApp(dentry->d_sb);
|
|
Config* cfg = App_getConfig(app);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_lookupIntent";
|
|
|
|
struct dentry* returnDentry = NULL; // can be NULL or existing dentry or ERR_PTR
|
|
FhgfsOpsErr statRes;
|
|
fhgfs_stat fhgfsStat;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(parentDir);
|
|
EntryInfo newEntryInfo;
|
|
const char* entryName = dentry->d_name.name;
|
|
bool freeNewEntryInfo = false;
|
|
struct inode* inode = dentry->d_inode;
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
Time now;
|
|
Time_setToNowReal(&now);
|
|
|
|
// For validating the cache, this field is updated
|
|
// with CURRENT_TIME on first lookup
|
|
|
|
dentry->d_time = (now.tv_sec * 1000000000UL + now.tv_nsec);
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= Log_SPAM) )
|
|
FhgfsOpsHelper_logOp(Log_SPAM, app, dentry, inode, logContext);
|
|
|
|
if(unlikely(dentry->d_name.len > NAME_MAX) )
|
|
return ERR_PTR(-ENAMETOOLONG);
|
|
|
|
/* check if root inode attribs have been fetched already
|
|
(because the kernel doesn't do lookup/revalidate for the root inode) */
|
|
|
|
{
|
|
int refreshRes = maybeRefreshInode(parentDir, true, false, false);
|
|
|
|
// root permissions might have changed now => recheck permissions
|
|
if (!refreshRes)
|
|
refreshRes = os_generic_permission(parentDir, MAY_EXEC);
|
|
|
|
if (refreshRes)
|
|
return ERR_PTR(refreshRes);
|
|
}
|
|
|
|
|
|
// retrieve remote stat info for given entry...
|
|
|
|
if(unlikely(IS_ROOT(dentry) ) )
|
|
{ // root inode is special (though the kernel never actually does a root lookup => unlikely)
|
|
bool isGetSuccess = MetadataTk_getRootEntryInfoCopy(app, &newEntryInfo);
|
|
|
|
freeNewEntryInfo = true;
|
|
|
|
if (isGetSuccess)
|
|
statRes = FhgfsOpsRemoting_statDirect(app, &newEntryInfo, &fhgfsStat);
|
|
else
|
|
statRes = FhgfsOpsErr_INTERNAL;
|
|
}
|
|
else
|
|
{ // just a normal subentry of some dir => lookup entryInfoPtr and stat it
|
|
FhgfsOpsErr remotingRes;
|
|
LookupIntentInfoIn inInfo; // input data for combo-request
|
|
LookupIntentInfoOut outInfo; // result data of combo-request
|
|
|
|
FhgfsInode_initIsizeHints(NULL, &iSizeHints);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
LookupIntentInfoIn_init(&inInfo, FhgfsInode_getEntryInfo(fhgfsParentInode), entryName);
|
|
|
|
LookupIntentInfoOut_prepare(&outInfo, &newEntryInfo, &fhgfsStat);
|
|
|
|
remotingRes = FhgfsOpsRemoting_lookupIntent(app, &inInfo, &outInfo);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS || outInfo.lookupRes != FhgfsOpsErr_SUCCESS)
|
|
statRes = outInfo.lookupRes; // entry not found
|
|
else
|
|
{ // lookup successful (entry exists)
|
|
statRes = outInfo.statRes;
|
|
if(unlikely(!(outInfo.responseFlags & LOOKUPINTENTRESPMSG_FLAG_STAT) ) )
|
|
{
|
|
Logger_logErrFormatted(log, logContext, "Unexpected stat info missing: %s",
|
|
entryName);
|
|
}
|
|
|
|
freeNewEntryInfo = true;
|
|
|
|
// EntryInfo uninitialize already handled
|
|
LookupIntentInfoOut_setEntryInfoPtr(&outInfo, NULL);
|
|
|
|
if(statRes == FhgfsOpsErr_NOTOWNER)
|
|
{ // metadata not owned by parent/lookup server => need separate stat remoting
|
|
statRes = FhgfsOpsRemoting_statDirect(app, &newEntryInfo, &fhgfsStat);
|
|
}
|
|
}
|
|
|
|
LookupIntentInfoOut_uninit(&outInfo);
|
|
|
|
}
|
|
|
|
// handle result of stat retrieval (e.g. create new inode)...
|
|
|
|
if(statRes != FhgfsOpsErr_SUCCESS)
|
|
{ // stat error (e.g. entry doesn't exist)
|
|
if(statRes == FhgfsOpsErr_PATHNOTEXISTS)
|
|
{
|
|
/* note: "not exists" does not mean that we return a lookup error. the kernel will check
|
|
after lookup() whether we attached an inode to this dentry and handle it accordingly. */
|
|
#ifndef KERNEL_HAS_S_D_OP
|
|
dentry->d_op = &fhgfs_dentry_ops; // (see below for KERNEL_HAS_S_D_OP comments)
|
|
#endif // KERNEL_HAS_S_D_OP
|
|
|
|
d_add(dentry, NULL);
|
|
}
|
|
else
|
|
returnDentry = ERR_PTR(FhgfsOpsErr_toSysErr(statRes) );
|
|
|
|
}
|
|
else
|
|
{ // entry exists => create inode
|
|
struct kstat kstat;
|
|
struct inode* newInode;
|
|
unsigned int metaVersion = fhgfsStat.metaVersion;
|
|
|
|
OsTypeConv_kstatFhgfsToOs(&fhgfsStat, &kstat);
|
|
|
|
kstat.ino = FhgfsInode_generateInodeID(dentry->d_sb,
|
|
newEntryInfo.entryID, strlen(newEntryInfo.entryID) );
|
|
|
|
newInode = __FhgfsOps_newInode(parentDir->i_sb, &kstat, 0, &newEntryInfo, &iSizeHints, metaVersion);
|
|
|
|
freeNewEntryInfo = false; // newEntryInfo now owned or freed by _newInode()
|
|
|
|
if(unlikely(!newInode || IS_ERR(newInode) ) )
|
|
returnDentry = IS_ERR(newInode) ? ERR_PTR(PTR_ERR(newInode) ) : ERR_PTR(-EACCES);
|
|
else
|
|
{ // new inode created
|
|
|
|
#ifndef KERNEL_HAS_S_D_OP
|
|
/* per-dentry d_ops are deprecated as of linux 2.6.38 (commit c8aebb0c9f8c7471643d5f).
|
|
they are now set on the superblock. (individual per-dentry d_ops could still be set
|
|
with d_set_d_op(), but are not required for us.) */
|
|
|
|
dentry->d_op = &fhgfs_dentry_ops;
|
|
#endif // KERNEL_HAS_S_D_OP
|
|
|
|
if (Config_getSysXAttrsCheckCapabilities(cfg) == CHECKCAPABILITIES_Never)
|
|
// The configuration is to never check for capabilities on writes, so use
|
|
// "inode_has_no_xattr" which does exactly one thing and that is to set the flag
|
|
// "S_NOSEC" on the inode, if the inode doesn't have the setuid and setgid bits set and
|
|
// the superblock flag "SB_NOSEC" is set. We set that flag on the superblock according
|
|
// to user configuration. When "S_NOSEC" is set on the inode, the kernel will skip
|
|
// checking for capabilities on every write operation.
|
|
inode_has_no_xattr(newInode);
|
|
|
|
if (S_ISDIR(newInode->i_mode) )
|
|
returnDentry = d_materialise_unique(dentry, newInode);
|
|
else
|
|
{
|
|
returnDentry = d_splice_alias(newInode, dentry); /* (d_splice_alias() also replaces a
|
|
disconnected dentry that was created from a nfs handle) */
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// clean-up
|
|
if(freeNewEntryInfo)
|
|
EntryInfo_uninit(&newEntryInfo);
|
|
|
|
return returnDentry;
|
|
}
|
|
|
|
|
|
#ifdef KERNEL_HAS_STATX
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
extern int FhgfsOps_getattr(struct mnt_idmap* idmap, const struct path* path,
|
|
struct kstat* kstat, u32 request_mask, unsigned int query_flags)
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
extern int FhgfsOps_getattr(struct user_namespace* mnt_userns, const struct path* path,
|
|
struct kstat* kstat, u32 request_mask, unsigned int query_flags)
|
|
#else
|
|
int FhgfsOps_getattr(const struct path* path, struct kstat* kstat, u32 request_mask,
|
|
unsigned int query_flags)
|
|
#endif
|
|
{
|
|
struct vfsmount* mnt = path->mnt;
|
|
struct dentry* dentry = path->dentry;
|
|
|
|
bool mustQuery = (query_flags & AT_STATX_SYNC_TYPE) == AT_STATX_FORCE_SYNC;
|
|
|
|
#else
|
|
int FhgfsOps_getattr(struct vfsmount* mnt, struct dentry* dentry, struct kstat* kstat)
|
|
{
|
|
const bool mustQuery = false;
|
|
|
|
#endif
|
|
App* app = FhgfsOps_getApp(dentry->d_sb);
|
|
Config* cfg = App_getConfig(app);
|
|
const char* logContext = "FhgfsOps_getattr";
|
|
|
|
/* Note: Do not use IS_ROOT(dentry) as this does not work for NFS file dentries
|
|
* (disconnected dentries), i.e. after a server reboot */
|
|
bool isRoot = (dentry == mnt->mnt_root) ? true : false;
|
|
|
|
int retVal = 0;
|
|
struct inode* inode = dentry->d_inode;
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
bool refreshOnGetAttr = Config_getTuneRefreshOnGetAttr(cfg);
|
|
|
|
FhgfsOpsHelper_logOp(5, app, dentry, inode, logContext);
|
|
|
|
/* note: we assumed that we could live without refreshInode() here, because either lookup() or
|
|
revalidate() was called before. But that assumption is wrong when a open file is fstat'ed
|
|
(which is relevant for e.g. "tail -f"), so we only use this optimization for closed files.
|
|
The root dir inode is also an important exception here, because it is not being revalidated
|
|
by the kernel.
|
|
|
|
note on dirs: we could always refresh dirs here because of caching and tools like "find"
|
|
relying on an updated link count, but while a user is creating subdirs, the stat from "find"
|
|
can never be up-to-date, so that would also be quite useless. */
|
|
|
|
retVal = maybeRefreshInode(inode,
|
|
isRoot || FhgfsInode_getIsFileOpen(fhgfsInode) || refreshOnGetAttr, true, mustQuery);
|
|
|
|
if(!retVal)
|
|
{
|
|
#if defined(KERNEL_HAS_GENERIC_FILLATTR_REQUEST_MASK)
|
|
os_generic_fillattr(inode, kstat, request_mask);
|
|
#else
|
|
os_generic_fillattr(inode, kstat);
|
|
#endif
|
|
|
|
kstat->blksize = Config_getTuneInodeBlockSize(cfg);
|
|
|
|
if(isRoot)
|
|
kstat->ino = BEEGFS_INODE_ROOT_INO; // root => assign the constant root inode number
|
|
else
|
|
if (is_32bit_api() && kstat->ino > UINT_MAX)
|
|
{ // 32-bit applications cannot handle 64bit inodes and would fail with EOVERFLOW
|
|
kstat->ino = kstat->ino >> 32;
|
|
}
|
|
}
|
|
|
|
// clean-up
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* Get a list of extended attributes of a file, copy the list into a buffer as null-separated string
|
|
* list; or compute the size of the buffer required.
|
|
* @param value Pointer to the buffer. NULL to compute the size of the buffer required.
|
|
* @param size Size of the buffer.
|
|
* @return Negative error number on failure, number of bytes used / required on success.
|
|
*/
|
|
ssize_t FhgfsOps_listxattr(struct dentry* dentry, char* value, size_t size)
|
|
{
|
|
App* app = FhgfsOps_getApp(dentry->d_sb);
|
|
FhgfsOpsErr remotingRes;
|
|
ssize_t resSize;
|
|
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(dentry->d_inode);
|
|
|
|
int refreshRes = maybeRefreshInode(dentry->d_inode, true, false, false);
|
|
if (refreshRes)
|
|
return refreshRes;
|
|
|
|
FhgfsOpsHelper_logOpDebug(app, dentry, NULL, __func__, "(size: %u)", size);
|
|
|
|
// "Just get the size, don't return any value" is signaled by value=NULL. Since we only send
|
|
// the "size" parameter to the server, we make sure that size is set to 0 in that case.
|
|
if(!value)
|
|
size = 0;
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode);
|
|
|
|
remotingRes = FhgfsOpsRemoting_listXAttr(app, FhgfsInode_getEntryInfo(fhgfsInode), value, size,
|
|
&resSize);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode);
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS)
|
|
resSize = FhgfsOpsErr_toSysErr(remotingRes);
|
|
|
|
return resSize;
|
|
}
|
|
|
|
/**
|
|
* Get an extended attribute of a file, copy it into a buffer; or compute the size of the buffer
|
|
* required.
|
|
* @param value Pointer to the buffer. NULL to compute the size of the buffer required.
|
|
* @param size Size of the buffer.
|
|
* @return Negative error number on failure, or the number of bytes used / required on success.
|
|
*/
|
|
#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
|
|
ssize_t FhgfsOps_getxattr(struct dentry* dentry, const char* name, void* value, size_t size)
|
|
{
|
|
struct inode* inode = dentry->d_inode;
|
|
#else
|
|
ssize_t FhgfsOps_getxattr(struct inode* inode, const char* name, void* value, size_t size)
|
|
{
|
|
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER
|
|
|
|
App* app = FhgfsOps_getApp(inode->i_sb);
|
|
Config* cfg = App_getConfig(app);
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
FhgfsOpsErr remotingRes;
|
|
ssize_t resSize;
|
|
|
|
int refreshRes = maybeRefreshInode(inode, true, false, false);
|
|
if (refreshRes)
|
|
return refreshRes;
|
|
|
|
// "Just get the size, don't return any value" is signaled by value=NULL. Since we only send
|
|
// the "size" parameter to the server, we make sure that size is set to 0 in that case.
|
|
if(!value)
|
|
size = 0;
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode);
|
|
|
|
remotingRes = FhgfsOpsRemoting_getXAttr(app, FhgfsInode_getEntryInfo(fhgfsInode), name, value,
|
|
size, &resSize);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode);
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS)
|
|
resSize = FhgfsOpsErr_toSysErr(remotingRes);
|
|
else
|
|
if (Config_getSysXAttrsCheckCapabilities(cfg) == CHECKCAPABILITIES_Cache
|
|
&& resSize == 0 && !strcmp(name, XATTR_NAME_CAPS)) {
|
|
// We were looking for the xattr "security.capability" (XATTR_NAME_CAPS) and got an empty
|
|
// result, meaning that it doesn't exist. We cache that result via the weirdly named function
|
|
// "inode_has_no_xattr" which does exactly one thing and that is to set the flag "S_NOSEC"
|
|
// on the inode, if the inode doesn't have the setuid and setgid bits set and the superblock
|
|
// flag "SB_NOSEC" is set. We set that flag on the superblock according to user configuration.
|
|
// When "S_NOSEC" is set on the inode, the kernel will skip checking for capabilities on
|
|
// every write operation.
|
|
inode_has_no_xattr(inode);
|
|
}
|
|
|
|
return resSize;
|
|
}
|
|
|
|
/**
|
|
* Remove an extended attribute from a file.
|
|
*/
|
|
int FhgfsOps_removexattr(struct dentry* dentry, const char* name)
|
|
{
|
|
App* app = FhgfsOps_getApp(dentry->d_sb);
|
|
|
|
FhgfsOpsHelper_logOpDebug(app, dentry, NULL, __func__, "(name: %s)", name);
|
|
(void) app;
|
|
|
|
return FhgfsOps_removexattrInode(dentry->d_inode, name);
|
|
}
|
|
|
|
int FhgfsOps_removexattrInode(struct inode* inode, const char* name)
|
|
{
|
|
App* app = FhgfsOps_getApp(inode->i_sb);
|
|
FhgfsOpsErr remotingRes;
|
|
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
int refreshRes = maybeRefreshInode(inode, true, false, false);
|
|
if (refreshRes)
|
|
return refreshRes;
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode);
|
|
|
|
remotingRes = FhgfsOpsRemoting_removeXAttr(app, FhgfsInode_getEntryInfo(fhgfsInode), name);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode);
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS)
|
|
return FhgfsOpsErr_toSysErr(remotingRes);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Set an extended attribute for a file.
|
|
*/
|
|
#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
|
|
int FhgfsOps_setxattr(struct dentry* dentry, const char* name, const void* value, size_t size,
|
|
int flags)
|
|
{
|
|
struct inode* inode = dentry->d_inode;
|
|
#else
|
|
int FhgfsOps_setxattr(struct inode* inode, const char* name, const void* value, size_t size,
|
|
int flags)
|
|
{
|
|
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER
|
|
|
|
App* app = FhgfsOps_getApp(inode->i_sb);
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
FhgfsOpsErr remotingRes;
|
|
|
|
int refreshRes = maybeRefreshInode(inode, true, false, false);
|
|
if (refreshRes)
|
|
return refreshRes;
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode);
|
|
|
|
remotingRes = FhgfsOpsRemoting_setXAttr(app, FhgfsInode_getEntryInfo(fhgfsInode),
|
|
name, value, size, flags);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode);
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS)
|
|
return FhgfsOpsErr_toSysErr(remotingRes);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct posix_acl* Fhgfs_get_acl(struct inode* inode, int type)
|
|
{
|
|
App* app = FhgfsOps_getApp(inode->i_sb);
|
|
|
|
struct posix_acl* res = NULL;
|
|
char* xAttrName;
|
|
FhgfsOpsErr remotingRes;
|
|
size_t remotingResSize;
|
|
size_t xAttrSize;
|
|
char* xAttrBuf = NULL;
|
|
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
const EntryInfo* entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);
|
|
|
|
int refreshRes;
|
|
|
|
forget_all_cached_acls(inode);
|
|
|
|
refreshRes = maybeRefreshInode(inode, true, false, false);
|
|
if (refreshRes)
|
|
return ERR_PTR(refreshRes);
|
|
|
|
if(type == ACL_TYPE_ACCESS)
|
|
xAttrName = XATTR_NAME_POSIX_ACL_ACCESS;
|
|
else if(type == ACL_TYPE_DEFAULT)
|
|
xAttrName = XATTR_NAME_POSIX_ACL_DEFAULT;
|
|
else
|
|
return ERR_PTR(-EOPNOTSUPP);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode);
|
|
|
|
// read extended attributes from the file
|
|
remotingRes = FhgfsOpsRemoting_getXAttr(app, entryInfo, xAttrName, NULL, 0, &remotingResSize);
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
int sysErr = FhgfsOpsErr_toSysErr(remotingRes);
|
|
|
|
if (sysErr == -ENODATA || sysErr == -ENOSYS)
|
|
res = NULL;
|
|
else
|
|
res = ERR_PTR(sysErr);
|
|
|
|
goto cleanup;
|
|
}
|
|
|
|
xAttrBuf = os_kmalloc(remotingResSize);
|
|
if(!xAttrBuf)
|
|
{
|
|
res = ERR_PTR(-ENOMEM);
|
|
goto cleanup;
|
|
}
|
|
|
|
xAttrSize = remotingResSize;
|
|
|
|
remotingRes = FhgfsOpsRemoting_getXAttr(app, entryInfo, xAttrName, xAttrBuf, xAttrSize,
|
|
&remotingResSize);
|
|
|
|
if(remotingRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
res = ERR_PTR(FhgfsOpsErr_toSysErr(remotingRes) );
|
|
goto cleanup;
|
|
}
|
|
|
|
// determine posix_acl from extended attributes
|
|
res = os_posix_acl_from_xattr(xAttrBuf, remotingResSize);
|
|
|
|
cleanup:
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode);
|
|
kfree(xAttrBuf);
|
|
return res;
|
|
}
|
|
|
|
/* Kernel API changes for get_acl() and get_inode_acl()
|
|
* 6.3
|
|
* struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
|
|
* struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *, int);
|
|
*
|
|
* 6.2
|
|
* get_acl() was introduced as get_inode_acl() but both interfaces still provided
|
|
* struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
|
|
* struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *, int);
|
|
*
|
|
* 5.15
|
|
* struct posix_acl * (*get_acl)(struct inode *, int type, bool rcu);
|
|
*
|
|
* 3.2
|
|
* struct posix_acl * (*get_acl)(struct inode *, int);
|
|
*/
|
|
/* get_acl() entry point in the signature variant of the running kernel. The dentry based
   variants have no rcu argument, so rcu is a local that is always false there. With rcu set,
   ERR_PTR(-ECHILD) is returned instead of calling Fhgfs_get_acl(). */
#if defined(KERNEL_HAS_POSIX_GET_ACL_IDMAP)
struct posix_acl * FhgfsOps_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type)
{
   struct inode* inode = dentry->d_inode;
   bool rcu = false;
#elif defined(KERNEL_HAS_POSIX_GET_ACL_NS)
struct posix_acl * FhgfsOps_get_acl(struct user_namespace *userns, struct dentry *dentry, int type)
{
   struct inode* inode = dentry->d_inode;
   bool rcu = false;
#elif defined(KERNEL_POSIX_GET_ACL_HAS_RCU)
struct posix_acl* FhgfsOps_get_acl(struct inode* inode, int type, bool rcu)
{
#else
struct posix_acl* FhgfsOps_get_acl(struct inode* inode, int type)
{
#endif

#if defined(KERNEL_POSIX_GET_ACL_HAS_RCU) || defined(KERNEL_HAS_GET_INODE_ACL)
   return rcu ? ERR_PTR(-ECHILD) : Fhgfs_get_acl(inode, type);
#else
   return Fhgfs_get_acl(inode, type);
#endif // KERNEL_POSIX_GET_ACL_HAS_RCU || KERNEL_HAS_GET_INODE_ACL
}
|
|
|
|
|
|
#if defined(KERNEL_HAS_GET_INODE_ACL)
/**
 * get_inode_acl() entry point (inode based ACL lookup with rcu argument).
 *
 * @param rcu if set, the ACL is not fetched and ERR_PTR(-ECHILD) is returned, because
 * Fhgfs_get_acl() blocks (remote communication, memory allocation). This mirrors the rcu
 * handling in FhgfsOps_get_acl().
 * @return result of Fhgfs_get_acl(), or ERR_PTR(-ECHILD) if rcu is set.
 */
struct posix_acl* FhgfsOps_get_inode_acl(struct inode* inode, int type, bool rcu)
{
   if(rcu)
      return ERR_PTR(-ECHILD);

   return Fhgfs_get_acl(inode, type);
}
#endif // KERNEL_HAS_GET_INODE_ACL
|
|
|
|
#if defined(KERNEL_HAS_SET_ACL)

/**
 * Store a POSIX ACL of the given type for an inode as xattr, or remove the corresponding xattr
 * if no ACL is given.
 *
 * In the dentry based variants, setting an access ACL also updates the file mode via
 * posix_acl_update_mode() and FhgfsOps_setattr() before the xattr is written.
 *
 * Note: the whole function (not only its signature) is guarded by KERNEL_HAS_SET_ACL, so that
 * no function body is left behind at file scope when set_acl is not available.
 *
 * @param acl the ACL to store; NULL to remove the ACL xattr (a missing xattr is not an error).
 * @param type ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT.
 * @return 0 on success; otherwise a non-zero value, e.g. -EOPNOTSUPP for an unknown type,
 * -ENOMEM, or the error of the refresh, mode update, ACL conversion or xattr remoting.
 */
#if defined(KERNEL_HAS_SET_ACL_DENTRY) && defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_set_acl(struct mnt_idmap* mnt_userns, struct dentry* dentry,
   struct posix_acl* acl, int type)
{
   struct inode* inode = d_inode(dentry);
#elif defined(KERNEL_HAS_SET_ACL_DENTRY) && defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_set_acl(struct user_namespace* mnt_userns, struct dentry* dentry,
   struct posix_acl* acl, int type)
{
   struct inode* inode = d_inode(dentry);
#elif defined(KERNEL_HAS_SET_ACL_NS_INODE)
extern int FhgfsOps_set_acl(struct user_namespace* mnt_userns, struct inode* inode,
   struct posix_acl* acl, int type)
{
#else
extern int FhgfsOps_set_acl(struct inode* inode, struct posix_acl* acl, int type)
{
#endif // KERNEL_HAS_SET_ACL_DENTRY
   App* app = FhgfsOps_getApp(inode->i_sb);
   int res;
   FhgfsOpsErr remotingRes;
   char* xAttrName;
   int xAttrBufLen;
   void* xAttrBuf = NULL;

   FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
   const EntryInfo* entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);

   int refreshRes = maybeRefreshInode(inode, true, false, false);
   if(refreshRes)
      return refreshRes;

   if(type == ACL_TYPE_ACCESS)
   {
      #ifdef KERNEL_HAS_SET_ACL_DENTRY
      if(acl)
      {
         int ret = 0;
         struct iattr iattr;
         int setAttrRes;

         memset(&iattr, 0, sizeof iattr);

         /* Update the file mode when setting an ACL: compute the new file permission
          * bits based on the ACL. (posix_acl_update_mode() gets the address of acl, i.e. it
          * may replace our local acl pointer.) */
         #if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
            ret = posix_acl_update_mode(&nop_mnt_idmap, inode, &iattr.ia_mode, &acl);
         #else
            ret = posix_acl_update_mode(&init_user_ns, inode, &iattr.ia_mode, &acl);
         #endif
         if(ret)
            return ret;

         if(inode->i_mode != iattr.ia_mode)
         {
            iattr.ia_valid = ATTR_MODE; // update the file mode permission bits
            #if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
               setAttrRes = FhgfsOps_setattr(&nop_mnt_idmap, dentry, &iattr);
            #else
               setAttrRes = FhgfsOps_setattr(&init_user_ns, dentry, &iattr);
            #endif
            if(setAttrRes < 0)
               return setAttrRes;
         }
      }
      #endif // KERNEL_HAS_SET_ACL_DENTRY

      xAttrName = XATTR_NAME_POSIX_ACL_ACCESS;
   }
   else if(type == ACL_TYPE_DEFAULT)
      xAttrName = XATTR_NAME_POSIX_ACL_DEFAULT;
   else
      return -EOPNOTSUPP;

   if(acl)
   {
      // prepare extended attribute - determine size needed for buffer.
      xAttrBufLen = os_posix_acl_to_xattr(acl, NULL, 0);

      if(xAttrBufLen < 0)
         return xAttrBufLen;

      xAttrBuf = os_kmalloc(xAttrBufLen);
      if(!xAttrBuf)
         return -ENOMEM;

      res = os_posix_acl_to_xattr(acl, xAttrBuf, xAttrBufLen);
      if(res != xAttrBufLen)
         goto cleanup; // conversion did not fill the buffer as announced => pass its result on

      FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo

      remotingRes = FhgfsOpsRemoting_setXAttr(app, entryInfo, xAttrName, xAttrBuf, xAttrBufLen, 0);

      FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo
   }
   else
   { // no ACL given => remove the xattr
      FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo

      remotingRes = FhgfsOpsRemoting_removeXAttr(app, entryInfo, xAttrName);
      if(remotingRes == FhgfsOpsErr_NODATA)
         remotingRes = FhgfsOpsErr_SUCCESS; // nothing to remove is fine

      FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo
   }

   if(remotingRes != FhgfsOpsErr_SUCCESS)
      res = FhgfsOpsErr_toSysErr(remotingRes);
   else
      res = 0;

cleanup:
   kfree(xAttrBuf); // (NULL in the remove case)

   return res;
}

#endif // KERNEL_HAS_SET_ACL
|
|
|
|
#ifdef KERNEL_HAS_GET_ACL
/**
 * Update the ACL of an inode after a chmod.
 *
 * With set_acl support this is delegated to os_posix_acl_chmod(). Otherwise the access ACL is
 * fetched, adapted to the new mode via posix_acl_chmod() and written back as xattr.
 *
 * @param iattr the attributes of the setattr call; nothing is done without ATTR_MODE.
 * @return 0 on success or if there is nothing to do (mode unchanged, entry has no ACL);
 * negative linux error code otherwise.
 */
int FhgfsOps_aclChmod(struct iattr* iattr, struct dentry* dentry)
{
#if defined(KERNEL_HAS_SET_ACL) || defined(KERNEL_HAS_SET_ACL_DENTRY)
   if (iattr->ia_valid & ATTR_MODE)
      return os_posix_acl_chmod(dentry, iattr->ia_mode);
   else
      return 0;
#else
   struct posix_acl* acl;
   int res;

   void* xAttrBuf = NULL;
   int xAttrBufLen;

   if( !(iattr->ia_valid & ATTR_MODE) ) // don't have to do anything if the mode hasn't changed
      return 0;

   #if defined(KERNEL_HAS_POSIX_GET_ACL_IDMAP)
      acl = FhgfsOps_get_acl(&nop_mnt_idmap, dentry, ACL_TYPE_ACCESS);
   #elif defined(KERNEL_HAS_POSIX_GET_ACL_NS)
      acl = FhgfsOps_get_acl(&init_user_ns, dentry, ACL_TYPE_ACCESS);
   #else
      acl = FhgfsOps_get_acl(dentry->d_inode, ACL_TYPE_ACCESS);
   #endif

   /* entry doesn't have an ACL => we don't have to do anything.
      (Fhgfs_get_acl() reports a missing ACL as NULL, not as ERR_PTR(-ENODATA), so NULL must be
      handled here; otherwise it would be handed to posix_acl_chmod() and the xattr conversion.) */
   if(!acl || PTR_ERR(acl) == -ENODATA)
      return 0;
   else if(IS_ERR(acl) ) // some error occurred - pass the error code on to the caller.
      return PTR_ERR(acl);

   // We have an actual ACL for that entry, so we need to update it.
   res = posix_acl_chmod(&acl, GFP_KERNEL, iattr->ia_mode);

   if(res != 0)
      goto cleanup;

   // Set the ACL Xattr.
   xAttrBufLen = os_posix_acl_to_xattr(acl, NULL, 0); // Determine size needed for buffer.

   if(xAttrBufLen < 0)
   {
      res = xAttrBufLen;
      goto cleanup;
   }

   xAttrBuf = os_kmalloc(xAttrBufLen);
   if(!xAttrBuf)
   {
      res = -ENOMEM;
      goto cleanup;
   }

   res = os_posix_acl_to_xattr(acl, xAttrBuf, xAttrBufLen);
   if(res != xAttrBufLen)
      goto buf_cleanup; // if it's not the same as xAttrBufLen, it is -ERANGE - so no need to modify res

   // We call FhgfsOps_setxattr directly instead of using the XAttr handler
   // because the handler would try to chmod again.
   res = FhgfsOps_setxattr(dentry, XATTR_NAME_POSIX_ACL_ACCESS, xAttrBuf, xAttrBufLen, 0);

buf_cleanup:
   kfree(xAttrBuf);

cleanup:
   posix_acl_release(acl);

   return res;
#endif // KERNEL_HAS_SET_ACL || KERNEL_HAS_SET_ACL_DENTRY
}
#endif // KERNEL_HAS_GET_ACL
|
|
|
|
/**
|
|
* @return 0 on success, negative linux error code otherwise
|
|
*/
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
extern int FhgfsOps_setattr(struct mnt_idmap* idmap, struct dentry* dentry,
|
|
struct iattr* iattr)
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
extern int FhgfsOps_setattr(struct user_namespace* mnt_userns, struct dentry* dentry,
|
|
struct iattr* iattr)
|
|
#else
|
|
int FhgfsOps_setattr(struct dentry* dentry, struct iattr* iattr)
|
|
#endif
|
|
{
|
|
// note: called for chmod(), chown(), utime(), truncate()
|
|
// note: changed fields can be determined by the iattr->ia_valid flags (ATTR_...)
|
|
|
|
App* app = FhgfsOps_getApp(dentry->d_sb);
|
|
Config* cfg = App_getConfig(app);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_setattr";
|
|
|
|
int retVal = 0;
|
|
int setAttrPrepRes;
|
|
SettableFileAttribs fhgfsAttr;
|
|
int validFhgfsAttribs;
|
|
FhgfsOpsErr setAttrRes = FhgfsOpsErr_SUCCESS;
|
|
struct inode* inode = dentry->d_inode;
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= 5) )
|
|
{
|
|
FhgfsOpsHelper_logOpMsg(Log_SPAM, app, dentry, inode, logContext,
|
|
(iattr->ia_valid & ATTR_SIZE) ? "(with trunc)" : "");
|
|
}
|
|
|
|
#ifdef KERNEL_HAS_SETATTR_PREPARE
|
|
setAttrPrepRes = os_setattr_prepare(dentry, iattr);
|
|
#else
|
|
setAttrPrepRes = inode_change_ok(inode, iattr);
|
|
#endif
|
|
if(setAttrPrepRes < 0)
|
|
return setAttrPrepRes;
|
|
|
|
/* we do trunc during open message on meta server, so we don't want this redundant trunc
|
|
(and ctime/mtime update) from the kernel */
|
|
if(iattr->ia_valid & ATTR_OPEN)
|
|
{
|
|
if (iattr->ia_valid & ATTR_SIZE)
|
|
{ // the file was already remotely truncate by open, now also truncate it locally
|
|
FhgfsOps_vmtruncate(inode, iattr->ia_size);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
if(iattr->ia_valid & ATTR_SIZE)
|
|
{ // make sure we only update size of regular files
|
|
if(!S_ISREG(inode->i_mode) )
|
|
iattr->ia_valid &= ~ATTR_SIZE;
|
|
}
|
|
|
|
if(S_ISREG(inode->i_mode) )
|
|
{ // flush all dirty file data
|
|
filemap_write_and_wait(inode->i_mapping);
|
|
|
|
if(Config_getTuneFileCacheTypeNum(cfg) == FILECACHETYPE_Buffered)
|
|
FhgfsOpsHelper_flushCache(app, fhgfsInode, false);
|
|
}
|
|
|
|
OsTypeConv_iattrOsToFhgfs(iattr, &fhgfsAttr, &validFhgfsAttribs);
|
|
|
|
if(validFhgfsAttribs)
|
|
{
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_SETATTR)
|
|
{
|
|
FileEvent_init(&event, FileEventType_SETATTR, dentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
setAttrRes = FhgfsOpsRemoting_setAttr(app, FhgfsInode_getEntryInfo(fhgfsInode), &fhgfsAttr,
|
|
validFhgfsAttribs, eventSent);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo
|
|
}
|
|
|
|
if(setAttrRes == FhgfsOpsErr_SUCCESS)
|
|
{
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
if (validFhgfsAttribs)
|
|
{
|
|
FhgfsOps_newAttrToInode(iattr, inode);
|
|
|
|
#ifdef KERNEL_HAS_GET_ACL
|
|
if (Config_getSysACLsEnabled(cfg) )
|
|
{
|
|
int aclRes = FhgfsOps_aclChmod(iattr, dentry);
|
|
if(aclRes < 0)
|
|
{
|
|
Logger_logFormatted(log, Log_ERR, logContext, "ACL chmod failed for '%s' (mode: 0%o), error: %d",
|
|
dentry->d_name.name, iattr->ia_mode, aclRes);
|
|
return aclRes;
|
|
}
|
|
}
|
|
#endif // KERNEL_HAS_GET_ACL
|
|
}
|
|
|
|
// all right so far => handle truncation
|
|
if(iattr->ia_valid & ATTR_SIZE)
|
|
{
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_TRUNC)
|
|
{
|
|
/* if path is not set, free the resources possibly acquired by the setattr operation
|
|
* we did earlier and try again. otherwise, we have a valid path to send to the meta
|
|
* server, only the event type needs to be changed. */
|
|
if (!event.path)
|
|
{
|
|
FileEvent_uninit(&event);
|
|
FileEvent_init(&event, FileEventType_TRUNCATE, dentry);
|
|
}
|
|
else
|
|
{
|
|
event.eventType = FileEventType_TRUNCATE;
|
|
}
|
|
|
|
eventSent = &event;
|
|
}
|
|
|
|
setAttrRes = FhgfsOpsRemoting_truncfile(app, FhgfsInode_getEntryInfo(fhgfsInode),
|
|
iattr->ia_size, eventSent);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo
|
|
|
|
if(setAttrRes == FhgfsOpsErr_SUCCESS)
|
|
{
|
|
FhgfsOps_vmtruncate(inode, iattr->ia_size);
|
|
}
|
|
}
|
|
}
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
if(setAttrRes != FhgfsOpsErr_SUCCESS)
|
|
retVal = FhgfsOpsErr_toSysErr(setAttrRes);
|
|
else
|
|
FhgfsInode_invalidateCache(fhgfsInode);
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* Set inode attributes after a successful setattr
|
|
*/
|
|
void FhgfsOps_newAttrToInode(struct iattr* iAttr, struct inode* outInode)
|
|
{
|
|
Time now;
|
|
#if defined(KERNEL_HAS_CURRENT_TIME_SPEC64)
|
|
struct timespec64 ts;
|
|
#else
|
|
struct timespec ts;
|
|
#endif
|
|
Time_setToNowReal(&now);
|
|
|
|
spin_lock(&outInode->i_lock);
|
|
|
|
ts.tv_sec = now.tv_sec;
|
|
ts.tv_nsec = 0;
|
|
|
|
if(iAttr->ia_valid & ATTR_MODE)
|
|
outInode->i_mode = iAttr->ia_mode;
|
|
|
|
if(iAttr->ia_valid & ATTR_UID)
|
|
outInode->i_uid = iAttr->ia_uid;
|
|
|
|
if(iAttr->ia_valid & ATTR_GID)
|
|
outInode->i_gid = iAttr->ia_gid;
|
|
|
|
if(iAttr->ia_valid & ATTR_MTIME_SET)
|
|
{
|
|
inode_set_mtime_to_ts(outInode, iAttr->ia_mtime);
|
|
}
|
|
else
|
|
if(iAttr->ia_valid & ATTR_MTIME)
|
|
{ // set mtime to "now"
|
|
inode_set_mtime_to_ts(outInode, ts);
|
|
}
|
|
|
|
if(iAttr->ia_valid & ATTR_ATIME_SET)
|
|
{
|
|
inode_set_atime_to_ts(outInode, iAttr->ia_atime);
|
|
}
|
|
else
|
|
if(iAttr->ia_valid & ATTR_ATIME)
|
|
{ // set atime to "now"
|
|
inode_set_atime_to_ts(outInode, ts);
|
|
}
|
|
|
|
if(iAttr->ia_valid & ATTR_CTIME)
|
|
{
|
|
inode_set_ctime_to_ts(outInode, ts);
|
|
}
|
|
|
|
spin_unlock(&outInode->i_lock);
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* Create directory.
|
|
*/
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
int FhgfsOps_mkdir(struct mnt_idmap* idmap, struct inode* dir, struct dentry* dentry,
|
|
umode_t mode)
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
int FhgfsOps_mkdir(struct user_namespace* mnt_userns, struct inode* dir, struct dentry* dentry,
|
|
umode_t mode)
|
|
#elif defined(KERNEL_HAS_UMODE_T)
|
|
int FhgfsOps_mkdir(struct inode* dir, struct dentry* dentry, umode_t mode)
|
|
#else
|
|
int FhgfsOps_mkdir(struct inode* dir, struct dentry* dentry, int mode)
|
|
#endif
|
|
{
|
|
struct super_block* sb = dentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_mkdir";
|
|
|
|
int retVal = 0;
|
|
FhgfsOpsErr mkRes;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
EntryInfo newEntryInfo;
|
|
const char* entryName = dentry->d_name.name;
|
|
const int umask = current_umask();
|
|
struct CreateInfo createInfo;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
struct inode* inode = dentry->d_inode;
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= 5) )
|
|
FhgfsOpsHelper_logOp(5, app, dentry, inode, logContext);
|
|
|
|
mode |= S_IFDIR; // just make sure this is a dir
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_MKDIR, dentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
CreateInfo_init(app, dir, entryName, mode, umask, false, eventSent, &createInfo);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
mkRes = FhgfsOpsRemoting_mkdir(app, FhgfsInode_getEntryInfo(fhgfsParentInode), &createInfo,
|
|
&newEntryInfo);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(mkRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
retVal = FhgfsOpsErr_toSysErr(mkRes);
|
|
}
|
|
else
|
|
{ // remote success => create the local inode
|
|
retVal = __FhgfsOps_instantiateInode(dentry, &newEntryInfo, NULL, &iSizeHints);
|
|
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
}
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
return retVal;
|
|
}
|
|
|
|
int FhgfsOps_rmdir(struct inode* dir, struct dentry* dentry)
|
|
{
|
|
struct super_block* sb = dentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_rmdir";
|
|
|
|
int retVal = 0;
|
|
FhgfsOpsErr rmRes;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
const char* entryName = dentry->d_name.name;
|
|
struct inode* inode = dentry->d_inode;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= 5) )
|
|
FhgfsOpsHelper_logOp(5, app, dentry, inode, logContext);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_RMDIR, dentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
rmRes = FhgfsOpsRemoting_rmdir(app, FhgfsInode_getEntryInfo(fhgfsParentInode), entryName,
|
|
eventSent);
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(rmRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
retVal = FhgfsOpsErr_toSysErr(rmRes);
|
|
}
|
|
else
|
|
{ // remote success
|
|
clear_nlink(dentry->d_inode);
|
|
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* Create file based on combined intent message.
|
|
*
|
|
* @param isExclusiveCreate true if this is an exclusive (O_EXCL) create
|
|
*/
|
|
#if defined KERNEL_HAS_ATOMIC_OPEN
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
int FhgfsOps_createIntent(struct mnt_idmap* idmap, struct inode* dir,
|
|
struct dentry* dentry, umode_t createMode, bool isExclusiveCreate)
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
int FhgfsOps_createIntent(struct user_namespace* mnt_userns, struct inode* dir,
|
|
struct dentry* dentry, umode_t createMode, bool isExclusiveCreate)
|
|
#else
|
|
int FhgfsOps_createIntent(struct inode* dir, struct dentry* dentry, umode_t createMode,
|
|
bool isExclusiveCreate)
|
|
#endif
|
|
#elif defined KERNEL_HAS_UMODE_T
|
|
int FhgfsOps_createIntent(struct inode* dir, struct dentry* dentry, umode_t createMode,
|
|
struct nameidata* nameidata)
|
|
#else
|
|
int FhgfsOps_createIntent(struct inode* dir, struct dentry* dentry, int createMode,
|
|
struct nameidata* nameidata)
|
|
#endif // KERNEL_HAS_ATOMIC_OPEN
|
|
{
|
|
struct super_block* sb = dentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_createIntent";
|
|
|
|
int retVal = 0;
|
|
FhgfsOpsErr remotingRes;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
EntryInfo newEntryInfo;
|
|
const char* entryName = dentry->d_name.name;
|
|
const int currentUmask = current_umask();
|
|
fhgfs_stat statData;
|
|
CreateInfo createInfo;
|
|
OpenInfo openInfo;
|
|
LookupIntentInfoIn inInfo; // input data for combo-request
|
|
LookupIntentInfoOut lookupOutInfo; // result data of combo-request
|
|
struct inode* inode = dentry->d_inode;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
#ifndef KERNEL_HAS_ATOMIC_OPEN
|
|
bool isExclusiveCreate = (nameidata && (nameidata->intent.open.flags & O_EXCL) );
|
|
int openFlags = (nameidata && nameidata->flags & LOOKUP_OPEN) ?
|
|
nameidata->intent.open.flags : 0;
|
|
#else
|
|
int openFlags = 0;
|
|
#endif // LINUX_VERSION_CODE
|
|
|
|
FhgfsOpsHelper_logOp(4, app, dentry, inode, logContext);
|
|
|
|
if(unlikely(dentry->d_name.len > NAME_MAX) )
|
|
return -ENAMETOOLONG;
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
if(unlikely(!S_ISREG(createMode) ) )
|
|
return -EINVAL; // just make sure we create a regular file here
|
|
|
|
FhgfsInode_initIsizeHints(NULL, &iSizeHints);
|
|
|
|
LookupIntentInfoIn_init(&inInfo, FhgfsInode_getEntryInfo(fhgfsParentInode), entryName);
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_CREATE, dentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
CreateInfo_init(app, dir, entryName, createMode, currentUmask, isExclusiveCreate, eventSent,
|
|
&createInfo);
|
|
LookupIntentInfoIn_addCreateInfo(&inInfo, &createInfo);
|
|
|
|
if (openFlags)
|
|
{
|
|
OpenInfo_init(&openInfo, openFlags, __FhgfsOps_isPagedMode(sb) );
|
|
LookupIntentInfoIn_addOpenInfo(&inInfo, &openInfo);
|
|
}
|
|
|
|
LookupIntentInfoOut_prepare(&lookupOutInfo, &newEntryInfo, &statData);
|
|
|
|
remotingRes = FhgfsOpsRemoting_lookupIntent(app, &inInfo, &lookupOutInfo);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if (unlikely(remotingRes != FhgfsOpsErr_SUCCESS) )
|
|
{
|
|
d_drop(dentry); // avoid leaving a negative dentry behind
|
|
retVal = FhgfsOpsErr_toSysErr(remotingRes);
|
|
goto outErr;
|
|
}
|
|
|
|
if (unlikely(lookupOutInfo.createRes != FhgfsOpsErr_SUCCESS) )
|
|
{
|
|
d_drop(dentry); // avoid leaving a negative dentry behind
|
|
retVal = FhgfsOpsErr_toSysErr(lookupOutInfo.createRes);
|
|
goto outErr;
|
|
}
|
|
|
|
if (unlikely(lookupOutInfo.statRes != FhgfsOpsErr_SUCCESS) )
|
|
{ // something went wrong before the server could stat the file
|
|
|
|
if (!(lookupOutInfo.responseFlags & LOOKUPINTENTRESPMSG_FLAG_STAT) )
|
|
Logger_logErrFormatted(log, logContext, "Unexpected stat info missing: %s",
|
|
createInfo.entryName);
|
|
|
|
|
|
d_drop(dentry); // avoid leaving a negative dentry behind
|
|
retVal = FhgfsOpsErr_toSysErr(lookupOutInfo.statRes);
|
|
goto outErr;
|
|
}
|
|
|
|
if(unlikely(
|
|
lookupOutInfo.stripePattern &&
|
|
(StripePattern_getPatternType(lookupOutInfo.stripePattern) == STRIPEPATTERN_Invalid) ) )
|
|
{ // unknown/invalid stripe pattern
|
|
Logger_logErrFormatted(log, logContext, "Entry has invalid/unknown stripe pattern type: %s",
|
|
createInfo.entryName);
|
|
|
|
d_drop(dentry); // avoid leaving a negative dentry behind
|
|
retVal = FhgfsOpsErr_toSysErr(FhgfsOpsErr_INTERNAL);
|
|
goto outErr;
|
|
}
|
|
|
|
// remote success => create local inode
|
|
retVal = __FhgfsOps_instantiateInode(dentry, lookupOutInfo.entryInfoPtr,
|
|
lookupOutInfo.fhgfsStat, &iSizeHints);
|
|
LookupIntentInfoOut_setEntryInfoPtr(&lookupOutInfo, NULL); /* Make sure entryInfo will not be
|
|
* de-initialized */
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
|
|
#ifndef KERNEL_HAS_ATOMIC_OPEN
|
|
if (lookupOutInfo.openRes == FhgfsOpsErr_SUCCESS)
|
|
{
|
|
struct file* file;
|
|
struct inode* newInode;
|
|
int openHandleRes;
|
|
|
|
file = lookup_instantiate_filp(nameidata, dentry, generic_file_open);
|
|
if (IS_ERR(file) )
|
|
{
|
|
retVal = PTR_ERR(file);
|
|
goto outErr;
|
|
}
|
|
|
|
newInode = dentry->d_inode;
|
|
|
|
openHandleRes = FhgfsOps_openReferenceHandle(app, newInode, file, openFlags,
|
|
&lookupOutInfo, NULL);
|
|
if (unlikely(openHandleRes) )
|
|
{ // failed to get an fhgfs internal handle
|
|
// printk_fhgfs_debug(KERN_INFO, "Reference handle failed\n");
|
|
|
|
fput(file);
|
|
|
|
retVal = openHandleRes;
|
|
goto outErr;
|
|
}
|
|
|
|
LookupIntentInfoOut_setStripePattern(&lookupOutInfo, NULL);
|
|
|
|
}
|
|
#endif // KERNEL_HAS_ATOMIC_OPEN
|
|
|
|
// clean-up
|
|
|
|
LookupIntentInfoOut_uninit(&lookupOutInfo);
|
|
FileEvent_uninit(&event);
|
|
|
|
retVal = 0; // success
|
|
|
|
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "complete. result: %d", retVal);
|
|
|
|
return retVal;
|
|
|
|
|
|
|
|
outErr:
|
|
|
|
// clean-up on error
|
|
|
|
if (unlikely(lookupOutInfo.openRes == FhgfsOpsErr_SUCCESS) )
|
|
{
|
|
RemotingIOInfo closeInfo;
|
|
FhgfsOpsErr closeRes;
|
|
const EntryInfo* parentEntryInfo = FhgfsInode_getEntryInfo(fhgfsParentInode);
|
|
AtomicInt maxUsedTargetIndex;
|
|
|
|
AtomicInt_init(&maxUsedTargetIndex, 0); // file was not open to user space yet, so 0
|
|
|
|
RemotingIOInfo_initSpecialClose(app, lookupOutInfo.fileHandleID,
|
|
&maxUsedTargetIndex, openFlags, &closeInfo);
|
|
|
|
closeRes = FhgfsOpsHelper_closefileWithAsyncRetry(lookupOutInfo.entryInfoPtr, &closeInfo,
|
|
NULL);
|
|
if (closeRes != FhgfsOpsErr_SUCCESS)
|
|
Logger_logErrFormatted(log, logContext, "Close on error: Failed to close file "
|
|
"parentID: %s fileName: %s", EntryInfo_getEntryID(parentEntryInfo), entryName);
|
|
}
|
|
|
|
LookupIntentInfoOut_uninit(&lookupOutInfo);
|
|
FileEvent_uninit(&event);
|
|
|
|
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "complete. result: %d", retVal);
|
|
|
|
return retVal;
|
|
}
|
|
|
|
|
|
#ifdef KERNEL_HAS_ATOMIC_OPEN
/**
 * Lookup/create/open file
 *
 * Note: O_CREAT is not required to be set, so we must also handle a lookup-open.
 * Note2: Inode permissions are handled in vfs' atomic_open(), once we have successfully opened
 *    the file. If userspace is not allowed to open the file, vfs will immediately send a close
 *    request.
 * Note3: As this is a general lookup, it can also be called for directories, symlinks etc.
 *
 * Warning: This is currently only enabled when define BEEGFS_ENABLE_ATOMIC_OPEN is set, because
 *    a) dentries come out as d_unhashed() here, which should not happen.
 *    b) users reported hangs with 3.10 lt elrepo kernel when doing fcntl locking stresstests with
 *       global locks enabled.
 *
 * @param dir parent directory inode
 * @param dentry dentry of the entry to look up / create / open
 * @param file file object that is completed via finish_open() / finish_no_open()
 * @param openFlags open flags; O_CREAT and O_EXCL are evaluated here
 * @param createMode mode for a newly created file (only checked if O_CREAT is set)
 * @param outOpenedFlags (only without FMODE_CREATED) receives FILE_CREATED on file creation
 * @return 0 on success, otherwise a negative linux error code or the result of
 *    finish_open() / finish_no_open()
 */
int FhgfsOps_atomicOpen(struct inode* dir, struct dentry* dentry, struct file* file,
   unsigned openFlags, umode_t createMode
#ifndef FMODE_CREATED
   , int* outOpenedFlags
#endif
   )
{
   struct super_block* sb = dentry->d_sb;
   App* app = FhgfsOps_getApp(sb);
   Logger* log = App_getLogger(app);
   const char* logContext = "FhgfsOps_atomicOpen";

   int retVal = -EINVAL; // any error is fine by default
   FhgfsOpsErr remotingRes;
   FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
   EntryInfo newEntryInfo;
   struct inode* newInode;
   const char* entryName = dentry->d_name.name;
   const int createUmask = current_umask();
   struct inode* inode = dentry->d_inode;
   FhgfsInode* fhgfsInode;

   CreateInfo createInfo;
   OpenInfo openInfo;

   LookupIntentInfoIn inInfo; // input data for combo-request
   LookupIntentInfoOut lookupOutInfo; // result data of combo-request

   fhgfs_stat statData; // passed to lookupOutInfo
   fhgfs_stat* statDataPtr;

   int openHandleRes;
   int instantiateRes;

   FhgfsIsizeHints iSizeHints;

   bool isCreate = !!(openFlags & O_CREAT);

   FhgfsOpsHelper_logOp(4, app, dentry, inode, logContext);

   if(unlikely(dentry->d_name.len > NAME_MAX) )
      return -ENAMETOOLONG;

   if (inode)
      fhgfsInode = BEEGFS_INODE(inode);
   else
      fhgfsInode = NULL;

   FhgfsInode_initIsizeHints(fhgfsInode, &iSizeHints);

   FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo

   LookupIntentInfoIn_init(&inInfo, FhgfsInode_getEntryInfo(fhgfsParentInode), entryName);

   if (isCreate)
   {
      bool isExclusiveCreate = !!(openFlags & O_EXCL);

      if (!(createMode & S_IFREG) )
      {
         FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
         return -EINVAL; // we can still easily return here, no need to free something yet
      }

      // XXX event
      CreateInfo_init(app, dir, entryName, createMode, createUmask, isExclusiveCreate, NULL,
         &createInfo);
      LookupIntentInfoIn_addCreateInfo(&inInfo, &createInfo);
   }

   OpenInfo_init(&openInfo, openFlags, __FhgfsOps_isPagedMode(sb) );
   LookupIntentInfoIn_addOpenInfo(&inInfo, &openInfo);

   LookupIntentInfoOut_prepare(&lookupOutInfo, &newEntryInfo, &statData);

   remotingRes = FhgfsOpsRemoting_lookupIntent(app, &inInfo, &lookupOutInfo); // remote call

   FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo

   if (unlikely(remotingRes != FhgfsOpsErr_SUCCESS) )
   {
      retVal = FhgfsOpsErr_toSysErr(remotingRes);
      goto outErr;
   }

   if (!isCreate && lookupOutInfo.lookupRes != FhgfsOpsErr_SUCCESS)
   { // plain lookup-open and the entry was not found
      retVal = FhgfsOpsErr_toSysErr(lookupOutInfo.lookupRes);
      goto outErr;
   }
   else
   if (lookupOutInfo.statRes != FhgfsOpsErr_SUCCESS)
   { // something went wrong before the server could stat the file

      // d_drop(dentry); // avoid leaving a negative dentry behind. Don't for atomic_open!

      if (lookupOutInfo.statRes == FhgfsOpsErr_NOTOWNER)
      { // directory or inter-dir hardlink
         statDataPtr = NULL; // force a stat call to the real owner
      }
      else
      {
         retVal = FhgfsOpsErr_toSysErr(lookupOutInfo.statRes);
         goto outErr;
      }
   }
   else
   { // stat success
      statDataPtr = lookupOutInfo.fhgfsStat;
   }

   if (isCreate)
   {
      if (lookupOutInfo.createRes == FhgfsOpsErr_SUCCESS) // implies isCreate == true
      {
#ifdef FMODE_CREATED
         file->f_mode |= FMODE_CREATED;
#else
         *outOpenedFlags |= FILE_CREATED;
#endif

         if (lookupOutInfo.lookupRes != FhgfsOpsErr_SUCCESS)
         { // only update directory time stamps if the file did not exist yet
            inode_set_mc_time(dir, current_fs_time(sb));
         }
      }
      else
      {
         retVal = FhgfsOpsErr_toSysErr(lookupOutInfo.createRes);
         goto outErr;
      }
   }

   // remote success => create local inode
   instantiateRes = __FhgfsOps_instantiateInode(dentry, lookupOutInfo.entryInfoPtr, statDataPtr,
      &iSizeHints);
   LookupIntentInfoOut_setEntryInfoPtr(&lookupOutInfo, NULL); /* Make sure entryInfo will not be
                                                                * de-initialized */

   if (unlikely(instantiateRes) )
   { // instantiate error
      retVal = instantiateRes;
      goto outErr;
   }

   // instantiate success

   if (lookupOutInfo.openRes != FhgfsOpsErr_SUCCESS)
   {
      retVal = FhgfsOpsErr_toSysErr(lookupOutInfo.openRes);
      goto outLookupSuccessOpenFailure;
   }

   // remote open success

   file->f_path.dentry = dentry; /* Assign the dentry, finish open does that, but we
                                  * already need it in openReferenceHandle() */

   newInode = dentry->d_inode;

   openHandleRes = FhgfsOps_openReferenceHandle(app, newInode, file, openFlags,
      &lookupOutInfo, NULL);
   if (unlikely(openHandleRes) )
   { // failed to get an fhgfs internal handle
      retVal = openHandleRes;
      goto outLookupSuccessOpenFailure;
   }

   // internal open handle success

   // stripePattern is assigned to FhgfsInode now, make sure it does not get free'ed
   LookupIntentInfoOut_setStripePattern(&lookupOutInfo, NULL);

   retVal = finish_open(file, dentry, generic_file_open
#ifndef FMODE_CREATED
      , outOpenedFlags
#endif
      );
   if (unlikely(retVal) )
   { // finish open failed => release the handle that was referenced above
      int releaseRes;

      releaseRes = FhgfsOps_release(dentry->d_inode, file);
      if (unlikely(releaseRes) )
      {
         const EntryInfo* parentEntryInfo = FhgfsInode_getEntryInfo(fhgfsParentInode);

         Logger_logErrFormatted(log, logContext, "Close on error: Failed to close file "
            "parentID: %s fileName: %s", EntryInfo_getEntryID(parentEntryInfo), entryName);
      }
   }

   // clean-up

   LookupIntentInfoOut_uninit(&lookupOutInfo);

   /* retVal is the result of finish_open() here (0 on success). It must not be reset to 0:
    * after a failed finish_open() the handle has been released above and the caller has to
    * see the error. */

   LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "complete. result: %d", retVal);

   return retVal;


outLookupSuccessOpenFailure:

   dget(dentry); /* Get another dentry reference, the caller (atomic_open) will drop
                  * it again immediately */
   retVal = finish_no_open(file, dentry); // successful lookup/create, but failed open


outErr:

   // clean-up on error

   if (unlikely(lookupOutInfo.openRes == FhgfsOpsErr_SUCCESS) )
   { // the remote open succeeded, so the remote handle has to be closed again
      RemotingIOInfo closeInfo;
      FhgfsOpsErr closeRes;
      const EntryInfo* parentEntryInfo = FhgfsInode_getEntryInfo(fhgfsParentInode);
      AtomicInt maxUsedTargetIndex;

      AtomicInt_init(&maxUsedTargetIndex, 0); // file was not open to user space yet, so 0

      RemotingIOInfo_initSpecialClose(app, lookupOutInfo.fileHandleID,
         &maxUsedTargetIndex, openFlags, &closeInfo);

      closeRes = FhgfsOpsRemoting_closefile(lookupOutInfo.entryInfoPtr, &closeInfo, NULL);
      if (closeRes != FhgfsOpsErr_SUCCESS)
         Logger_logErrFormatted(log, logContext, "Close on error: Failed to close file "
            "parentID: %s fileName: %s", EntryInfo_getEntryID(parentEntryInfo), entryName);
   }

   LookupIntentInfoOut_uninit(&lookupOutInfo);

   LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "complete. result: %d", retVal);

   return retVal;
}
#endif // KERNEL_HAS_ATOMIC_OPEN
|
|
|
|
|
|
int FhgfsOps_unlink(struct inode* dir, struct dentry* dentry)
|
|
{
|
|
struct super_block* sb = dentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_unlink";
|
|
|
|
int retVal = 0;
|
|
FhgfsOpsErr unlinkRes;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
const char* entryName = dentry->d_name.name;
|
|
struct inode* inode = dentry->d_inode;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
|
|
if(Logger_getLogLevel(log) >= 4)
|
|
FhgfsOpsHelper_logOp(4, app, dentry, inode, logContext);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_UNLINK, dentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
unlinkRes = FhgfsOpsRemoting_unlinkfile(app, FhgfsInode_getEntryInfo(fhgfsParentInode),
|
|
entryName, eventSent);
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(unlinkRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
retVal = FhgfsOpsErr_toSysErr(unlinkRes);
|
|
|
|
if(retVal == -ENOENT)
|
|
d_drop(dentry);
|
|
}
|
|
else
|
|
{ // remote success
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
|
|
if(dentry->d_inode)
|
|
{
|
|
struct inode* fileInode = dentry->d_inode;
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(fileInode);
|
|
|
|
FhgfsInode_invalidateCache(fhgfsInode);
|
|
#if defined(KERNEL_HAS_INODE_CTIME)
|
|
fileInode->i_ctime = dir->i_ctime;
|
|
#else
|
|
inode_set_ctime_to_ts(fileInode, inode_get_ctime(dir));
|
|
#endif
|
|
|
|
spin_lock(&fileInode->i_lock);
|
|
// only try to drop link count if it still is > 0. Check is needed,
|
|
// because there are some situations in which this is called when the
|
|
// link count is already 0. NFS for example does the same.
|
|
if(fileInode->i_nlink > 0)
|
|
{
|
|
drop_nlink(fileInode);
|
|
}
|
|
spin_unlock(&fileInode->i_lock);
|
|
}
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* Create special file.
|
|
*/
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
int FhgfsOps_mknod(struct mnt_idmap* idmap, struct inode* dir, struct dentry* dentry,
|
|
umode_t mode, dev_t dev)
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
int FhgfsOps_mknod(struct user_namespace* mnt_userns, struct inode* dir, struct dentry* dentry,
|
|
umode_t mode, dev_t dev)
|
|
#elif defined(KERNEL_HAS_UMODE_T)
|
|
int FhgfsOps_mknod(struct inode* dir, struct dentry* dentry, umode_t mode, dev_t dev)
|
|
#else
|
|
int FhgfsOps_mknod(struct inode* dir, struct dentry* dentry, int mode, dev_t dev)
|
|
#endif
|
|
{
|
|
struct super_block* sb = dentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_mknod";
|
|
|
|
int retVal = 0;
|
|
FhgfsOpsErr mkRes;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
EntryInfo newEntryInfo;
|
|
const char* entryName = dentry->d_name.name;
|
|
const int umask = current_umask();
|
|
struct CreateInfo createInfo;
|
|
struct inode* inode = dentry->d_inode;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
if(Logger_getLogLevel(log) >= 4)
|
|
FhgfsOpsHelper_logOp(4, app, dentry, inode, logContext);
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_MKNOD, dentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
CreateInfo_init(app, dir, entryName, mode, umask, false, eventSent, &createInfo);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
mkRes = FhgfsOpsRemoting_mkfile(app, FhgfsInode_getEntryInfo(fhgfsParentInode),
|
|
&createInfo, &newEntryInfo);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(mkRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
d_drop(dentry); // avoid leaving a negative dentry behind
|
|
retVal = FhgfsOpsErr_toSysErr(mkRes);
|
|
goto outErr;
|
|
}
|
|
|
|
retVal = __FhgfsOps_instantiateInode(dentry, &newEntryInfo, NULL, &iSizeHints);
|
|
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
outErr:
|
|
// clean-up
|
|
|
|
return retVal;
|
|
}
|
|
|
|
|
|
/**
|
|
* @param dir directory for the new link
|
|
* @param dentry dentry of the new entry that we want to create
|
|
* @param to where the symlink points to
|
|
*/
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
extern int FhgfsOps_symlink(struct mnt_idmap* idmap, struct inode* dir,
|
|
struct dentry* dentry, const char* to)
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
extern int FhgfsOps_symlink(struct user_namespace* mnt_userns, struct inode* dir,
|
|
struct dentry* dentry, const char* to)
|
|
#else
|
|
extern int FhgfsOps_symlink(struct inode* dir, struct dentry* dentry, const char* to)
|
|
#endif
|
|
{
|
|
struct super_block* sb = dentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_symlink";
|
|
|
|
int retVal;
|
|
int mode = S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
EntryInfo newEntryInfo;
|
|
const char* entryName = dentry->d_name.name;
|
|
const int umask = current_umask();
|
|
struct CreateInfo createInfo;
|
|
struct inode* inode = dentry->d_inode;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
struct FileEvent* eventSent = NULL;
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= 4) )
|
|
FhgfsOpsHelper_logOp(4, app, dentry, inode, logContext);
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_SYMLINK, dentry);
|
|
FileEvent_setTargetStr(&event, to);
|
|
eventSent = &event;
|
|
}
|
|
|
|
CreateInfo_init(app, dir, entryName, mode, umask, false, eventSent, &createInfo);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
// destroys &event from createInfo.fileEvent
|
|
retVal = FhgfsOpsHelper_symlink(app, FhgfsInode_getEntryInfo(fhgfsParentInode), to, &createInfo,
|
|
&newEntryInfo);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(retVal)
|
|
{ // error occurred
|
|
d_drop(dentry); // avoid leaving a negative dentry behind
|
|
}
|
|
else
|
|
{ // remote success => create local inode
|
|
retVal = __FhgfsOps_instantiateInode(dentry, &newEntryInfo, NULL, &iSizeHints);
|
|
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* @param fromFileDentry the already existing dentry
|
|
* @param toDirInode parent (directory) inode of the new dentry
|
|
* @param toFileDentry the new dentry (link) that we want to create
|
|
*/
|
|
int FhgfsOps_link(struct dentry* fromFileDentry, struct inode* toDirInode,
|
|
struct dentry* toFileDentry)
|
|
{
|
|
struct super_block* sb = toFileDentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
Config* cfg = App_getConfig(app);
|
|
const char* logContext = "FhgfsOps_link";
|
|
|
|
struct inode* fileInode = fromFileDentry->d_inode;
|
|
FhgfsInode* fhgfsFileInode = BEEGFS_INODE(fileInode);
|
|
|
|
FhgfsInode* fhgfsToDirInode = BEEGFS_INODE(toDirInode);
|
|
|
|
struct inode* fromDirInode = fromFileDentry->d_parent->d_inode;
|
|
FhgfsInode* fhgfsFromDirInode = BEEGFS_INODE(fromDirInode);
|
|
|
|
const char* fromFileName = fromFileDentry->d_name.name;
|
|
unsigned fromFileLen = fromFileDentry->d_name.len;
|
|
|
|
const char* toFileName = toFileDentry->d_name.name;
|
|
unsigned toFileLen = toFileDentry->d_name.len;
|
|
|
|
const EntryInfo* fromDirInfo;
|
|
const EntryInfo* toDirInfo;
|
|
const EntryInfo* fromFileInfo;
|
|
|
|
int retVal;
|
|
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= Log_DEBUG) )
|
|
FhgfsOpsHelper_logOpMsg(Log_SPAM, app, fromFileDentry, fromFileDentry->d_inode, logContext,
|
|
"From: %s; To: %s", fromFileName, toFileName);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsFromDirInode); // LOCK EntryInfo
|
|
if (fhgfsFromDirInode != fhgfsToDirInode)
|
|
FhgfsInode_entryInfoReadLock(fhgfsToDirInode); // LOCK EntryInfo
|
|
FhgfsInode_entryInfoReadLock(fhgfsFileInode); // LOCK EntryInfo
|
|
|
|
fromDirInfo = FhgfsInode_getEntryInfo(fhgfsFromDirInode);
|
|
toDirInfo = FhgfsInode_getEntryInfo(fhgfsToDirInode);
|
|
fromFileInfo = FhgfsInode_getEntryInfo(fhgfsFileInode);
|
|
|
|
if (!Config_getSysCreateHardlinksAsSymlinks(cfg))
|
|
{ // create hardlink instead of symlink
|
|
int linkRes;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
ihold(fileInode);
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_HARDLINK, toFileDentry);
|
|
FileEvent_setTargetDentry(&event, fromFileDentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
linkRes = FhgfsOpsRemoting_hardlink(app, fromFileName, fromFileLen, fromFileInfo,
|
|
fromDirInfo, toFileName, toFileLen, toDirInfo, eventSent);
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
retVal = FhgfsOpsErr_toSysErr(linkRes);
|
|
|
|
if (retVal)
|
|
{
|
|
d_drop(toFileDentry);
|
|
iput(fileInode);
|
|
}
|
|
else
|
|
{
|
|
spin_lock(&fileInode->i_lock);
|
|
inc_nlink(fileInode);
|
|
spin_unlock(&fileInode->i_lock);
|
|
|
|
d_instantiate(toFileDentry, fileInode);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// create symlink instead of hardlink
|
|
retVal = FhgfsOps_hardlinkAsSymlink(fromFileDentry, toDirInode, toFileDentry);
|
|
}
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsFileInode); // UNLOCK fromFileInfo
|
|
if (fhgfsFromDirInode != fhgfsToDirInode)
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsToDirInode); // UNLOCK toDirInfo
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsFromDirInode); // UNLOCK fromDirInfo
|
|
|
|
FhgfsInode_invalidateCache(fhgfsFileInode);
|
|
if (fhgfsFromDirInode != fhgfsToDirInode)
|
|
FhgfsInode_invalidateCache(fhgfsToDirInode);
|
|
FhgfsInode_invalidateCache(fhgfsFromDirInode);
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* create symlink instead of hardlink
|
|
*
|
|
* @param oldDentry the already existing link
|
|
* @param dir directory of the new link
|
|
* @param newDentry the new link that we want to create
|
|
*
|
|
* Note:
|
|
*/
|
|
int FhgfsOps_hardlinkAsSymlink(struct dentry* oldDentry, struct inode* dir,
|
|
struct dentry* newDentry)
|
|
{
|
|
struct super_block* sb = newDentry->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_link";
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
NoAllocBufferStore* bufStore = App_getPathBufStore(app);
|
|
|
|
int retVal;
|
|
int mode = S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO;
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(dir);
|
|
EntryInfo newEntryInfo;
|
|
const char* entryName = newDentry->d_name.name;
|
|
const int umask = current_umask();
|
|
|
|
char *oldPathStr, *newPathStr;
|
|
char *oldPathStoreBuf, *newPathStoreBuf;
|
|
char *oldPathTmp, *newPathTmp;
|
|
char* oldRelativePathStr;
|
|
|
|
struct CreateInfo createInfo;
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
struct FileEvent* eventSent = NULL;
|
|
|
|
// resolve oldDentry to full path
|
|
oldPathTmp = __FhgfsOps_pathResolveToStoreBuf(bufStore, oldDentry, &oldPathStoreBuf);
|
|
if (unlikely(IS_ERR(oldPathTmp) ) )
|
|
{
|
|
int error = IS_ERR(oldPathTmp);
|
|
Logger_logFormatted(log, 2, logContext, "Error in link(): %d", error);
|
|
retVal = error;
|
|
goto errorOldPath;
|
|
}
|
|
|
|
oldPathStr = StringTk_strDup(oldPathTmp);
|
|
NoAllocBufferStore_addBuf(bufStore, oldPathStoreBuf);
|
|
|
|
// resolve newDentry to full path
|
|
newPathTmp = __FhgfsOps_pathResolveToStoreBuf(bufStore, newDentry, &newPathStoreBuf);
|
|
if (unlikely(IS_ERR(newPathTmp) ) )
|
|
{
|
|
int error = IS_ERR(oldPathTmp);
|
|
Logger_logFormatted(log, 2, logContext, "Error in link(): %d", error);
|
|
retVal = error;
|
|
goto errorNewPath;
|
|
}
|
|
|
|
newPathStr = StringTk_strDup(newPathTmp);
|
|
NoAllocBufferStore_addBuf(bufStore, newPathStoreBuf);
|
|
|
|
// resolve relative path from new to old
|
|
FhgfsOpsHelper_getRelativeLinkStr(newPathStr, oldPathStr, &oldRelativePathStr);
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= 4) )
|
|
{
|
|
Logger_logFormatted(log, 4, logContext, "called (as symlink). Path: %s; To: %s",
|
|
newPathStr, oldRelativePathStr);
|
|
}
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_SYMLINK, newDentry);
|
|
FileEvent_setTargetDentry(&event, oldDentry);
|
|
eventSent = &event;
|
|
}
|
|
|
|
CreateInfo_init(app, dir, entryName, mode, umask, false, eventSent, &createInfo);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
// destroys &event from createInfo.fileEvent
|
|
retVal = FhgfsOpsHelper_symlink(app, FhgfsInode_getEntryInfo(fhgfsParentInode),
|
|
oldRelativePathStr, &createInfo, &newEntryInfo);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(retVal)
|
|
{ // error occurred
|
|
d_drop(newDentry); // avoid leaving a negative dentry behind
|
|
}
|
|
else
|
|
{ // remote success => create local inode
|
|
retVal = __FhgfsOps_instantiateInode(newDentry, &newEntryInfo, NULL, &iSizeHints);
|
|
|
|
inode_set_mc_time(dir, current_fs_time(sb));
|
|
}
|
|
|
|
// clean-up
|
|
|
|
kfree(oldRelativePathStr);
|
|
kfree(newPathStr);
|
|
errorNewPath:
|
|
kfree(oldPathStr);
|
|
errorOldPath:
|
|
return retVal;
|
|
}
|
|
|
|
|
|
static int __beegfs_follow_link(struct dentry* dentry, char** linkBody, void** cookie)
|
|
{
|
|
struct inode* inode = dentry->d_inode;
|
|
App* app = FhgfsOps_getApp(inode->i_sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = "FhgfsOps_follow_link";
|
|
|
|
int readRes;
|
|
char* bufPage = (char*)__get_free_page(GFP_NOFS);
|
|
char* destination = bufPage;
|
|
|
|
FhgfsInode* fhgfsParentInode = BEEGFS_INODE(inode);
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= 5) )
|
|
FhgfsOpsHelper_logOp(5, app, dentry, inode, logContext);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsParentInode); // LOCK EntryInfo
|
|
|
|
readRes = FhgfsOpsHelper_readlink_kernel(app, FhgfsInode_getEntryInfo(fhgfsParentInode), bufPage, PAGE_SIZE-1);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsParentInode); // UNLOCK EntryInfo
|
|
|
|
if(readRes < 0)
|
|
{
|
|
destination = ERR_PTR(readRes);
|
|
}
|
|
else
|
|
bufPage[readRes] = 0;
|
|
|
|
if(readRes == (PAGE_SIZE-1) )
|
|
{ // link destination too long
|
|
destination = ERR_PTR(-ENAMETOOLONG);
|
|
}
|
|
|
|
// store link destination
|
|
*linkBody = destination;
|
|
*cookie = bufPage;
|
|
|
|
if(IS_ERR(destination) )
|
|
{
|
|
free_page( (unsigned long)bufPage);
|
|
*cookie = destination;
|
|
|
|
return PTR_ERR(destination);
|
|
}
|
|
|
|
// Note: free_page() is called by the put_link method in the success case
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Release the page that __beegfs_follow_link() handed out as cookie.
 *
 * @param cookie address of the page to free.
 */
static void __beegfs_put_link(void* cookie)
{
   unsigned long pageAddr = (unsigned long)cookie;

   free_page(pageAddr);
}
|
|
|
|
#if defined KERNEL_HAS_GET_LINK
|
|
const char* FhgfsOps_get_link(struct dentry* dentry, struct inode* inode,
|
|
struct delayed_call* done)
|
|
{
|
|
void* cookie;
|
|
char* destination;
|
|
|
|
if (!dentry)
|
|
return ERR_PTR(-ECHILD);
|
|
|
|
if(!__beegfs_follow_link(dentry, &destination, &cookie) )
|
|
set_delayed_call(done, __beegfs_put_link, cookie);
|
|
|
|
return destination;
|
|
}
|
|
#elif defined(KERNEL_HAS_FOLLOW_LINK_COOKIE)
|
|
/**
 * follow_link inode operation (cookie variant).
 *
 * @param cookie out: set by __beegfs_follow_link() (page on success, ERR_PTR on error).
 * @return link target on success, ERR_PTR otherwise.
 */
const char* FhgfsOps_follow_link(struct dentry* dentry, void** cookie)
{
   char* linkTarget;

   // the result code is not needed here: errors are encoded in linkTarget
   __beegfs_follow_link(dentry, &linkTarget, cookie);

   return linkTarget;
}
|
|
|
|
|
|
/**
 * put_link inode operation (cookie variant): releases the page given as cookie.
 */
void FhgfsOps_put_link(struct inode* inode, void* cookie)
{
   __beegfs_put_link(cookie);
}
|
|
#else
|
|
/**
 * follow_link inode operation (nameidata variant).
 *
 * @return the cookie set by __beegfs_follow_link() (page on success, ERR_PTR on error); it is
 *    passed to FhgfsOps_put_link() later.
 */
void* FhgfsOps_follow_link(struct dentry* dentry, struct nameidata* nd)
{
   void* pageCookie;
   char* linkTarget;

   __beegfs_follow_link(dentry, &linkTarget, &pageCookie);

   nd_set_link(nd, linkTarget); // linkTarget may be an ERR_PTR

   return pageCookie;
}
|
|
|
|
|
|
/**
 * put_link inode operation (nameidata variant).
 *
 * @param p cookie returned by FhgfsOps_follow_link(); nothing is freed if it is an ERR_PTR.
 */
void FhgfsOps_put_link(struct dentry* dentry, struct nameidata* nd, void* p)
{
   if(IS_ERR(p) )
      return; // follow_link failed and already released its page

   __beegfs_put_link(p);
}
|
|
#endif
|
|
|
|
|
|
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
|
int FhgfsOps_rename(struct mnt_idmap* idmap, struct inode* inodeDirFrom,
|
|
struct dentry* dentryFrom, struct inode* inodeDirTo, struct dentry* dentryTo
|
|
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
|
int FhgfsOps_rename(struct user_namespace* mnt_userns, struct inode* inodeDirFrom,
|
|
struct dentry* dentryFrom, struct inode* inodeDirTo, struct dentry* dentryTo
|
|
#else
|
|
int FhgfsOps_rename(struct inode* inodeDirFrom, struct dentry* dentryFrom,
|
|
struct inode* inodeDirTo, struct dentry* dentryTo
|
|
#endif
|
|
#ifdef KERNEL_HAS_RENAME_FLAGS
|
|
, unsigned flags
|
|
#endif
|
|
)
|
|
{
|
|
struct super_block* sb = dentryFrom->d_sb;
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Logger* log = App_getLogger(app);
|
|
const char* logContext = __func__;
|
|
FhgfsOpsErr renameRes;
|
|
|
|
struct inode* fromEntryInode = dentryFrom->d_inode; // no need to lock it, see other in kernel fs
|
|
FhgfsInode* fhgfsFromEntryInode = BEEGFS_INODE(fromEntryInode);
|
|
DirEntryType entryType = FhgfsInode_getDirEntryType(fhgfsFromEntryInode);
|
|
|
|
const EntryInfo* fromDirInfo; // EntryInfo about the 'from' directory
|
|
const EntryInfo* toDirInfo; // EntryInfo about the 'to' directory
|
|
|
|
int retVal = 0;
|
|
|
|
FhgfsInode* fhgfsFromDirInode = BEEGFS_INODE(inodeDirFrom);
|
|
FhgfsInode* fhgfsToDirInode = BEEGFS_INODE(inodeDirTo);
|
|
|
|
const char* oldName = dentryFrom->d_name.name;
|
|
unsigned oldLen = dentryFrom->d_name.len;
|
|
|
|
const char* newName = dentryTo->d_name.name;
|
|
unsigned newLen = dentryTo->d_name.len;
|
|
|
|
struct FileEvent event = FILEEVENT_EMPTY;
|
|
const struct FileEvent* eventSent = NULL;
|
|
|
|
#ifdef KERNEL_HAS_RENAME_FLAGS
|
|
if (flags != 0)
|
|
return -EINVAL;
|
|
#endif
|
|
|
|
if(unlikely(Logger_getLogLevel(log) >= Log_DEBUG) )
|
|
FhgfsOpsHelper_logOpMsg(Log_SPAM, app, dentryFrom, dentryFrom->d_inode, logContext,
|
|
"From: %s; To: %s", oldName, newName);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsFromDirInode); // LOCK EntryInfo
|
|
if (fhgfsFromDirInode != fhgfsToDirInode)
|
|
FhgfsInode_entryInfoReadLock(fhgfsToDirInode); // LOCK EntryInfo
|
|
|
|
// note the fileInode also needs to be locked to prevent reference/release during a rename
|
|
FhgfsInode_entryInfoWriteLock(fhgfsFromEntryInode); // LOCK EntryInfo (renamed file dir)
|
|
|
|
fromDirInfo = FhgfsInode_getEntryInfo(fhgfsFromDirInode);
|
|
toDirInfo = FhgfsInode_getEntryInfo(fhgfsToDirInode);
|
|
|
|
if (app->cfg->eventLogMask & EventLogMask_LINK_OP)
|
|
{
|
|
FileEvent_init(&event, FileEventType_RENAME, dentryFrom);
|
|
FileEvent_setTargetDentry(&event, dentryTo);
|
|
eventSent = &event;
|
|
}
|
|
|
|
renameRes = FhgfsOpsRemoting_rename(app, oldName, oldLen, entryType, fromDirInfo,
|
|
newName, newLen, toDirInfo, eventSent);
|
|
if(renameRes != FhgfsOpsErr_SUCCESS)
|
|
{
|
|
int logLevel = Log_NOTICE;
|
|
const EntryInfo* fromEntryInfo = FhgfsInode_getEntryInfo(fhgfsFromEntryInode);
|
|
|
|
if( (renameRes == FhgfsOpsErr_PATHNOTEXISTS) || (renameRes == FhgfsOpsErr_INUSE) )
|
|
logLevel = Log_DEBUG; // don't bother user with non-error messages
|
|
|
|
if (renameRes == FhgfsOpsErr_EXISTS &&
|
|
dentryTo->d_inode && S_ISDIR(dentryTo->d_inode->i_mode) )
|
|
logLevel = Log_DEBUG;
|
|
|
|
Logger_logFormatted(log, logLevel, logContext,
|
|
"Rename failed: %s fromDirID: %s oldName: %s toDirID: %s newName: %s EntryID: %s",
|
|
FhgfsOpsErr_toErrString(renameRes),
|
|
fromDirInfo->entryID, oldName, toDirInfo->entryID, newName, fromEntryInfo->entryID);
|
|
|
|
retVal = FhgfsOpsErr_toSysErr(renameRes);
|
|
}
|
|
else
|
|
{ // remote success
|
|
inode_timespec ts = current_fs_time(sb);
|
|
inode_set_mc_time(inodeDirTo, ts);
|
|
inode_set_mc_time(inodeDirFrom, ts);
|
|
inode_set_ctime_to_ts(fromEntryInode, ts);
|
|
|
|
FhgfsInode_updateEntryInfoOnRenameUnlocked(fhgfsFromEntryInode, toDirInfo, newName);
|
|
}
|
|
|
|
FhgfsInode_entryInfoWriteUnlock(fhgfsFromEntryInode); // UNLOCK EntryInfo (renamed file dir)
|
|
if (fhgfsFromDirInode != fhgfsToDirInode)
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsToDirInode); // UNLOCK ToDirEntryInfo
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsFromDirInode); // UNLOCK FromDirEntryInfo
|
|
|
|
LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "remoting complete. result: %d", (int)renameRes);
|
|
|
|
FileEvent_uninit(&event);
|
|
|
|
if (unlikely(retVal == -EBUSY && app->cfg->sysRenameEbusyAsXdev))
|
|
{
|
|
const EntryInfo* fromEntryInfo = FhgfsInode_getEntryInfo(fhgfsFromEntryInode);
|
|
|
|
Logger_logFormatted(log, Log_NOTICE, logContext, "Rewriting EBUSY to EXDEV: "
|
|
"%s fromDirID: %s oldName: %s toDirID: %s newName: %s EntryID: %s",
|
|
FhgfsOpsErr_toErrString(renameRes),
|
|
fromDirInfo->entryID, oldName, toDirInfo->entryID, newName, fromEntryInfo->entryID);
|
|
retVal = -EXDEV;
|
|
}
|
|
|
|
// clean-up
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* Note: This is almost a copy of general vmtruncate(), just with inode->i_lock around the i_size
|
|
* updates.
|
|
* Note: This is not called directly, but via setattr
|
|
*
|
|
* @param offset file offset for truncation
|
|
*/
|
|
int FhgfsOps_vmtruncate(struct inode* inode, loff_t offset)
|
|
{
|
|
if(i_size_read(inode) < offset)
|
|
{
|
|
unsigned long limit;
|
|
|
|
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
|
if (limit != RLIM_INFINITY && offset > (loff_t)limit)
|
|
goto out_sig;
|
|
|
|
if (offset > (loff_t)(inode->i_sb->s_maxbytes) )
|
|
goto out_big;
|
|
|
|
spin_lock(&inode->i_lock);
|
|
i_size_write(inode, offset);
|
|
spin_unlock(&inode->i_lock);
|
|
}
|
|
else
|
|
{
|
|
struct address_space *mapping = inode->i_mapping;
|
|
|
|
/*
|
|
* truncation of in-use swapfiles is disallowed - it would
|
|
* cause subsequent swapout to scribble on the now-freed
|
|
* blocks.
|
|
*/
|
|
if(IS_SWAPFILE(inode) )
|
|
return -ETXTBSY;
|
|
|
|
spin_lock(&inode->i_lock);
|
|
i_size_write(inode, offset);
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
/*
|
|
* unmap_mapping_range is called twice, first simply for
|
|
* efficiency so that truncate_inode_pages does fewer
|
|
* single-page unmaps. However after this first call, and
|
|
* before truncate_inode_pages finishes, it is possible for
|
|
* private pages to be COWed, which remain after
|
|
* truncate_inode_pages finishes, hence the second
|
|
* unmap_mapping_range call must be made for correctness.
|
|
*/
|
|
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
|
|
truncate_inode_pages(mapping, offset);
|
|
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_sig:
|
|
send_sig(SIGXFSZ, current, 0);
|
|
out_big:
|
|
return -EFBIG;
|
|
}
|
|
|
|
/**
|
|
* Note: Call this once during module init (and remember to call _destroyInodeCache() )
|
|
*/
|
|
bool FhgfsOps_initInodeCache(void)
|
|
{
|
|
FhgfsInodeCache =
|
|
OsCompat_initKmemCache(FhgfsOpsInode_CACHE_NAME, sizeof(FhgfsInode), FhgfsOps_initInodeOnce);
|
|
|
|
if (!FhgfsInodeCache)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
void FhgfsOps_destroyInodeCache(void)
|
|
{
|
|
if(FhgfsInodeCache)
|
|
kmem_cache_destroy(FhgfsInodeCache);
|
|
}
|
|
|
|
|
|
struct inode* FhgfsOps_alloc_inode(struct super_block *sb)
|
|
{
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Config* cfg = App_getConfig(app);
|
|
|
|
FhgfsInode* fhgfsInode;
|
|
|
|
fhgfsInode = kmem_cache_alloc(FhgfsInodeCache, GFP_KERNEL);
|
|
if(unlikely(!fhgfsInode) )
|
|
return NULL;
|
|
|
|
FhgfsInode_allocInit(fhgfsInode);
|
|
|
|
fhgfsInode->vfs_inode.i_blkbits = Config_getTuneInodeBlockBits(cfg);
|
|
|
|
return (struct inode*)fhgfsInode;
|
|
}
|
|
|
|
void FhgfsOps_destroy_inode(struct inode* inode)
|
|
{
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
FhgfsInode_destroyUninit(fhgfsInode);
|
|
|
|
kmem_cache_free(FhgfsInodeCache, inode);
|
|
}
|
|
|
|
/**
|
|
* Creates a new inode, inits it from the kstat, inits the ops (depending on the mode)
|
|
* and hashes it.
|
|
*
|
|
* Note: Make sure everything is set in the kstat _before_ you call this, because we hash
|
|
* the inode in here (so it can be found and accessed by others when this method returns).
|
|
* Note: Consider using the _instantiateInode()-wrapper instead of calling this directly for new
|
|
* files/dirs.
|
|
*
|
|
* @param kstat must have a valid .ino (inode number)
|
|
* @param dev set to 0 if not required (only used for special files)
|
|
* @param entryInfoPtr contained strings will just be moved to the new inode or free'd in case of an
|
|
* error (or cached inode), so don't access the given entryInfoPtr anymore after calling this.
|
|
* @param parentNodeID: usually 0, except for NFS export callers, which needs it to connect dentries
|
|
* with their parents. By default dentries are connected to their parents, so usually this
|
|
* is not required (nfs is an exception).
|
|
* @param metaVersion: set to 0 for root inode, otherwise to the value from stat
|
|
* @return NULL if not successful
|
|
*/
|
|
struct inode* __FhgfsOps_newInodeWithParentID(struct super_block* sb, struct kstat* kstat,
|
|
dev_t dev, EntryInfo* entryInfo, NumNodeID parentNodeID, FhgfsIsizeHints* iSizeHints, unsigned int metaVersion)
|
|
{
|
|
App* app = FhgfsOps_getApp(sb);
|
|
Config* cfg = App_getConfig(app);
|
|
|
|
FhgfsInode* fhgfsInode;
|
|
|
|
FhgfsInodeComparisonInfo comparisonInfo =
|
|
{
|
|
.inodeHash = kstat->ino, // pre-set by caller
|
|
.entryID = entryInfo->entryID,
|
|
};
|
|
|
|
|
|
// check inode cache for an existing inode with this ID (and get it) or allocate a new one
|
|
|
|
struct inode* inode = iget5_locked(sb, kstat->ino,
|
|
__FhgfsOps_compareInodeID, __FhgfsOps_initNewInodeDummy, &comparisonInfo);
|
|
|
|
if(unlikely(!inode || IS_ERR(inode) ) )
|
|
goto cleanup_entryInfo; // allocation of new inode failed
|
|
|
|
fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
if( !(inode->i_state & I_NEW) )
|
|
{ // Found an existing inode, which is possibly actively used. We still need to update it.
|
|
FhgfsInode_entryInfoWriteLock(fhgfsInode); // LOCK EntryInfo
|
|
|
|
FhgfsInode_updateEntryInfoUnlocked(fhgfsInode, entryInfo);
|
|
fhgfsInode->metaVersion = metaVersion; //set the metaVersion of new inode to match the meta version
|
|
|
|
FhgfsInode_entryInfoWriteUnlock(fhgfsInode); // UNLOCK EntryInfo
|
|
|
|
spin_lock(&inode->i_lock);
|
|
|
|
__FhgfsOps_applyStatDataToInodeUnlocked(kstat, iSizeHints, inode); // already locked
|
|
Time_setToNow(&fhgfsInode->dataCacheTime);
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
goto outNoCleanUp; // we found a matching existing inode => no init needed
|
|
}
|
|
|
|
fhgfsInode->parentNodeID = parentNodeID;
|
|
|
|
/* note: new inodes are protected by the I_NEW flag from access by other threads until we
|
|
* call unlock_new_inode(). */
|
|
|
|
// init this fresh new inode...
|
|
|
|
// no one can access inode yet => unlocked
|
|
__FhgfsOps_applyStatDataToInodeUnlocked(kstat, iSizeHints, inode);
|
|
|
|
//set the version of new inode to match the meta version
|
|
FhgfsInode_entryInfoWriteLock(fhgfsInode); //LOCK EntryInfo
|
|
{
|
|
fhgfsInode->metaVersion = metaVersion; //set the metaVersion of new inode to match the meta version
|
|
}
|
|
FhgfsInode_entryInfoWriteUnlock(fhgfsInode); // UNLOCK EntryInfo
|
|
|
|
|
|
inode->i_ino = kstat->ino; // pre-set by caller
|
|
|
|
inode->i_flags |= S_NOATIME | S_NOCMTIME; // timestamps updated by server
|
|
|
|
mapping_set_gfp_mask(&inode->i_data, GFP_USER); // avoid highmem for page cache pages
|
|
|
|
// move values (no actual string copy)
|
|
fhgfsInode->entryInfo = *entryInfo;
|
|
|
|
switch (kstat->mode & S_IFMT)
|
|
{
|
|
case S_IFREG: // regular file
|
|
{
|
|
if(Config_getTuneFileCacheTypeNum(cfg) == FILECACHETYPE_Native)
|
|
{
|
|
inode->i_fop = &fhgfs_file_native_ops;
|
|
inode->i_data.a_ops = &fhgfs_addrspace_native_ops;
|
|
}
|
|
else
|
|
if(Config_getTuneFileCacheTypeNum(cfg) == FILECACHETYPE_Paged)
|
|
{ // with pagecache
|
|
inode->i_fop = &fhgfs_file_pagecache_ops;
|
|
inode->i_data.a_ops = &fhgfs_address_pagecache_ops;
|
|
}
|
|
else
|
|
{ // no pagecache (=> either none or buffered cache)
|
|
inode->i_fop = &fhgfs_file_buffered_ops;
|
|
inode->i_data.a_ops = &fhgfs_address_ops;
|
|
}
|
|
|
|
#ifdef KERNEL_HAS_ADDRESS_SPACE_BDI
|
|
inode->i_data.backing_dev_info = FhgfsOps_getBdi(sb);
|
|
#endif
|
|
|
|
inode->i_op = App_getFileInodeOps(app);
|
|
} break;
|
|
|
|
case S_IFDIR: // directory
|
|
{
|
|
inode->i_op = App_getDirInodeOps(app);
|
|
inode->i_fop = &fhgfs_dir_ops;
|
|
} break;
|
|
|
|
case S_IFLNK: // symlink
|
|
{
|
|
inode->i_op = App_getSymlinkInodeOps(app);
|
|
} break;
|
|
|
|
default: // pipes and other special files
|
|
{
|
|
inode->i_op = App_getSpecialInodeOps(app);
|
|
init_special_inode(inode, kstat->mode, dev);
|
|
} break;
|
|
}
|
|
|
|
|
|
unlock_new_inode(inode); // remove I_NEW flag, so the inode can be accessed by others
|
|
|
|
return inode;
|
|
|
|
|
|
// error occured
|
|
cleanup_entryInfo:
|
|
EntryInfo_uninit(entryInfo);
|
|
|
|
// found an existing inode
|
|
outNoCleanUp:
|
|
return inode;
|
|
}
|
|
|
|
/**
|
|
* Retrieves the attribs from the metadata node, generates an inode number (ID) and instantiates
|
|
* a local version of the inode for the dentry.
|
|
*
|
|
* Note: This is a wrapper for _newInode() that also retrieves the the entry attributes (so the
|
|
* entry must exist already on the server).
|
|
*
|
|
* @param dentry the new entry
|
|
* @param entryInfo contained values (not the struct itself!) will be owned by the inode
|
|
* (or kfreed on error). So those values need to be allocated by the caller,
|
|
* but MUST NOT be free'ed by the caller.
|
|
* @param fhgfsStat may be given to avoid extra remoting, or may be NULL (in which case a remote
|
|
* stat will be done).
|
|
* @return 0 on success, negative linux error code otherwise
|
|
*/
|
|
int __FhgfsOps_instantiateInode(struct dentry* dentry, EntryInfo* entryInfo, fhgfs_stat* fhgfsStat,
|
|
FhgfsIsizeHints* iSizeHints)
|
|
{
|
|
const char* logContext = "__FhgfsOps_instantiateInode";
|
|
App* app = FhgfsOps_getApp(dentry->d_sb);
|
|
Config* cfg = App_getConfig(app);
|
|
int retVal = 0;
|
|
fhgfs_stat fhgfsStatInternal;
|
|
fhgfs_stat* actualStatInfo; // points either external given or internal stat data
|
|
FhgfsOpsErr statRes = FhgfsOpsErr_SUCCESS;
|
|
|
|
FhgfsOpsHelper_logOpDebug(app, dentry, NULL, logContext, "(%s)",
|
|
fhgfsStat ? "with stat info" : "without stat info");
|
|
IGNORE_UNUSED_VARIABLE(logContext);
|
|
|
|
if(fhgfsStat)
|
|
actualStatInfo = fhgfsStat;
|
|
else
|
|
{ // no stat data given by caller => request from server
|
|
actualStatInfo = &fhgfsStatInternal;
|
|
|
|
FhgfsInode_initIsizeHints(NULL, iSizeHints);
|
|
|
|
statRes = FhgfsOpsRemoting_statDirect(app, entryInfo, &fhgfsStatInternal);
|
|
}
|
|
|
|
|
|
if(statRes != FhgfsOpsErr_SUCCESS)
|
|
{ // error
|
|
EntryInfo_uninit(entryInfo);
|
|
|
|
retVal = FhgfsOpsErr_toSysErr(statRes);
|
|
}
|
|
else
|
|
{ // success (entry exists on server or was already given by caller)
|
|
struct kstat kstat;
|
|
struct inode* newInode;
|
|
unsigned int metaVersion = actualStatInfo->metaVersion;
|
|
|
|
OsTypeConv_kstatFhgfsToOs(actualStatInfo, &kstat);
|
|
|
|
kstat.ino = FhgfsInode_generateInodeID(dentry->d_sb, entryInfo->entryID,
|
|
strlen(entryInfo->entryID) );
|
|
|
|
newInode = __FhgfsOps_newInode(dentry->d_sb, &kstat, 0, entryInfo, iSizeHints, metaVersion);
|
|
if(unlikely(!newInode || IS_ERR(newInode) ) )
|
|
retVal = IS_ERR(newInode) ? PTR_ERR(newInode) : -EACCES;
|
|
else
|
|
{ // new inode created
|
|
if (Config_getSysXAttrsCheckCapabilities(cfg) == CHECKCAPABILITIES_Never)
|
|
// The configuration is to never check for capabilities on writes, so use
|
|
// "inode_has_no_xattr" which does exactly one thing and that is to set the flag
|
|
// "S_NOSEC" on the inode, if the inode doesn't have the setuid and setgid bits set and
|
|
// the superblock flag "SB_NOSEC" is set. We set that flag on the superblock according
|
|
// to user configuration. When "S_NOSEC" is set on the inode, the kernel will skip
|
|
// checking for capabilities on every write operation.
|
|
inode_has_no_xattr(newInode);
|
|
|
|
d_instantiate(dentry, newInode);
|
|
}
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
/**
|
|
* Compare ID of given cachedInode with ID from comparison info arg.
|
|
* This is called by iget5_locked() to make sure that we don't have an instance of a given inode
|
|
* already (e.g. due to a hardlink or from an NFS handle) before we allocate a new one.
|
|
*
|
|
* Note: This is called with a spin_lock (inode_hash_lock) held, so we may not sleep.
|
|
*
|
|
* @param voidComparisonInfo opaque data pointer as passed to iget5_locked.
|
|
* @return 0 if IDs don't match, !=0 on match.
|
|
*/
|
|
int __FhgfsOps_compareInodeID(struct inode* cachedInode, void* voidComparisonInfo)
|
|
{
|
|
FhgfsInodeComparisonInfo* comparisonInfo = voidComparisonInfo;
|
|
|
|
if(cachedInode->i_ino != comparisonInfo->inodeHash)
|
|
{ // entryID string hashes don't match
|
|
|
|
return 0;
|
|
}
|
|
else
|
|
{ // inode hashes match => compare entryID strings
|
|
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(cachedInode);
|
|
const char* searchEntryID = comparisonInfo->entryID;
|
|
|
|
return FhgfsInode_compareEntryID(fhgfsInode, searchEntryID);
|
|
}
|
|
|
|
}
|
|
|
|
/**
 * Init callback dummy for iget5_locked(), which intentionally does nothing.
 *
 * The actual initialization of a new inode is done by the caller of iget5_locked() afterwards
 * (which is safe, because the inode is still marked as new when iget5_locked() returns). This
 * method is called with the inode_hash_lock spin lock held and we don't want to keep that lock
 * longer than necessary.
 *
 * @return always 0 (success); !=0 would mean error (specific error code is not checked by
 *    calling code).
 */
int __FhgfsOps_initNewInodeDummy(struct inode* newInode, void* newInodeInfo)
{
   (void)newInode;
   (void)newInodeInfo;

   return 0;
}
|
|
|
|
|
|
/**
|
|
* Flush file content caches of the given inode.
|
|
*
|
|
* @return 0 on success, negative linux error code otherwise.
|
|
*/
|
|
int __FhgfsOps_flushInodeFileCache(App* app, struct inode* inode)
|
|
{
|
|
Config* cfg = App_getConfig(app);
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
if(inode->i_mapping)
|
|
{ // flush out file contents to servers for correct file size
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
bool hasWriteHandle = FhgfsInode_hasWriteHandle(fhgfsInode);
|
|
|
|
if (hasWriteHandle || FhgfsInode_getHasDirtyPages(fhgfsInode) )
|
|
{
|
|
int inodeWriteRes = write_inode_now(inode, 1);
|
|
int filemapWaitRes = filemap_fdatawait(inode->i_mapping);
|
|
|
|
if(unlikely(inodeWriteRes < 0 || filemapWaitRes < 0) )
|
|
return (inodeWriteRes < 0) ? inodeWriteRes : filemapWaitRes;
|
|
}
|
|
else
|
|
{
|
|
// no need to flush
|
|
}
|
|
}
|
|
|
|
if(S_ISREG(inode->i_mode) && Config_getTuneFileCacheTypeNum(cfg) == FILECACHETYPE_Buffered)
|
|
{ // regular file and buffered mode => flush write cache for correct file size
|
|
FhgfsOpsErr flushRes = FhgfsOpsHelper_flushCache(app, fhgfsInode, false);
|
|
if(unlikely(flushRes != FhgfsOpsErr_SUCCESS) )
|
|
return FhgfsOpsErr_toSysErr(flushRes);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Called when we want to check whether the inode has changed on the server (or to update
|
|
* the inode attribs after we've made changes to a file/dir).
|
|
* (If it has changed, we invalidate the caches.)
|
|
*
|
|
* Note: This method will indirectly aqcuire the i_lock spinlock.
|
|
*
|
|
* @param fhgfsStat if NULL is given, this method will perform a remote stat; if not NULL, no
|
|
* remoting is needed. (note that usually _flushInodeFileCache() or similar should be called before
|
|
* retrieving stat info).
|
|
* @param iSizeHints is not initialized if fhgfsStat is NULL, but must not be a NULL pointer
|
|
* @param noFlush if (unlikely) true the inode must not be flushed
|
|
* @return 0 on success (validity), negative linux error code if no longer valid
|
|
*/
|
|
int __FhgfsOps_doRefreshInode(App* app, struct inode* inode, fhgfs_stat* fhgfsStat,
|
|
FhgfsIsizeHints* iSizeHints, bool noFlush)
|
|
{
|
|
const char* logContext = "FhgfsOps_refreshInode";
|
|
Config* cfg = App_getConfig(app);
|
|
|
|
int retVal = 0;
|
|
struct kstat kstat;
|
|
int flushRes;
|
|
FhgfsOpsErr statRes;
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
#if defined(KERNEL_HAS_INODE_MTIME)
|
|
typeof(inode->i_mtime.tv_sec) oldMTime;
|
|
#else
|
|
time64_t oldMTime;
|
|
#endif
|
|
loff_t oldSize;
|
|
unsigned cacheElapsedMS;
|
|
bool mtimeSizeInvalidate;
|
|
bool timeoutInvalidate;
|
|
|
|
FhgfsOpsHelper_logOpDebug(app, NULL, inode, logContext, "(%s)",
|
|
fhgfsStat ? "with stat info" : "without stat info");
|
|
|
|
IGNORE_UNUSED_VARIABLE(logContext);
|
|
|
|
if(fhgfsStat)
|
|
{ // stat info given by caller => no remoting needed
|
|
OsTypeConv_kstatFhgfsToOs(fhgfsStat, &kstat);
|
|
}
|
|
else
|
|
{ // no stat info given by caller => get it from server
|
|
fhgfs_stat fhgfsStatInternal;
|
|
|
|
if (likely(!noFlush) )
|
|
{ // flush file contents for correct stat size
|
|
flushRes = __FhgfsOps_flushInodeFileCache(app, inode);
|
|
if(flushRes < 0)
|
|
return flushRes;
|
|
}
|
|
|
|
FhgfsInode_initIsizeHints(fhgfsInode, iSizeHints);
|
|
|
|
FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo
|
|
|
|
statRes = FhgfsOpsRemoting_statDirect(app, FhgfsInode_getEntryInfo(fhgfsInode),
|
|
&fhgfsStatInternal);
|
|
|
|
FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo
|
|
|
|
if(statRes != FhgfsOpsErr_SUCCESS)
|
|
{ // error or entry doesn't exist anymore
|
|
retVal = FhgfsOpsErr_toSysErr(statRes);
|
|
goto cleanup;
|
|
}
|
|
|
|
OsTypeConv_kstatFhgfsToOs(&fhgfsStatInternal, &kstat);
|
|
}
|
|
|
|
// stat succeeded, so the entry still exists
|
|
|
|
if(inode->i_ino == BEEGFS_INODE_ROOT_INO)
|
|
{ // root node deserves special handling (less checking)
|
|
__FhgfsOps_applyStatDataToInode(&kstat, NULL, inode);
|
|
|
|
goto cleanup;
|
|
}
|
|
|
|
// check whether entry is still the same object type
|
|
|
|
if(unlikely( (inode->i_mode & S_IFMT) != (kstat.mode & S_IFMT) ) )
|
|
{ // object type changed => limit damage by marking it as bad
|
|
|
|
/* note: this is quite impossible since we're verifying by entryID and an ID should
|
|
never ever be assigned to another object type. */
|
|
|
|
umode_t savedMode = inode->i_mode; // save mode
|
|
|
|
make_bad_inode(inode);
|
|
|
|
inode->i_mode = savedMode; // restore mode
|
|
|
|
printk_fhgfs(KERN_WARNING, "%s: Inode object type changed unexpectedly.\n", logContext);
|
|
|
|
// invalidate cached pages
|
|
if(!S_ISDIR(inode->i_mode) )
|
|
{
|
|
invalidate_remote_inode(inode);
|
|
}
|
|
|
|
retVal = -ENOENT;
|
|
goto cleanup;
|
|
}
|
|
|
|
// apply new stat data
|
|
|
|
spin_lock(&inode->i_lock); // I _ L O C K
|
|
|
|
#if defined(KERNEL_HAS_INODE_MTIME)
|
|
oldMTime = inode->i_mtime.tv_sec;
|
|
#else
|
|
oldMTime = inode_get_mtime_sec(inode);
|
|
#endif
|
|
oldSize = i_size_read(inode);
|
|
|
|
__FhgfsOps_applyStatDataToInodeUnlocked(&kstat, iSizeHints, inode);
|
|
|
|
// compare previous size/mtime to detect modifications by other clients
|
|
|
|
#if defined(KERNEL_HAS_INODE_MTIME)
|
|
mtimeSizeInvalidate =
|
|
(inode->i_mtime.tv_sec != oldMTime) || (i_size_read(inode) != oldSize);
|
|
#else
|
|
mtimeSizeInvalidate =
|
|
(inode_get_mtime_sec(inode) != oldMTime) || (i_size_read(inode) != oldSize);
|
|
#endif
|
|
cacheElapsedMS = Time_elapsedMS(&fhgfsInode->dataCacheTime);
|
|
timeoutInvalidate = cacheElapsedMS > Config_getTunePageCacheValidityMS(cfg);
|
|
|
|
if( !S_ISDIR(inode->i_mode) && (mtimeSizeInvalidate || timeoutInvalidate))
|
|
{ // file contents changed => invalidate non-dirty pages
|
|
spin_unlock(&inode->i_lock); // I _ U N L O C K
|
|
invalidate_remote_inode(inode); // might sleep => unlocked
|
|
spin_lock(&inode->i_lock); // I _ R E L O C K
|
|
}
|
|
|
|
// update the dataCacheTime because we've either invalidated the inode, or
|
|
// we've seen the mtime and size have not changed and the timeout compared to
|
|
// tunePageCacheBufferMS has not timed out either.
|
|
|
|
Time_setToNow(&fhgfsInode->dataCacheTime);
|
|
spin_unlock(&inode->i_lock); // I _ U N L O C K
|
|
|
|
|
|
// clean up
|
|
cleanup:
|
|
|
|
return retVal;
|
|
}
|
|
|
|
|
|
/**
|
|
* Note: Refreshes inode and mapping only if cache validity timeout expired.
|
|
*
|
|
* @return 0 on success (validity), negative linux error code if no longer valid
|
|
*/
|
|
int __FhgfsOps_revalidateMapping(App* app, struct inode* inode)
|
|
{
|
|
const char* logContext = "FhgfsOps_revalidateMapping";
|
|
Config* cfg = App_getConfig(app);
|
|
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
|
|
unsigned cacheElapsedMS;
|
|
bool timeoutInvalidate;
|
|
bool needRefresh = false;
|
|
|
|
FhgfsIsizeHints iSizeHints;
|
|
|
|
FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, inode, logContext);
|
|
IGNORE_UNUSED_VARIABLE(logContext);
|
|
|
|
spin_lock(&inode->i_lock); // I _ L O C K
|
|
|
|
cacheElapsedMS = Time_elapsedMS(&fhgfsInode->dataCacheTime);
|
|
timeoutInvalidate = cacheElapsedMS > Config_getTunePageCacheValidityMS(cfg);
|
|
|
|
if(!S_ISDIR(inode->i_mode) && timeoutInvalidate)
|
|
needRefresh = true;
|
|
|
|
spin_unlock(&inode->i_lock); // I _ U N L O C K
|
|
|
|
if(needRefresh)
|
|
return __FhgfsOps_refreshInode(app, inode, NULL, &iSizeHints);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Used for client cache invalidation with metadata version.
|
|
* Either clears inode stripe pattern or makes the inode invalid.
|
|
* Acquires i_lock spinlock.
|
|
*/
|
|
void __FhgfsOps_clearInodeStripePattern(App* app, struct inode* inode)
|
|
{
|
|
const char* logContext = "FhgfsOps_clearInodeStripePattern";
|
|
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
|
|
IGNORE_UNUSED_VARIABLE(logContext);
|
|
spin_lock(&inode->i_lock);
|
|
|
|
//check the inode reference counter
|
|
if (atomic_read(&inode->i_count) == 1)
|
|
{
|
|
//we are the only ones with reference, clear stripe pattern (updates on open)
|
|
FhgfsInode_clearStripePattern(fhgfsInode);
|
|
spin_unlock(&inode->i_lock);
|
|
}
|
|
else
|
|
{
|
|
//someone else is holding a reference to the inode
|
|
//should not occur, but as a precaution we mark the inode as bad
|
|
//forces new inode creation (and therefore new stripe pattern)
|
|
umode_t savedMode = inode->i_mode; // save mode
|
|
spin_unlock(&inode->i_lock); //must release spin lock before make_bad_inode
|
|
make_bad_inode(inode);
|
|
inode->i_mode = savedMode; // restore mode
|
|
|
|
FhgfsOpsHelper_logOpMsg(Log_WARNING, app, NULL, inode, logContext,
|
|
"Blocked access to inode due to unexpected inode access during cache invalidation");
|
|
|
|
|
|
// invalidate cached pages
|
|
if(!S_ISDIR(inode->i_mode) )
|
|
{
|
|
invalidate_remote_inode(inode);
|
|
}
|
|
}
|
|
}
|