New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,647 @@
#include <app/log/Logger.h>
#include <common/toolkit/StringTk.h>
#include <common/storage/PathInfo.h>
#include <filesystem/FhgfsOpsHelper.h>
#include "FhgfsOpsSuper.h"
#include "FhgfsInode.h"
/**
 * One-time constructor for the FhGFS-specific members of an inode.
 *
 * Runs once when the mem cache pre-allocates the inode object; it does not run again when the
 * object is recycled (see FhgfsInode_allocInit() for the per-allocation part).
 *
 * Note: Only the FhGFS part of the inode is set up here, not the general VFS part.
 */
void FhgfsInode_initOnce(FhgfsInode* fhgfsInode)
{
   // reader/writer locks
   RWLock_init(&fhgfsInode->entryInfoLock);
   RWLock_init(&fhgfsInode->fileCacheLock);

   // mutexes
   Mutex_init(&fhgfsInode->fileHandlesMutex);
   Mutex_init(&fhgfsInode->rangeLockPIDsMutex);
   Mutex_init(&fhgfsInode->appendMutex);

   // range lock PID bookkeeping
   IntMap_init(&fhgfsInode->rangeLockPIDs);
   AtomicInt_init(&fhgfsInode->numRangeLockPIDs, 0);

   // counters
   AtomicInt_init(&fhgfsInode->appendFDsOpen, 0);
   FhgfsInode_setNumDirtyPages(fhgfsInode, 0);
}
/**
 * Per-allocation initializer; runs every time an inode is handed out by the mem cache.
 *
 * Besides first-time initialization, this also has to wipe whatever a previous user of a recycled
 * inode object may have left behind.
 *
 * Note: Only the FhGFS part of the inode is set up here, not the general VFS part.
 */
void FhgfsInode_allocInit(FhgfsInode* fhgfsInode)
{
   int handleIdx;

   // entry identity and location
   memset(&fhgfsInode->entryInfo, 0, sizeof(fhgfsInode->entryInfo) );
   memset(&fhgfsInode->pathInfo, 0, sizeof(fhgfsInode->pathInfo) );
   fhgfsInode->parentNodeID = (NumNodeID){0};

   Time_init(&fhgfsInode->dataCacheTime);

   // file handles: zero the whole array first, then set the members that need non-zero init
   memset(&fhgfsInode->fileHandles, 0, sizeof(fhgfsInode->fileHandles) );

   for(handleIdx = 0; handleIdx < BEEGFS_INODE_FILEHANDLES_NUM; handleIdx++)
   {
      fhgfsInode->fileHandles[handleIdx].needsAppendLockCleanup = false;
      AtomicInt_set(&(fhgfsInode->fileHandles[handleIdx].maxUsedTargetIndex), -1);

      // bits are not cleared here, referenceHandle() sizes and clears the BitStore on open
      BitStore_init(&(fhgfsInode->fileHandles[handleIdx].firstWriteDone), false);
   }

   fhgfsInode->pattern = NULL;

   // file contents cache
   fhgfsInode->fileCacheBuffer.buf = NULL;
   fhgfsInode->fileCacheBuffer.bufType = FileBufferType_NONE;

   // counters and hints
   AtomicInt_set(&fhgfsInode->numRangeLockPIDs, 0);
   FhgfsInode_setNumDirtyPages(fhgfsInode, 0);
   AtomicInt_set(&fhgfsInode->writeBackCounter, 0);
   AtomicInt_set(&fhgfsInode->noRemoteIsizeDecrease, 0);
   AtomicInt64_set(&fhgfsInode->lastWriteBackEndOrIsizeWriteTime, 0);

   // flags and versions
   fhgfsInode->flags = 0;
   fhgfsInode->fileVersion = 0;
   fhgfsInode->metaVersion = 0;

   atomic_set(&fhgfsInode->modified, 0);
   atomic_set(&fhgfsInode->coRWInProg, 0);
}
/**
 * Per-free cleanup; runs every time an inode goes back to the mem cache.
 *
 * Members are intentionally not reset here (the object might never be recycled; resetting is
 * the job of FhgfsInode_allocInit() ), but everything that was allocated must be released.
 *
 * Note: Only the FhGFS part of the inode is cleaned up here, not the general VFS part.
 */
void FhgfsInode_destroyUninit(FhgfsInode* fhgfsInode)
{
   int handleIdx = 0;

   EntryInfo_uninit(&fhgfsInode->entryInfo);
   PathInfo_uninit(&fhgfsInode->pathInfo);

   SAFE_DESTRUCT_NOSET(fhgfsInode->pattern, StripePattern_virtualDestruct);

   IntMap_clear(&fhgfsInode->rangeLockPIDs);

   // release the per-handle bit stores
   while(handleIdx < BEEGFS_INODE_FILEHANDLES_NUM)
   {
      BitStore_uninit(&fhgfsInode->fileHandles[handleIdx].firstWriteDone);
      handleIdx++;
   }
}
/**
 * Take a reference on an open-file handle of this inode. An existing handle of the matching type
 * is re-used; otherwise the file is opened remotely (or the already-open handle from lookupInfo
 * is adopted) to create a new handle.
 *
 * @param dentry passed on to FileEvent_init() / __FhgfsInode_referenceTrunc() for file events
 * @param openFlags OPENFILE_ACCESS_... flags
 * @param allowRWHandle true if you specified read or write flags and would also accept
 *    a rw handle to save the overhead for open/close (only appropriate for writepage etc., use
 *    _handleTypeToOpenFlags() in this case when you get the RemoteIOInfo).
 * @param lookupInfo NULL if this is 'normal' open, non-NULL if the file was already remotely
 *    opened by lookup-intent or atomic_open
 * @param outHandleType type of the handle that was referenced (may differ from the type implied
 *    by openFlags if a rw handle was re-used); pass this to releaseHandle()
 * @param outVersion may be NULL; if non-NULL it is filled via FhgfsOpsRemoting_getFileVersion()
 *    when an existing handle is re-used, or via FhgfsOpsRemoting_openfile() on a new remote open.
 *    It is not written when a new handle is adopted from lookupInfo.
 * @return FhgfsOpsErr_SUCCESS if a reference was taken; on any other value the ref count was not
 *    increased.
 */
FhgfsOpsErr FhgfsInode_referenceHandle(FhgfsInode* this, struct dentry* dentry, int openFlags,
   bool allowRWHandle, LookupIntentInfoOut* lookupInfo, FileHandleType* outHandleType,
   uint32_t* outVersion)
{
   App* app = FhgfsOps_getApp(this->vfs_inode.i_sb);
   Logger* log = App_getLogger(app);
   const char* logContext = "FhgfsInode_referenceHandle";

   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   FhgfsInodeFileHandle* desiredHandle;
   FhgfsInodeFileHandle* rwHandle = &this->fileHandles[FileHandleType_RW];

   *outHandleType = __FhgfsInode_openFlagsToHandleType(openFlags);
   desiredHandle = &this->fileHandles[*outHandleType];

   if(unlikely(Logger_getLogLevel(log) >= Log_SPAM) )
      FhgfsOpsHelper_logOpMsg(Log_SPAM, app, NULL, &this->vfs_inode, logContext, "handle-type: %d",
         *outHandleType);

   Mutex_lock(&this->fileHandlesMutex); // L O C K

   if(desiredHandle->refCount)
   { // desired handle exists => return it
      retVal = __FhgfsInode_referenceTrunc(this, app, openFlags, dentry);

      if (retVal == FhgfsOpsErr_SUCCESS && outVersion)
         retVal = FhgfsOpsRemoting_getFileVersion(app, &this->entryInfo, outVersion);

      if(retVal != FhgfsOpsErr_SUCCESS)
         goto err_unlock;

      desiredHandle->refCount++;
   }
   else if(allowRWHandle && rwHandle->refCount)
   { // rw handle allowed and exists => return it
      retVal = __FhgfsInode_referenceTrunc(this, app, openFlags, dentry);

      if (retVal == FhgfsOpsErr_SUCCESS && outVersion)
         retVal = FhgfsOpsRemoting_getFileVersion(app, &this->entryInfo, outVersion);

      if(retVal != FhgfsOpsErr_SUCCESS)
         goto err_unlock;

      rwHandle->refCount++;

      *outHandleType = FileHandleType_RW;
   }
   else
   { // no matching handle yet => open file to create a new handle
      RemotingIOInfo ioInfo;
      const EntryInfo* entryInfo;
      PathInfo* pathInfo = FhgfsInode_getPathInfo(this);

      if (lookupInfo)
      { // file already open by lookup/atomic_open
         entryInfo = lookupInfo->entryInfoPtr;
         ioInfo.fileHandleID = lookupInfo->fileHandleID;
         ioInfo.pattern = lookupInfo->stripePattern;

         PathInfo_update(pathInfo, &lookupInfo->pathInfo);

         retVal = FhgfsOpsErr_SUCCESS;
      }
      else
      { // file not yet opened by lookup or atomic open
         struct FileEvent event = FILEEVENT_EMPTY;
         const struct FileEvent* eventSent = NULL;

         __FhgfsInode_initOpenIOInfo(this, desiredHandle, openFlags, pathInfo, &ioInfo);

         FhgfsInode_entryInfoReadLock(this); // LOCK EntryInfo

         /* Only one event is dispatched for a file's open() operation, so pick exactly one event
            type and initialize the event exactly once (a second FileEvent_init() on the same
            object would overwrite the first one without a FileEvent_uninit() in between).
            Priority, highest first:
              1. Truncate (takes precedence over any open event if it is enabled in the mask)
              2. Read and Write
              3. Write Only
              4. Read Only */
         if ((openFlags & OPENFILE_ACCESS_TRUNC) && (app->cfg->eventLogMask & EventLogMask_TRUNC))
         {
            FileEvent_init(&event, FileEventType_TRUNCATE, dentry);
            eventSent = &event;
         }
         else if ((openFlags & OPENFILE_ACCESS_READWRITE) &&
            (app->cfg->eventLogMask & EventLogMask_OPEN_READ_WRITE))
         {
            FileEvent_init(&event, FileEventType_OPEN_READ_WRITE, dentry);
            eventSent = &event;
         }
         else if ((openFlags & OPENFILE_ACCESS_WRITE) &&
            (app->cfg->eventLogMask & EventLogMask_OPEN_WRITE))
         {
            FileEvent_init(&event, FileEventType_OPEN_WRITE, dentry);
            eventSent = &event;
         }
         else if ((openFlags & OPENFILE_ACCESS_READ) &&
            (app->cfg->eventLogMask & EventLogMask_OPEN_READ))
         {
            FileEvent_init(&event, FileEventType_OPEN_READ, dentry);
            eventSent = &event;
         }

         entryInfo = FhgfsInode_getEntryInfo(this);

         retVal = FhgfsOpsRemoting_openfile(entryInfo, &ioInfo, outVersion, eventSent);

         FileEvent_uninit(&event);

         FhgfsInode_entryInfoReadUnlock(this); // UNLOCK EntryInfo
      }

      if(retVal == FhgfsOpsErr_SUCCESS)
      {
         unsigned stripeCount;

         desiredHandle->fileHandleID = ioInfo.fileHandleID;
         this->pattern = ioInfo.pattern;

         // one "first write done" bit per stripe target, all cleared for the fresh handle
         stripeCount = FhgfsInode_getStripeCount(this);
         BitStore_setSize(&desiredHandle->firstWriteDone, stripeCount);
         BitStore_clearBits(&desiredHandle->firstWriteDone);

         desiredHandle->refCount++;
      }
   }

err_unlock:
   // clean up
   Mutex_unlock(&this->fileHandlesMutex); // U N L O C K

   return retVal;
}
/**
 * Drop a reference that was taken via _referenceHandle(). When the last reference of a handle is
 * dropped, the file is closed remotely and the handle's per-open state is reset.
 *
 * Note: even if this results in a (remote) error, the ref count will be decreased and the caller
 * may no longer access the corresponding resources.
 *
 * @param handleType what you got back from _referenceHandle()
 * @param dentry passed on to FileEvent_init() if a close event has to be generated
 * @return result of the remote close if this was the last reference, FhgfsOpsErr_SUCCESS
 *    otherwise.
 */
FhgfsOpsErr FhgfsInode_releaseHandle(FhgfsInode* this, FileHandleType handleType,
   struct dentry* dentry)
{
   App* app = FhgfsOps_getApp(this->vfs_inode.i_sb);
   Logger* log = App_getLogger(app);
   const char* logContext = "FhgfsInode_releaseHandle";

   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   FhgfsInodeFileHandle* handle = &this->fileHandles[handleType];

   // same level constant as the log call below (and as in _referenceHandle() )
   if(unlikely(Logger_getLogLevel(log) >= Log_SPAM) )
      FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, &this->vfs_inode, logContext);

   Mutex_lock(&this->fileHandlesMutex); // L O C K

   if(unlikely(!handle->refCount) )
      BEEGFS_BUG_ON(true, "refCount already 0"); // refcount already zero => bug
   else
   { // positive refCount
      handle->refCount--;

      if(!handle->refCount)
      { // we dropped the last reference => close remote
         int openFlags = FhgfsInode_handleTypeToOpenFlags(handleType);
         RemotingIOInfo ioInfo;
         const EntryInfo* entryInfo;
         PathInfo* pathInfo = NULL; // not required here
         struct FileEvent event = FILEEVENT_EMPTY;
         struct FileEvent* eventSent = NULL;

         __FhgfsInode_initOpenIOInfo(this, handle, openFlags, pathInfo, &ioInfo);

         FhgfsInode_entryInfoReadLock(this); // LOCK EntryInfo

         entryInfo = FhgfsInode_getEntryInfo(this);

         // close events are only generated for handles that allowed writing
         if (handleType != FileHandleType_READ && (app->cfg->eventLogMask & EventLogMask_CLOSE))
         {
            FileEvent_init(&event, FileEventType_CLOSE_WRITE, dentry);
            eventSent = &event;
         }

         // &event is destroyed by callee
         retVal = FhgfsOpsHelper_closefileWithAsyncRetry(entryInfo, &ioInfo, eventSent);

         FhgfsInode_entryInfoReadUnlock(this); // UNLOCK EntryInfo

         LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext,
            "handleID: %s remoting complete. result: %s", handle->fileHandleID,
            FhgfsOpsErr_toErrString(retVal) );

         // clean up
         kfree(handle->fileHandleID);
         /* don't keep a dangling pointer around: __FhgfsInode_initOpenIOInfo() copies this
            member into the ioInfo of the next open */
         handle->fileHandleID = NULL;

         handle->needsAppendLockCleanup = false;
         AtomicInt_set(&handle->maxUsedTargetIndex, -1);
         BitStore_setSize(&handle->firstWriteDone, 0); // free extra mem from very high stripe count
      }
   }

   Mutex_unlock(&this->fileHandlesMutex); // U N L O C K

   return retVal;
}
/**
 * Check whether this inode currently has a referenced handle that permits writing, i.e. a
 * write-only or a read-write handle.
 *
 * @return true if the WRITE or the RW handle has a non-zero ref count.
 */
bool FhgfsInode_hasWriteHandle(FhgfsInode* this)
{
   bool hasWriter;

   Mutex_lock(&this->fileHandlesMutex); // L O C K

   hasWriter = (this->fileHandles[FileHandleType_WRITE].refCount != 0) ||
      (this->fileHandles[FileHandleType_RW].refCount != 0);

   Mutex_unlock(&this->fileHandlesMutex); // U N L O C K

   return hasWriter;
}
/**
 * Erase a pid from the range lock PID map; the numRangeLockPIDs counter is decreased only if the
 * pid was actually stored.
 *
 * @return true if the pid was found (i.e. was previously added via _addRangeLockPID() ).
 */
bool FhgfsInode_removeRangeLockPID(FhgfsInode* this, int pid)
{
   bool wasStored;

   Mutex_lock(&this->rangeLockPIDsMutex); // L O C K

   wasStored = IntMap_erase(&this->rangeLockPIDs, pid);
   if(wasStored)
   { // keep the lock-free readable counter in sync with the map
      AtomicInt_dec(&this->numRangeLockPIDs);
   }

   Mutex_unlock(&this->rangeLockPIDsMutex); // U N L O C K

   return wasStored;
}
/**
 * Store a pid in the range lock PID map; the numRangeLockPIDs counter is increased only if the
 * pid was not stored before.
 */
void FhgfsInode_addRangeLockPID(FhgfsInode* this, int pid)
{
   bool isNewPID;

   Mutex_lock(&this->rangeLockPIDsMutex); // L O C K

   // the map value is just a dummy, only the key matters
   isNewPID = IntMap_insert(&this->rangeLockPIDs, pid, NULL);
   if(isNewPID)
   { // keep the lock-free readable counter in sync with the map
      AtomicInt_inc(&this->numRangeLockPIDs);
   }

   Mutex_unlock(&this->rangeLockPIDsMutex); // U N L O C K
}
/**
 * Map OPENFILE_ACCESS_... fhgfs flags to the corresponding FileHandleType.
 *
 * Only the bits selected by OPENFILE_ACCESS_MASK_RW are considered; anything that is neither
 * write-only nor read-write maps to the read handle.
 */
FileHandleType __FhgfsInode_openFlagsToHandleType(int openFlags)
{
   if( (openFlags & OPENFILE_ACCESS_MASK_RW) == OPENFILE_ACCESS_WRITE)
      return FileHandleType_WRITE;

   if( (openFlags & OPENFILE_ACCESS_MASK_RW) == OPENFILE_ACCESS_READWRITE)
      return FileHandleType_RW;

   return FileHandleType_READ;
}
/**
 * Truncate the file to length 0 on the servers if OPENFILE_ACCESS_TRUNC is set in openFlags;
 * without that flag this is a no-op.
 *
 * @param dentry used to build the truncate file event (only if enabled in the event log mask)
 * @return FhgfsOpsErr_SUCCESS if nothing had to be done, otherwise the result of the remote
 *    truncate.
 */
FhgfsOpsErr __FhgfsInode_referenceTrunc(FhgfsInode* this, App* app, int openFlags,
   struct dentry* dentry)
{
   struct FileEvent event = FILEEVENT_EMPTY;
   const struct FileEvent* eventSent = NULL;
   const EntryInfo* entryInfo;
   FhgfsOpsErr truncRes;

   if(!(openFlags & OPENFILE_ACCESS_TRUNC) )
      return FhgfsOpsErr_SUCCESS; // truncation not requested

   FhgfsInode_entryInfoReadLock(this); // LOCK EntryInfo

   entryInfo = FhgfsInode_getEntryInfo(this);

   if (app->cfg->eventLogMask & EventLogMask_TRUNC)
   {
      FileEvent_init(&event, FileEventType_TRUNCATE, dentry);
      eventSent = &event;
   }

   truncRes = FhgfsOpsRemoting_truncfile(app, entryInfo, 0, eventSent);

   FileEvent_uninit(&event);

   FhgfsInode_entryInfoReadUnlock(this); // UNLOCK EntryInfo

   return truncRes;
}
/**
 * Fill in the minimal ioInfo that is needed for mds open/close (referenceHandle/releaseHandle).
 * Values that are only required for read/write etc. are left out: firstWriteDone is set to NULL
 * (see FhgfsInode_getRefIOInfo() for the complete variant).
 *
 * @param pathInfo stored as-is in the ioInfo; callers in this file also pass NULL here
 * @param outIOInfo only holds pointers/copies of inode and handle members, nothing is allocated
 */
void __FhgfsInode_initOpenIOInfo(FhgfsInode* this, FhgfsInodeFileHandle* fileHandle,
   unsigned accessFlags, PathInfo* pathInfo, RemotingIOInfo* outIOInfo)
{
   struct inode* vfsInode = &this->vfs_inode;

   // values passed in by the caller
   outIOInfo->accessFlags = accessFlags;
   outIOInfo->pathInfo = pathInfo;

   // values taken from the inode
   outIOInfo->app = FhgfsOps_getApp(vfsInode->i_sb);
   outIOInfo->pattern = this->pattern;
   outIOInfo->userID = i_uid_read(vfsInode);
   outIOInfo->groupID = i_gid_read(vfsInode);

   // values taken from (or pointing into) the file handle
   outIOInfo->fileHandleID = fileHandle->fileHandleID;
   outIOInfo->needsAppendLockCleanup = &fileHandle->needsAppendLockCleanup;
   outIOInfo->maxUsedTargetIndex = &fileHandle->maxUsedTargetIndex;

   // not needed for open/close
   outIOInfo->firstWriteDone = NULL;
}
/**
 * Build the IO information for a file that was previously referenced (i.e. opened).
 *
 * Note: This may only be used by callers that have acquired a reference, either explicitly by
 * calling _referenceHandle() or implicitly via open (=> struct file::FsFileInfo).
 *
 * @param handleType selects the file handle whose values are used
 * @param outIOInfo receives the IO info; it only refers to members of the inode and its handle,
 *    no buffers are allocated, so there is nothing to clean up afterwards; but be sure to only
 *    use it while your reference is valid.
 */
void FhgfsInode_getRefIOInfo(FhgfsInode* this, FileHandleType handleType,
   unsigned accessFlags, RemotingIOInfo* outIOInfo)
{
   FhgfsInodeFileHandle* refHandle = &this->fileHandles[handleType];
   PathInfo* inodePathInfo = &this->pathInfo;

   // start with the open/close subset...
   __FhgfsInode_initOpenIOInfo(this, refHandle, accessFlags, inodePathInfo, outIOInfo);

   // ...and add what initOpenIOInfo() leaves unset
   outIOInfo->firstWriteDone = &refHandle->firstWriteDone;
}
/**
* Acquire the internal file cache lock (fileCacheLock) in shared mode, i.e. as a reader.
*
* Note: every call must be paired with FhgfsInode_fileCacheSharedUnlock()!
*/
void FhgfsInode_fileCacheSharedLock(FhgfsInode* this)
{
RWLock_readLock(&this->fileCacheLock);
}
/**
* Release the internal file cache lock (fileCacheLock) that was taken in shared mode via
* FhgfsInode_fileCacheSharedLock().
*/
void FhgfsInode_fileCacheSharedUnlock(FhgfsInode* this)
{
RWLock_readUnlock(&this->fileCacheLock);
}
/**
* Acquire the internal file cache lock (fileCacheLock) in exclusive mode, i.e. as a writer.
*
* Note: every call must be paired with FhgfsInode_fileCacheExclusiveUnlock()!
*/
void FhgfsInode_fileCacheExclusiveLock(FhgfsInode* this)
{
RWLock_writeLock(&this->fileCacheLock);
}
/**
* Try to acquire the internal file cache lock (fileCacheLock) in exclusive mode without waiting.
*
* Note: remember to call FhgfsInode_fileCacheExclusiveUnlock() (only if the lock has been
* acquired)!!
*
* @return the result of RWLock_writeTryLock(), passed through unchanged: 1 if lock acquired,
* 0 if contention
*/
int FhgfsInode_fileCacheExclusiveTryLock(FhgfsInode* this)
{
return RWLock_writeTryLock(&this->fileCacheLock);
}
/**
* Release the internal file cache lock (fileCacheLock) that was taken in exclusive mode via
* FhgfsInode_fileCacheExclusiveLock() or a successful FhgfsInode_fileCacheExclusiveTryLock().
*/
void FhgfsInode_fileCacheExclusiveUnlock(FhgfsInode* this)
{
RWLock_writeUnlock(&this->fileCacheLock);
}
/**
* Get the file cache buffer that is embedded in this inode.
* Caller must hold the fileCache lock to work with this cache entry!
*
* Note: the function name is spelled with a lower-case 'i' ("Fhgfsinode_"), unlike most other
* functions of this module.
*
* @return pointer to the inode's fileCacheBuffer member (never NULL, nothing is allocated).
*/
struct CacheBuffer* Fhgfsinode_getFileCacheBuffer(FhgfsInode* this)
{
return &this->fileCacheBuffer;
}
/**
* Generate IDs suitable for i_ino.
*
* @param entryID zero-terminated string
* @param entryIDLen entryID length without terminating zero
*/
uint64_t FhgfsInode_generateInodeID(struct super_block* sb, const char* entryID, int entryIDLen)
{
App* app = FhgfsOps_getApp(sb);
Config* cfg = App_getConfig(app);
InodeIDStyle inodeIDStyle = Config_getSysInodeIDStyleNum(cfg);
size_t hashBits; // 32 or 64 bit
if(sizeof(ino_t) >= sizeof(uint64_t) )
hashBits = 64;
else
hashBits = 32;
/* note: iunique() or something else, which doesn't generate reproducible results isn't
allowed here, because we need a reproducible i_ino/hashval for
iget5_locked/__FhgfsOps_compareInodeID. */
switch(inodeIDStyle)
{
case INODEIDSTYLE_Hash64HSieh:
{
uint64_t hashRes = HashTk_hash(HASHTK_HSIEHHASH32, hashBits, entryID, entryIDLen);
// add terminating zero to hash buffer if resultig ID is too low
if(unlikely(hashRes <= BEEGFS_INODE_MAXRESERVED_INO) )
hashRes = HashTk_hash(HASHTK_HSIEHHASH32, hashBits, entryID, entryIDLen+1);
return hashRes;
} break;
case INODEIDSTYLE_Hash64MD4:
{
uint64_t hashRes = HashTk_hash(HASHTK_HALFMD4, hashBits, entryID, entryIDLen);
// add terminating zero to hash buffer if resultig ID is too low
if(unlikely(hashRes <= BEEGFS_INODE_MAXRESERVED_INO) )
hashRes = HashTk_hash(HASHTK_HALFMD4, hashBits, entryID, entryIDLen+1);
return hashRes;
} break;
case INODEIDSTYLE_Hash32MD4:
{ // generate 32bit hash of fhgfs entryID string
uint64_t hashRes = HashTk_hash32(HASHTK_HALFMD4, entryID, entryIDLen);
// add terminating zero to hash buffer if resultig ID is too low
if(unlikely(hashRes <= BEEGFS_INODE_MAXRESERVED_INO) )
hashRes = HashTk_hash32(HASHTK_HALFMD4, entryID, entryIDLen+1);
return hashRes;
} // fall through to hsieh32bit hash on 32bit systems...
default:
{ // generate 32bit hash of fhgfs entryID string
uint32_t hashRes = HashTk_hash32(HASHTK_HSIEHHASH32, entryID, entryIDLen);
// add terminating zero to hash buffer if resultig ID is too low
if(unlikely(hashRes <= BEEGFS_INODE_MAXRESERVED_INO) )
hashRes = HashTk_hash32(HASHTK_HSIEHHASH32, entryID, entryIDLen+1);
return hashRes;
}
}
}
/**
 * Decide whether the cached data of this inode may still be used, based on the time that has
 * elapsed since dataCacheTime and the configured validity for dirs or files.
 *
 * @param i_mode only used to distinguish directories from everything else
 * @return false if the configured validity is 0 (caching disabled) or has been reached/exceeded,
 *    true otherwise.
 */
bool FhgfsInode_isCacheValid(FhgfsInode* this, umode_t i_mode, Config* cfg)
{
   unsigned validityMS;
   unsigned elapsedMS;

   validityMS = S_ISDIR(i_mode) ?
      Config_getTuneDirSubentryCacheValidityMS(cfg) :  // defaults to 1s
      Config_getTuneFileSubentryCacheValidityMS(cfg);  // defaults to 0

   if(!validityMS)
      return false; // caching disabled

   elapsedMS = Time_elapsedMS(&this->dataCacheTime);

   return validityMS > elapsedMS;
}

View File

@@ -0,0 +1,759 @@
#ifndef FHGFSINODE_H_
#define FHGFSINODE_H_
#include <app/App.h>
#include <app/config/Config.h>
#include <common/Common.h>
#include <common/threading/AtomicInt64.h>
#include <common/threading/Mutex.h>
#include <common/threading/RWLock.h>
#include <common/storage/PathInfo.h>
#include <common/storage/EntryInfo.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/Metadata.h>
#include <common/toolkit/Time.h>
#include <common/toolkit/HashTk.h>
#include <common/toolkit/MetadataTk.h>
#include <common/toolkit/tree/IntMapIter.h>
#include <filesystem/FhgfsInode.h>
#include <net/filesystem/RemotingIOInfo.h>
#include <toolkit/BitStore.h>
#include <os/OsCompat.h>
#include "FhgfsOpsSuper.h"
#include <linux/fs.h>
#include <linux/vfs.h>
#define BEEGFS_INODE_ROOT_INO 2 /* traditional root inode number */
#define BEEGFS_INODE_MAXRESERVED_INO BEEGFS_INODE_ROOT_INO /* inode numbers <= this are reserved */
#define BEEGFS_INODE_FILEHANDLES_NUM 3 /* r, rw, w (see "enum FileHandleType") */

/* Pointer casts between the fhgfs inode and its embedded vfs inode. The macro argument is
   parenthesized so that the cast applies to the complete argument expression (e.g. a
   conditional expression) and not only to its first operand. */
#define BEEGFS_INODE(inodePointer) ( (FhgfsInode*)(inodePointer) )
#define BEEGFS_VFSINODE(fhgfsInodePointer) ( (struct inode*)(fhgfsInodePointer) )

/* Values for FhgfsInode::flags */
#define BEEGFS_INODE_FLAG_WRITE_ERROR 1 // there was a write error with this inode;
                                        // if set switches to sync writes to faster notify
                                        // applications
#define BEEGFS_INODE_FLAG_PAGED 2 // the inode was written to from page functions
struct StripePattern; // forward declaration
struct FhgfsInodeFileHandle;
typedef struct FhgfsInodeFileHandle FhgfsInodeFileHandle;
enum FileHandleType;
typedef enum FileHandleType FileHandleType;
enum FileBufferType;
typedef enum FileBufferType FileBufferType;
struct CacheBuffer;
typedef struct CacheBuffer CacheBuffer;
struct FhgfsInode;
typedef struct FhgfsInode FhgfsInode;
struct FhgfsIsizeHints;
typedef struct FhgfsIsizeHints FhgfsIsizeHints;
/* Kind of open-file handle an inode can hold (read-only, read-write, write-only). The values
   index an array of BEEGFS_INODE_FILEHANDLES_NUM elements, so they must stay below that
   constant. */
enum FileHandleType // (note: used as array index for FhgfsInode::fileHandles)
{FileHandleType_READ=0, FileHandleType_RW=1, FileHandleType_WRITE=2};
/* State of a CacheBuffer (see CacheBuffer::bufType): no buffer, read buffer or write buffer.
   FileBufferType_NONE is the value that is set when an inode is (re-)initialized. */
enum FileBufferType
{FileBufferType_NONE=0, FileBufferType_READ=1, FileBufferType_WRITE=2};
/*
* File contents cache buffer; one instance is embedded in each FhgfsInode (fileCacheBuffer).
*/
struct CacheBuffer
{
char* buf; // the file contents cache buffer
//size_t bufLen; // length of buf (not needed, because we have bufUsageMaxLen)
size_t bufUsageLen; // how much of the buffer is already used up
size_t bufUsageMaxLen; // how much of the buffer may be used (is min(bufLen, chunk_end) )
loff_t fileOffset; // the file offset where this buffer starts, -1 for append
enum FileBufferType bufType; // FileBufferType_NONE, _READ or _WRITE
};
/*
* Used to avoid invalid i_size decreases in paged mode due to an outdated server side i_size
*/
struct FhgfsIsizeHints
{
bool ignoreIsize; // caller knows that the i_size needs to be ignored
uint64_t timeBeforeRemoteStat; // time before doing a remote stat call
};
// kernel inode cache helpers (one-time init, per-allocation init, per-free cleanup)
extern void FhgfsInode_initOnce(FhgfsInode* this);
extern void FhgfsInode_allocInit(FhgfsInode* this);
extern void FhgfsInode_destroyUninit(FhgfsInode* this);
// public extern
// file handle reference counting
extern FhgfsOpsErr FhgfsInode_referenceHandle(FhgfsInode* this, struct dentry* dentry,
int openFlags, bool allowRWHandle, LookupIntentInfoOut* lookupInfo,
FileHandleType* outHandleType, uint32_t* outVersion);
extern FhgfsOpsErr FhgfsInode_releaseHandle(FhgfsInode* this, FileHandleType handleType,
struct dentry* dentry);
extern bool FhgfsInode_hasWriteHandle(FhgfsInode* this);
extern void FhgfsInode_getRefIOInfo(FhgfsInode* this, FileHandleType handleType,
unsigned accessFlags, RemotingIOInfo* outIOInfo);
// file cache lock and buffer access
extern void FhgfsInode_fileCacheExclusiveLock(FhgfsInode* this);
extern void FhgfsInode_fileCacheExclusiveUnlock(FhgfsInode* this);
extern int FhgfsInode_fileCacheExclusiveTryLock(FhgfsInode* this);
extern void FhgfsInode_fileCacheSharedLock(FhgfsInode* this);
extern void FhgfsInode_fileCacheSharedUnlock(FhgfsInode* this);
extern struct CacheBuffer* Fhgfsinode_getFileCacheBuffer(FhgfsInode* this);
// range lock PID bookkeeping
extern bool FhgfsInode_removeRangeLockPID(FhgfsInode* this, int pid);
extern void FhgfsInode_addRangeLockPID(FhgfsInode* this, int pid);
// misc
extern uint64_t FhgfsInode_generateInodeID(struct super_block* sb, const char* entryID,
int entryIDLen);
extern bool FhgfsInode_isCacheValid(FhgfsInode* this, umode_t i_mode, Config* cfg);
// private extern
extern FileHandleType __FhgfsInode_openFlagsToHandleType(int openFlags);
extern FhgfsOpsErr __FhgfsInode_referenceTrunc(FhgfsInode* this, App* app, int openFlag,
struct dentry* dentry);
extern void __FhgfsInode_initOpenIOInfo(FhgfsInode* this, FhgfsInodeFileHandle* fileHandle,
unsigned accessFlags, PathInfo* pathInfo, RemotingIOInfo* outIOInfo);
// getters & setters
static inline const EntryInfo* FhgfsInode_getEntryInfo(FhgfsInode* this);
static inline PathInfo* FhgfsInode_getPathInfo(FhgfsInode* this);
static inline DirEntryType FhgfsInode_getDirEntryType(FhgfsInode* this);
static inline int FhgfsInode_getNumRangeLockPIDs(FhgfsInode* this);
static inline bool FhgfsInode_getIsFileOpen(FhgfsInode* this);
static inline bool FhgfsInode_getIsFileOpenByMultipleReaders(FhgfsInode* this);
static inline void FhgfsInode_setLastWriteBackOrIsizeWriteTime(FhgfsInode* this);
static inline uint64_t FhgfsInode_getLastWriteBackOrIsizeWriteTime(FhgfsInode* this);
static inline int FhgfsInode_getWriteBackCounter(FhgfsInode* this);
static inline void FhgfsInode_setWritePageError(FhgfsInode* this);
static inline int FhgfsInode_getHasWritePageError(FhgfsInode* this);
// inliners
// (the definitions further down omit "static inline"; they get it from these declarations)
static inline void Fhgfsinode_appendLock(FhgfsInode* this);
static inline void Fhgfsinode_appendUnlock(FhgfsInode* this);
static inline void _FhgfsInode_initRootEntryInfo(FhgfsInode* this);
static inline void FhgfsInode_entryInfoReadLock(FhgfsInode* this);
static inline void FhgfsInode_entryInfoReadUnlock(FhgfsInode* this);
static inline void FhgfsInode_entryInfoWriteLock(FhgfsInode* this);
static inline void FhgfsInode_entryInfoWriteUnlock(FhgfsInode* this);
static inline bool FhgfsInode_compareEntryID(FhgfsInode* this, const char* otherEntryID);
static inline void FhgfsInode_updateEntryInfoOnRenameUnlocked(FhgfsInode* this,
const EntryInfo* newParentInfo, const char* newEntryName);
static inline void FhgfsInode_updateEntryInfoUnlocked(FhgfsInode* this,
const EntryInfo* newEntryInfo);
static inline int FhgfsInode_getStripeCount(FhgfsInode* this);
static inline StripePattern* FhgfsInode_getStripePattern(FhgfsInode* this);
static inline void FhgfsInode_clearStripePattern(FhgfsInode* this);
static inline int FhgfsInode_handleTypeToOpenFlags(FileHandleType handleType);
static inline void FhgfsInode_invalidateCache(FhgfsInode* this);
static inline void FhgfsInode_incNumDirtyPages(FhgfsInode* this);
static inline void FhgfsInode_decNumDirtyPages(FhgfsInode* this);
static inline uint64_t FhgfsInode_getNumDirtyPages(FhgfsInode* this);
static inline void FhgfsInode_setNumDirtyPages(FhgfsInode* this, uint64_t value);
static inline bool FhgfsInode_getHasDirtyPages(FhgfsInode* this);
static inline void FhgfsInode_setParentNodeID(FhgfsInode* this, NumNodeID parentNodeID);
static inline NumNodeID FhgfsInode_getParentNodeID(FhgfsInode* this);
static inline void FhgfsInode_incWriteBackCounter(FhgfsInode* this);
static inline void FhgfsInode_decWriteBackCounter(FhgfsInode* this);
static inline void FhgfsInode_initIsizeHints(FhgfsInode* this, FhgfsIsizeHints* outISizeHints);
static inline void FhgfsInode_setNoIsizeDecrease(FhgfsInode* this);
static inline void FhgfsInode_unsetNoIsizeDecrease(FhgfsInode* this);
static inline int FhgfsInode_getNoIsizeDecrease(FhgfsInode* this);
static inline void FhgfsInode_setPageWriteFlag(FhgfsInode* this);
static inline void FhgfsInode_clearWritePageError(FhgfsInode* this);
static inline int FhgfsInode_getHasPageWriteFlag(FhgfsInode* this);
/**
* Represents an open file handle on the mds.
*
* Each inode embeds one instance per FileHandleType (see FhgfsInode::fileHandles); the instances
* are accessed under FhgfsInode::fileHandlesMutex by the reference/release functions.
*/
struct FhgfsInodeFileHandle
{
const char* fileHandleID; // set when this handle is used for a remote file open; kfree'd when
// the last reference is released
unsigned refCount; // number of references to this handle
bool needsAppendLockCleanup; // true if append lock release failed
AtomicInt maxUsedTargetIndex; /* zero-based target index in stripe pattern (-1 means "none").
this is the highest target index that was read/written, so higher targets don't need
fsync() or close(). */
BitStore firstWriteDone; /* one bit per storage target in stripe pattern; bit is set when we send
data to this target. */
};
/**
* Represents a linux file or directory inode with fhgfs extensions.
*
* Note: vfs_inode has to stay the first member, because BEEGFS_INODE() and BEEGFS_VFSINODE()
* convert between "struct inode*" and "FhgfsInode*" with a plain pointer cast.
*/
struct FhgfsInode
{
struct inode vfs_inode; // linux vfs inode
EntryInfo entryInfo;
RWLock entryInfoLock;
NumNodeID parentNodeID; /* nodeID of the parent directory, should be moved to EntryInfo
* in the future. So far only used for NFS exports and only set for
* DirInodes in FhgfsOpsExport to reconnect dentries. */
PathInfo pathInfo;
Time dataCacheTime; /* last time we updated file contents (monotonic clock) or dir attribs;
protected by i_lock */
Mutex fileHandlesMutex; // protects fileHandles (ref counts and per-handle state)
FhgfsInodeFileHandle fileHandles[BEEGFS_INODE_FILEHANDLES_NUM]; // use FileHandleType as index
struct StripePattern* pattern; // initialized when file is opened; multiple readers allowed
Mutex rangeLockPIDsMutex;
AtomicInt numRangeLockPIDs; // modified only with mutex held, but readable without it
IntMap rangeLockPIDs; /* all PIDs (TGIDs) that used range locking and have this file open
(values are dummies) */
Mutex appendMutex; // for atomic writes with append-flag
AtomicInt appendFDsOpen;
struct CacheBuffer fileCacheBuffer; // current buffer cache entry (protected by fileCacheLock)
RWLock fileCacheLock; /* for sync'ed access to cacheEntry and page cache (required to ensure
order, e.g. when a new r/w-call comes in while we're about to disable caching) */
AtomicInt64 numPageCacheDirtyPages; // number of written (=> dirty) bytes since last flush
// NOTE(review): the member name says "pages", the comment above says "bytes" - confirm the unit
/* writeBackCounter and lastWriteBackEndOrIsizeWriteTime are hints for refreshInode to
* ignore the server's inode size */
AtomicInt writeBackCounter; // number of writeBack threads sending pages to the server
AtomicInt64 lastWriteBackEndOrIsizeWriteTime;
AtomicInt noRemoteIsizeDecrease; // if set remote attributes won't decrease the i_size
int flags; // BEEGFS_INODE_FLAG_... values; protected by inode->i_lock
uint32_t fileVersion; /* protected by entryInfoLock (which would subsume a more granular lock).
used by native cache mode to track file contents change. */
uint32_t metaVersion; /* protected by entryInfoLock (which would subsume a more granular lock).
used as a way to invalidate the cache due to internal metadata changes
(like stripe pattern change) via lookup::revalidateIntent. */
atomic_t modified; // tracks whether the inode data has been written since its last full flush
atomic_t coRWInProg; // coherent read/write in progress
};
/**
* Read the current value of the numRangeLockPIDs counter.
*
* Note: this is a plain atomic read, rangeLockPIDsMutex is not taken here.
*/
int FhgfsInode_getNumRangeLockPIDs(FhgfsInode* this)
{
return AtomicInt_read(&this->numRangeLockPIDs);
}
/**
 * Check (under fileHandlesMutex) whether any of the file handles of this inode is referenced.
 *
 * @return true if the file is currently open, i.e. at least one handle has a non-zero ref count.
 */
bool FhgfsInode_getIsFileOpen(FhgfsInode* this)
{
   bool isOpen = false;
   int handleIdx = 0;

   Mutex_lock(&this->fileHandlesMutex); // L O C K

   // stop at the first handle that is in use
   while(!isOpen && (handleIdx < BEEGFS_INODE_FILEHANDLES_NUM) )
   {
      isOpen = (this->fileHandles[handleIdx].refCount != 0);
      handleIdx++;
   }

   Mutex_unlock(&this->fileHandlesMutex); // U N L O C K

   return isOpen;
}
/**
 * Check whether more than one reference exists on the handles that allow reading (READ and RW).
 *
 * Note: This only serves as a hint for read-ahead disabling (because the current buffered mode
 * logic could do read-ahead trashing with multiple concurrent readers on the same file). As it is
 * just a hint, the ref counts are read without taking the file handles mutex.
 *
 * @return true if the file is currently open by more than one reader (including read+write).
 */
bool FhgfsInode_getIsFileOpenByMultipleReaders(FhgfsInode* this)
{
   unsigned readOnlyRefs = this->fileHandles[FileHandleType_READ].refCount;
   unsigned readWriteRefs = this->fileHandles[FileHandleType_RW].refCount;

   return (readOnlyRefs + readWriteRefs) > 1;
}
/**
* Acquire the internal append mutex (appendMutex) of this inode.
*
* Note: remember to call _appendUnlock()!!
*/
void Fhgfsinode_appendLock(FhgfsInode* this)
{
Mutex_lock(&this->appendMutex);
}
/**
* Release the internal append mutex (appendMutex) that was taken via _appendLock().
*/
void Fhgfsinode_appendUnlock(FhgfsInode* this)
{
Mutex_unlock(&this->appendMutex);
}
/**
* Get the EntryInfo of this inode
*
* Note: you will probably need to call _entryInfoReadLock() before using this (and don't forget to
* unlock afterwards).
*
* @return pointer to the entryInfo member that is embedded in the inode (not a copy).
*/
const EntryInfo* FhgfsInode_getEntryInfo(FhgfsInode* this)
{
return &this->entryInfo;
}
/**
* Get the PathInfo of this inode
*
* @return pointer to the pathInfo member that is embedded in the inode (not a copy).
*/
PathInfo* FhgfsInode_getPathInfo(FhgfsInode* this)
{
return &this->pathInfo;
}
/**
 * Lazily set the EntryInfo of the root inode. This can not be done at mount time, because mounts
 * without a connection to the management server are allowed.
 *
 * Nothing happens if the superblock already has the root EntryInfo or if this is not the root
 * inode. Otherwise entryInfoLock is write-locked, the "has root info" state is checked again and
 * (if still unset) the EntryInfo is replaced by a copy of the root EntryInfo. The superblock is
 * marked as having the root info only if getting the copy succeeded.
 */
void _FhgfsInode_initRootEntryInfo(FhgfsInode* this)
{
   struct inode* vfs_inode = &this->vfs_inode;
   struct super_block* sb = vfs_inode->i_sb;

   bool rootInfoKnown = FhgfsOps_getHasRootEntryInfo(sb);

   // unlikely as it will happen only once
   if (unlikely(!rootInfoKnown) && (vfs_inode->i_ino == BEEGFS_INODE_ROOT_INO) )
   {
      /* root inode, EntryInfo not yet set during mount, as the meta server might be unknown.
       * Set it now. */
      App* app = FhgfsOps_getApp(sb);

      RWLock_writeLock(&this->entryInfoLock); // Write-LOCK

      // verify under the lock
      if (unlikely(FhgfsOps_getHasRootEntryInfo(sb) ) )
         goto unlock; // another thread updated it already, nothing to do

      EntryInfo_uninit(&this->entryInfo);

      if (MetadataTk_getRootEntryInfoCopy(app, &this->entryInfo) )
         FhgfsOps_setHasRootEntryInfo(sb, true);

   unlock:
      RWLock_writeUnlock(&this->entryInfoLock); // Write-UNLOCK
   }
}
/*
* Get an EntryInfo read-lock (entryInfoLock in shared mode).
*
* Note: Must be taken on reading parentEntryID. Should not be taken if only entryID is read,
* as entryID is never updated.
* See FhgfsInode_updateEntryInfoUnlocked() and FhgfsInode_updateEntryInfoOnRenameUnlocked()
* Note: If the root inode is not initialized, it will be initialized by this method under
* a write-lock (which is released again before the read-lock is taken).
* Note: remember to call FhgfsInode_entryInfoReadUnlock() afterwards.
*/
void FhgfsInode_entryInfoReadLock(FhgfsInode* this)
{
_FhgfsInode_initRootEntryInfo(this); // NOTE: might temporarily writelock entryInfoLock
RWLock_readLock(&this->entryInfoLock); // Read-LOCK
}
/**
 * Release the EntryInfo read-lock taken by FhgfsInode_entryInfoReadLock().
 */
void FhgfsInode_entryInfoReadUnlock(FhgfsInode* this)
{
   RWLock_readUnlock(&(this->entryInfoLock) ); // Read-UNLOCK
}
/**
 * Write-lock the EntryInfo of this inode.
 *
 * Note: Like the read-lock variant, this might initialize the root inode first.
 */
void FhgfsInode_entryInfoWriteLock(FhgfsInode* this)
{
   // NOTE: this might temporarily write-lock (and unlock again) entryInfoLock
   _FhgfsInode_initRootEntryInfo(this);

   RWLock_writeLock(&(this->entryInfoLock) ); // Write-LOCK
}
/**
 * Release the EntryInfo write-lock taken by FhgfsInode_entryInfoWriteLock().
 */
void FhgfsInode_entryInfoWriteUnlock(FhgfsInode* this)
{
   RWLock_writeUnlock(&(this->entryInfoLock) ); // Write-UNLOCK
}
/**
 * Check whether the given entryID equals the entryID of this inode.
 *
 * Note: Intended for callers like __FhgfsOps_compareInodeID, which is called e.g. by ilookup5
 *    with the inode_hash_lock held, so this method may not sleep.
 *
 * @return true if the IDs are equal, false otherwise (also if this inode has no entryID)
 */
bool FhgfsInode_compareEntryID(FhgfsInode* this, const char* otherEntryID)
{
   const char* ownEntryID;

   // root node => owner info is not stored in the inode, so compare with the fixed root ID
   if(this->vfs_inode.i_ino == BEEGFS_INODE_ROOT_INO)
      return strcmp(otherEntryID, META_ROOTDIR_ID_STR) == 0;

   ownEntryID = this->entryInfo.entryID;

   if (unlikely(!ownEntryID) )
   {
      printk_fhgfs(KERN_WARNING, "Bug: entryID is a NULL pointer!\n");
      return false;
   }

   return strcmp(otherEntryID, ownEntryID) == 0;
}
/**
 * Getter for the DirEntryType that is stored in the EntryInfo of this inode.
 */
DirEntryType FhgfsInode_getDirEntryType(FhgfsInode* this)
{
   return (this->entryInfo).entryType;
}
/**
 * Apply the effects of a rename to the EntryInfo of this inode: new parent entryID, new file
 * name and (for non-directories) the owner of the new parent including the matching
 * buddy-mirror feature flag.
 *
 * Note: newParentInfo and newName are not owned by this object!
 *
 * Note: The caller must already hold FhgfsInode->entryInfoLock as write-lock.
 */
void FhgfsInode_updateEntryInfoOnRenameUnlocked(FhgfsInode* this, const EntryInfo* newParentInfo,
   const char* newName)
{
   EntryInfo* ownInfo = &this->entryInfo;

   EntryInfo_updateParentEntryID(ownInfo, newParentInfo->entryID);
   EntryInfo_updateFileName(ownInfo, newName);

   // the owner is only updated for non-directories
   if (DirEntryType_ISDIR(ownInfo->entryType) )
      return;

   ownInfo->owner = newParentInfo->owner;

   // keep the buddy-mirror flag in sync with the kind of owner that was just copied
   if (ownInfo->owner.isGroup)
      ownInfo->featureFlags |= ENTRYINFO_FEATURE_BUDDYMIRRORED;
   else
      ownInfo->featureFlags &= ~ENTRYINFO_FEATURE_BUDDYMIRRORED;
}
/**
 * Update the EntryInfo of this inode from the given new EntryInfo (via EntryInfo_update() ).
 *
 * Note: The caller must already hold this->entryInfoLock as write-lock.
 */
void FhgfsInode_updateEntryInfoUnlocked(FhgfsInode* this, const EntryInfo* newEntryInfo)
{
   EntryInfo_update(&(this->entryInfo), newEntryInfo);
}
/**
 * Get the number of stripe targets of the pattern that is attached to this inode.
 *
 * Note: Dereferences this->pattern without a NULL check.
 *
 * @return length of the stripe target IDs vector of the pattern
 */
int FhgfsInode_getStripeCount(FhgfsInode* this)
{
   return UInt16Vec_length(
      this->pattern->getStripeTargetIDs(this->pattern) );
}
/**
 * Getter for the stripe pattern pointer of this inode.
 *
 * @return the stored pattern pointer (NULL after FhgfsInode_clearStripePattern() )
 */
StripePattern* FhgfsInode_getStripePattern(FhgfsInode* this)
{
   return (this->pattern);
}
/**
 * Reset the stripe pattern pointer of this inode to NULL (the pattern object itself is not
 * freed here).
 *
 * Should only be called if inode->i_count is 1. Once the pointer is NULL, the next operation
 * should be an open; nothing should access the pattern pointer until open has loaded a new
 * pattern.
 *
 * Note: This is called with a spin_lock held.
 */
void FhgfsInode_clearStripePattern(FhgfsInode* this)
{
   this->pattern = NULL;
}
/**
 * Map a FileHandleType to the corresponding OPENFILE_ACCESS_... fhgfs flags.
 *
 * Note: This is especially required when a handle was referenced with allowRWHandle, because
 *    the OPENFILE_ACCESS_... flags are not known in that case.
 *
 * @return OPENFILE_ACCESS_WRITE for write handles, OPENFILE_ACCESS_READWRITE for rw handles and
 *    OPENFILE_ACCESS_READ for every other handle type
 */
int FhgfsInode_handleTypeToOpenFlags(FileHandleType handleType)
{
   if(handleType == FileHandleType_WRITE)
      return OPENFILE_ACCESS_WRITE;

   if(handleType == FileHandleType_RW)
      return OPENFILE_ACCESS_READWRITE;

   // fallback for all remaining handle types
   return OPENFILE_ACCESS_READ;
}
/**
 * Invalidate the cache of this inode by resetting its dataCacheTime to zero.
 */
void FhgfsInode_invalidateCache(FhgfsInode* this)
{
   Time_setZero(&(this->dataCacheTime) );
}
/**
 * Atomically increase the dirty page counter of this inode by one.
 */
void FhgfsInode_incNumDirtyPages(FhgfsInode* this)
{
   AtomicInt64_inc(&(this->numPageCacheDirtyPages) );
}
/**
 * Atomically decrease the dirty page counter of this inode by one.
 */
void FhgfsInode_decNumDirtyPages(FhgfsInode* this)
{
   AtomicInt64_dec(&(this->numPageCacheDirtyPages) );
}
/**
 * Atomically read the dirty page counter of this inode.
 *
 * @return current value of numPageCacheDirtyPages
 */
uint64_t FhgfsInode_getNumDirtyPages(FhgfsInode* this)
{
   return AtomicInt64_read(&(this->numPageCacheDirtyPages) );
}
/**
 * Atomically set the dirty page counter of this inode.
 *
 * @param value new value for numPageCacheDirtyPages
 */
void FhgfsInode_setNumDirtyPages(FhgfsInode* this, uint64_t value)
{
   // note: no "return <expr>" here, as that is not allowed in a void function by ISO C
   AtomicInt64_set(&this->numPageCacheDirtyPages, value);
}
/**
 * Test whether the dirty page counter of this inode is currently non-zero.
 *
 * @return true if FhgfsInode_getNumDirtyPages() is not 0, false otherwise
 */
bool FhgfsInode_getHasDirtyPages(FhgfsInode* this)
{
   return (FhgfsInode_getNumDirtyPages(this) != 0);
}
/**
 * Setter for the parentNodeID that is stored in this inode.
 */
void FhgfsInode_setParentNodeID(FhgfsInode* this, NumNodeID parentNodeID)
{
   (this->parentNodeID) = parentNodeID;
}
/**
 * Getter for the parentNodeID that is stored in this inode.
 *
 * @return copy of the stored parentNodeID
 */
NumNodeID FhgfsInode_getParentNodeID(FhgfsInode* this)
{
   return (this->parentNodeID);
}
/**
 * Atomically increase the write-back counter of this inode by one.
 */
void FhgfsInode_incWriteBackCounter(FhgfsInode* this)
{
   AtomicInt_inc(&(this->writeBackCounter) );
}
/**
 * Atomically decrease the write-back counter of this inode by one.
 */
void FhgfsInode_decWriteBackCounter(FhgfsInode* this)
{
   AtomicInt_dec(&(this->writeBackCounter) );
}
/**
 * Atomically read the write-back counter of an inode.
 *
 * @param this may be NULL (this is the case for lookup calls)
 * @return current counter value or 0 if no inode was given
 */
int FhgfsInode_getWriteBackCounter(FhgfsInode* this)
{
   return this ? AtomicInt_read(&this->writeBackCounter) : 0;
}
/**
 * Set the noRemoteIsizeDecrease marker of this inode (atomically stores 1).
 */
void FhgfsInode_setNoIsizeDecrease(FhgfsInode* this)
{
   AtomicInt_set(&(this->noRemoteIsizeDecrease), 1);
}
/**
 * Reset the noRemoteIsizeDecrease marker of this inode, but only if it is currently set and
 * there is no sign of outstanding page writes: no write-back in progress (write-back counter
 * is 0), no counted dirty pages, no pages tagged dirty or writeback in the mapping and no
 * writable memory mapping.
 */
void FhgfsInode_unsetNoIsizeDecrease(FhgfsInode* this)
{
   struct address_space *pageMapping;

   if (!FhgfsInode_getNoIsizeDecrease(this) )
      return; // marker is not set, so there is nothing to reset

   if (FhgfsInode_getWriteBackCounter(this) )
      return; // there are still write-back threads left

   pageMapping = this->vfs_inode.i_mapping;

   /* any dirty, writeback or mmapped-write pages mean that the server did not receive all
      pages yet, so the marker has to stay */

   if (FhgfsInode_getHasDirtyPages(this) )
      return;

   if (mapping_tagged(pageMapping, PAGECACHE_TAG_DIRTY) )
      return;

   if (mapping_tagged(pageMapping, PAGECACHE_TAG_WRITEBACK) )
      return;

   if (mapping_writably_mapped(pageMapping) )
      return;

   AtomicInt_set(&this->noRemoteIsizeDecrease, 0);
}
/**
 * Atomically read the noRemoteIsizeDecrease marker of this inode.
 *
 * @return the stored value (set to 1 by _setNoIsizeDecrease(), to 0 by _unsetNoIsizeDecrease() )
 */
int FhgfsInode_getNoIsizeDecrease(FhgfsInode* this)
{
   return AtomicInt_read(&(this->noRemoteIsizeDecrease) );
}
/**
 * Atomically store the current 64-bit jiffies value as lastWriteBackEndOrIsizeWriteTime.
 */
void FhgfsInode_setLastWriteBackOrIsizeWriteTime(FhgfsInode* this)
{
   AtomicInt64_set(&(this->lastWriteBackEndOrIsizeWriteTime), get_jiffies_64() );
}
/**
 * Atomically read lastWriteBackEndOrIsizeWriteTime.
 *
 * @return the jiffies value stored by FhgfsInode_setLastWriteBackOrIsizeWriteTime()
 */
uint64_t FhgfsInode_getLastWriteBackOrIsizeWriteTime(FhgfsInode* this)
{
   return AtomicInt64_read(&(this->lastWriteBackEndOrIsizeWriteTime) );
}
/**
 * Initialize iSizeHints: store the current jiffies as timeBeforeRemoteStat and decide whether
 * the remote i_size should be ignored.
 *
 * @param this might be a NULL pointer
 * @param outISizeHints is left completely untouched if an inode is given that does not refer
 *    to a regular file
 *
 * Note: As optimization, atomic values and jiffies are only read if the inode refers to a
 *    regular file. The file type is not known before doing lookup-stat, which is why a NULL
 *    pointer is accepted for the inode (ignoreIsize is false in that case).
 */
void FhgfsInode_initIsizeHints(FhgfsInode* this, FhgfsIsizeHints* outISizeHints)
{
   struct address_space *pageMapping;

   if (this && !S_ISREG(this->vfs_inode.i_mode) )
      return;

   outISizeHints->timeBeforeRemoteStat = get_jiffies_64();
   outISizeHints->ignoreIsize = false;

   if (!this)
      return; // no inode => no hints available

   /* Check if there is a hint that we should ignore the remote isize: write-back in progress,
      dirty pages (counted or tagged), pages under writeback or a writable memory mapping */

   pageMapping = this->vfs_inode.i_mapping;

   if (FhgfsInode_getWriteBackCounter(this) ||
       FhgfsInode_getHasDirtyPages(this) ||
       mapping_tagged(pageMapping, PAGECACHE_TAG_DIRTY) ||
       mapping_tagged(pageMapping, PAGECACHE_TAG_WRITEBACK) ||
       mapping_writably_mapped(pageMapping) )
      outISizeHints->ignoreIsize = true;
}
/**
 * Set the page-write flag (there was a page write to this inode).
 */
void FhgfsInode_setPageWriteFlag(FhgfsInode* this)
{
   /* Note: This is called for each and every page (in paged mode, mmap, splice, ...), so it is
    *    a very hot path that needs to be cheap.
    *    With paged writes the flag will mostly be set already and only the first write to the
    *    inode finds it unset. Reading a value is cheaper than writing it, hence the flag is
    *    tested first and only written if necessary. */

   if (FhgfsInode_getHasPageWriteFlag(this) )
      return;

   this->flags |= BEEGFS_INODE_FLAG_PAGED;
}
/**
 * Test the page-write flag.
 *
 * Note: To avoid inode locking overhead, this is usually called without holding i_lock.
 *    The result is not perfectly reliable then, but at some point CPUs get synchronized and
 *    that is sufficient for us.
 *
 * @return the masked flag bit: non-zero if the flag is set (not normalized to 1), 0 otherwise
 */
int FhgfsInode_getHasPageWriteFlag(FhgfsInode* this)
{
   return (this->flags & BEEGFS_INODE_FLAG_PAGED);
}
/**
 * Remember that there was an error writing pages of this inode (sets the write-error flag).
 */
void FhgfsInode_setWritePageError(FhgfsInode* this)
{
   this->flags = this->flags | BEEGFS_INODE_FLAG_WRITE_ERROR;
}
/**
 * Forget a previous page write error (clears the write-error flag).
 */
void FhgfsInode_clearWritePageError(FhgfsInode* this)
{
   this->flags = this->flags & ~(BEEGFS_INODE_FLAG_WRITE_ERROR);
}
/**
 * Test the write-error flag.
 *
 * @return the masked flag bit: non-zero if the flag is set (not normalized to 1), 0 otherwise
 */
int FhgfsInode_getHasWritePageError(FhgfsInode* this)
{
   return (this->flags & BEEGFS_INODE_FLAG_WRITE_ERROR);
}
#endif /* FHGFSINODE_H_ */

View File

@@ -0,0 +1,319 @@
#include <app/App.h>
#include <app/log/Logger.h>
#include <common/threading/Mutex.h>
#include <common/Common.h>
#include <common/toolkit/Time.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <filesystem/FsDirInfo.h>
#include <filesystem/FsFileInfo.h>
#include <toolkit/NoAllocBufferStore.h>
#include <common/toolkit/MetadataTk.h>
#include <common/Common.h>
#include "FhgfsOps_versions.h"
#include "FhgfsOpsInode.h"
#include "FhgfsOpsDir.h"
#include "FhgfsOpsSuper.h"
#include "FhgfsOpsHelper.h"
#include <linux/namei.h>
static int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dentry);
/**
* Dentry operations of this file system: cache revalidation on dcache lookup hits and the
* keep-or-delete decision for dentries.
*/
struct dentry_operations fhgfs_dentry_ops =
{
.d_revalidate = FhgfsOps_revalidateIntent,
.d_delete = FhgfsOps_deleteDentry,
};
/**
* Called when the dcache has a lookup hit (and wants to know whether the cache data
* is still valid).
*
* Dentries without inode, without parent inode or with a bad inode are only checked against the
* ENOENT cache validity time from the config; all other dentries are revalidated through
* __FhgfsOps_revalidateIntent().
*
* @return value is quasi-boolean: 0 if entry invalid, 1 if still valid. The only other return
* value is -ECHILD, which is returned if this is called in rcu-walk mode (LOOKUP_RCU).
*/
#ifndef KERNEL_HAS_ATOMIC_OPEN
int FhgfsOps_revalidateIntent(struct dentry* dentry, struct nameidata* nameidata)
#else
int FhgfsOps_revalidateIntent(struct dentry* dentry, unsigned flags)
#endif // KERNEL_HAS_ATOMIC_OPEN
{
App* app;
Config* cfg;
Logger* log;
const char* logContext;
int isValid = 0; // quasi-boolean (return value)
struct dentry* parentDentry;
struct inode* parentInode;
struct inode* inode;
Time now;
unsigned long nowTime; // current real time in nanoseconds
unsigned cacheValidityMS;
#ifndef KERNEL_HAS_ATOMIC_OPEN
// this kernel passes nameidata instead of flags, so extract the flags for the checks below
unsigned flags = nameidata ? nameidata->flags : 0;
IGNORE_UNUSED_VARIABLE(flags);
#endif // KERNEL_HAS_ATOMIC_OPEN
#ifdef LOOKUP_RCU
/* note: 2.6.38 introduced rcu-walk mode, which is inappropriate for us, because we need the
parentDentry and need to sleep for communication. ECHILD below tells vfs to call this again
in old ref-walk mode. (see Documentation/filesystems/vfs.txt:d_revalidate) */
if(flags & LOOKUP_RCU)
return -ECHILD;
#endif // LOOKUP_RCU
inode = dentry->d_inode;
// access to parentDentry and inode needs to live below the rcu check.
app = FhgfsOps_getApp(dentry->d_sb);
cfg = App_getConfig(app);
log = App_getLogger(app);
logContext = "FhgfsOps_revalidateIntent";
// takes a reference on the parent, which is dropped again at cleanup_put_parent
parentDentry = dget_parent(dentry);
parentInode = parentDentry->d_inode;
if(unlikely(Logger_getLogLevel(log) >= 5) )
FhgfsOpsHelper_logOp(Log_SPAM, app, dentry, inode, logContext);
// no inode, no parent inode or bad inode => only the time-based ENOENT cache check applies
if(!inode || !parentInode || is_bad_inode(inode) )
{
if(inode && S_ISDIR(inode->i_mode) )
{
// a directory with submounts is reported as invalid (isValid is still 0), but not dropped
if(have_submounts(dentry) )
goto cleanup_put_parent;
shrink_dcache_parent(dentry);
}
// dentry->d_time is updated with the current time during the first lookup,
// the cache is only valid if the difference of CURRENT_TIME and revalidate
// time is less than the tuneENOENTCacheValidityMS from the config
Time_setToNowReal(&now);
cacheValidityMS = Config_getTuneENOENTCacheValidityMS(cfg);
nowTime = (now.tv_sec * 1000000000UL + now.tv_nsec);
// validity of 0 disables this cache; the age is converted from nanoseconds to milliseconds
if (!cacheValidityMS || ((nowTime - dentry->d_time)/1000000UL) > cacheValidityMS)
{
d_drop(dentry);
goto cleanup_put_parent;
}
else
{
isValid = 1;
goto cleanup_put_parent;
}
}
// active dentry => remote-stat and local-compare
isValid = __FhgfsOps_revalidateIntent(parentDentry, dentry );
cleanup_put_parent:
// clean-up
dput(parentDentry);
LOG_DEBUG_FORMATTED(log, 5, logContext, "'%s': isValid: %s",
dentry->d_name.name, isValid ? "yes" : "no");
return isValid;
}
/*
* sub function of FhgfsOps_revalidateIntent(), supposed to be inlined, as we resolve several
* pointers two times, in this function and also already in the caller
*
* If the local cache of the inode is still valid, no communication is done. Otherwise (except
* for the mount root) a combined lookup-intent request (revalidate + stat) is sent and the inode
* is refreshed from the result.
*
* Note: dentry->d_inode is dereferenced without a check, so the dentry needs to have an inode.
*
* @return value is quasi-boolean: 0 if entry invalid, 1 if still valid (no other return values
* allowed).
*/
int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dentry)
{
const char* logContext = "__FhgfsOps_revalidateIntent";
App* app = FhgfsOps_getApp(dentry->d_sb);
Logger* log = App_getLogger(app);
Config* cfg = App_getConfig(app);
const char* entryName = dentry->d_name.name;
fhgfs_stat fhgfsStat; // filled by the lookup-intent request below
fhgfs_stat* fhgfsStatPtr; // &fhgfsStat if stat values are available, NULL otherwise
struct inode* parentInode = parentDentry->d_inode;
FhgfsInode* parentFhgfsInode = BEEGFS_INODE(parentInode);
struct inode* inode = dentry->d_inode;
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
bool cacheValid = FhgfsInode_isCacheValid(fhgfsInode, inode->i_mode, cfg);
int isValid = 0; // quasi-boolean (return value)
bool needDrop = false; // true if the dentry shall be unhashed at the end
FhgfsIsizeHints iSizeHints;
FhgfsOpsHelper_logOp(Log_SPAM, app, dentry, inode, logContext);
// fast path: the local cache is still valid, so no server communication is required
if (cacheValid)
{
isValid = 1;
return isValid;
}
/* NOTE(review): iSizeHints is only initialized in the else-branch below, so for the mount root
it is passed to __FhgfsOps_refreshInode() uninitialized - confirm that it is not read in that
case. */
if(IS_ROOT(dentry) )
fhgfsStatPtr = NULL;
else
{ // any file or directory except our mount root
const EntryInfo* parentInfo;
EntryInfo* entryInfo;
uint32_t metaVersion;
FhgfsOpsErr remotingRes;
LookupIntentInfoIn inInfo; // input data for combo-request
LookupIntentInfoOut outInfo; // result data of combo-request
FhgfsInode_initIsizeHints(fhgfsInode, &iSizeHints);
// lock order: parent info (read) first, then own entry info (write); unlock in reverse order
FhgfsInode_entryInfoReadLock(parentFhgfsInode); // LOCK parentInfo
FhgfsInode_entryInfoWriteLock(fhgfsInode); // LOCK EntryInfo
parentInfo = FhgfsInode_getEntryInfo(parentFhgfsInode);
entryInfo = &fhgfsInode->entryInfo;
metaVersion = fhgfsInode->metaVersion;
LookupIntentInfoIn_init(&inInfo, parentInfo, entryName);
LookupIntentInfoIn_addEntryInfo(&inInfo, entryInfo);
LookupIntentInfoIn_addMetaVersion(&inInfo, metaVersion);
LookupIntentInfoOut_prepare(&outInfo, NULL, &fhgfsStat);
remotingRes = FhgfsOpsRemoting_lookupIntent(app, &inInfo, &outInfo);
// we only need to update entryInfo flags
if (remotingRes == FhgfsOpsErr_SUCCESS && outInfo.revalidateRes == FhgfsOpsErr_SUCCESS)
entryInfo->featureFlags = outInfo.revalidateUpdatedFlags;
FhgfsInode_entryInfoWriteUnlock(fhgfsInode); // UNLOCK EntryInfo
FhgfsInode_entryInfoReadUnlock(parentFhgfsInode); // UNLOCK parentInfo
// communication error => entry is reported as invalid and the dentry is dropped
if (unlikely(remotingRes != FhgfsOpsErr_SUCCESS) )
{
needDrop = true;
goto out;
}
// server says the entry does not exist (anymore) => invalid, drop the dentry
if (outInfo.revalidateRes == FhgfsOpsErr_PATHNOTEXISTS)
{
// NOTE(review): entryInfo->fileName is read here after the entryInfo lock was released
if(unlikely(!(outInfo.responseFlags & LOOKUPINTENTRESPMSG_FLAG_REVALIDATE) ) )
Logger_logErrFormatted(log, logContext, "Unexpected revalidate info missing: %s",
entryInfo->fileName);
needDrop = true;
goto out;
}
// check the stat result here and set fhgfsStatPtr accordingly
if (outInfo.statRes == FhgfsOpsErr_SUCCESS)
fhgfsStatPtr = &fhgfsStat; // successful, so we can use existing stat values
else if (outInfo.statRes == FhgfsOpsErr_NOTOWNER)
fhgfsStatPtr = NULL; // stat values not available
else if (outInfo.statRes == FhgfsOpsErr_INTERNAL
&& outInfo.revalidateRes == FhgfsOpsErr_METAVERSIONMISMATCH
&& Config_getSysCacheInvalidationVersion(cfg) )
{
// case when we want to invalidate the inode cache due to inode version change
// this case will not drop the dentry (but isValid is still 0 when we return from here)
// must goto out, since we clear the inode stripe pattern
fhgfsStatPtr = NULL; // stat values not available
__FhgfsOps_clearInodeStripePattern(app, inode);
goto out;
}
else
{
// NOTE(review): entryInfo->fileName is read here after the entryInfo lock was released
if(unlikely(!(outInfo.responseFlags & LOOKUPINTENTRESPMSG_FLAG_STAT) ) )
Logger_logErrFormatted(log, logContext, "Unexpected stat info missing: %s",
entryInfo->fileName);
// now its getting difficult as there is an unexpected error
needDrop = true;
goto out;
}
}
// a refresh result of 0 is treated as success, so the dentry is valid only in that case
if (!__FhgfsOps_refreshInode(app, inode, fhgfsStatPtr, &iSizeHints) )
isValid = 1;
else
isValid = 0;
out:
if (needDrop)
d_drop(dentry);
return isValid;
}
/**
 * Called from dput() when d_count is going to 0 and dput() wants to know from us whether or not
 * it should delete the dentry.
 *
 * Dentries are kept in the dentry cache (dcache) in the positive and in the negative case; only
 * dentries that refer to an inode which is marked as bad are deleted.
 *
 * @return !=0 to delete dentry, 0 to keep it
 */
#ifndef KERNEL_HAS_D_DELETE_CONST_ARG
int FhgfsOps_deleteDentry(struct dentry* dentry)
#else
int FhgfsOps_deleteDentry(const struct dentry* dentry)
#endif // KERNEL_HAS_D_DELETE_CONST_ARG
{
   struct inode* inode = dentry->d_inode;

   if(!inode)
      return 0; // negative dentry => keep it

   // inode marked as bad => no need to keep the dentry
   return is_bad_inode(inode) ? 1 : 0;
}
/*
 * Constructs the path from the root dentry (of the mount-point) to an arbitrary hashed dentry.
 *
 * Note: Acquires a buf from the given buffer store that must be released by the caller (unless
 *    an error is returned, in which case the buf has already been given back here).
 * Note: This is safe for paths larger than bufSize, but returns -ENAMETOOLONG in that case
 * NOTE: Do NOT call two times in the same thread, as it might deadlock if multiple threads
 *    try to acquire a buffer. The thread that takes the buffer two times might never finish,
 *    if not sufficient buffers are available. If multiple threads take multiple buffers in
 *    parallel, an infinite deadlock of the filesystem will happen.
 *
 * @param outStoreBuf the buf that must be returned to the buffer store by the caller; set to
 *    NULL on error
 * @return some offset within the outStoreBuf or the linux ERR_PTR(errorCode) (already negative)
 *    and *outStoreBuf will be NULL then
 */
char* __FhgfsOps_pathResolveToStoreBuf(NoAllocBufferStore* bufStore, struct dentry* dentry,
   char** outStoreBuf)
{
   const ssize_t storeBufLen = NoAllocBufferStore_getBufSize(bufStore);
   char* storeBuf = NoAllocBufferStore_waitForBuf(bufStore);
   char* resolvedPath = dentry_path_raw(dentry, storeBuf, storeBufLen);

   if (unlikely(IS_ERR(resolvedPath) ) )
   { // resolving failed => the caller gets no buf, so give it back right away
      NoAllocBufferStore_addBuf(bufStore, storeBuf);
      storeBuf = NULL;
   }

   *outStoreBuf = storeBuf;

   return resolvedPath;
}

View File

@@ -0,0 +1,39 @@
#ifndef FHGFSOPSDIR_H_
#define FHGFSOPSDIR_H_
#include <app/App.h>
#include <common/threading/Mutex.h>
#include <common/Common.h>
#include <common/toolkit/Time.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <filesystem/FsDirInfo.h>
#include <filesystem/FsFileInfo.h>
#include <toolkit/NoAllocBufferStore.h>
#include <common/toolkit/MetadataTk.h>
#include <common/Common.h>
#include "FhgfsOps_versions.h"
#include "FhgfsOpsInode.h"
#include <linux/dcache.h>
// dentry operations table (d_revalidate and d_delete handlers)
extern struct dentry_operations fhgfs_dentry_ops;
// d_revalidate handler; the second argument type depends on the kernel version
#ifndef KERNEL_HAS_ATOMIC_OPEN
extern int FhgfsOps_revalidateIntent(struct dentry* dentry, struct nameidata* nameidata);
#else
extern int FhgfsOps_revalidateIntent(struct dentry* dentry, unsigned flags);
#endif // KERNEL_HAS_ATOMIC_OPEN
// d_delete handler; constness of the argument depends on the kernel version
#ifndef KERNEL_HAS_D_DELETE_CONST_ARG
extern int FhgfsOps_deleteDentry(struct dentry* dentry);
#else
extern int FhgfsOps_deleteDentry(const struct dentry* dentry);
#endif // KERNEL_HAS_D_DELETE_CONST_ARG
// resolves the path of a dentry into a buf taken from bufStore (caller must return the buf)
extern char* __FhgfsOps_pathResolveToStoreBuf(NoAllocBufferStore* bufStore,
struct dentry* dentry, char** outStoreBuf);
#endif /* FHGFSOPSDIR_H_ */

View File

@@ -0,0 +1,951 @@
#include <common/Common.h> // (placed up here for LINUX_VERSION_CODE definition)
/**
* NFS export is probably not worth the backport efforts for kernels before 2.6.29
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
#include <common/storage/Metadata.h>
#include <common/toolkit/StringTk.h>
#include <filesystem/FhgfsInode.h>
#include <filesystem/FhgfsOpsInode.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <os/OsTypeConversion.h>
#include "FhgfsOpsExport.h"
#include "FhgfsOpsHelper.h"
#include "FhgfsOpsFile.h"
#include "FsDirInfo.h"
#include "FhgfsOpsDir.h"
/**
* Operations for NFS export (and open_by_handle): encoding of file handles, decoding of file
* handles to the dentry of the entry or of its parent, parent lookup and name lookup.
*/
const struct export_operations fhgfs_export_ops =
{
.encode_fh = FhgfsOpsExport_encodeNfsFileHandle,
.fh_to_dentry = FhgfsOpsExport_nfsFileHandleToDentry,
.fh_to_parent = FhgfsOpsExport_nfsFileHandleToParent,
.get_parent = FhgfsOpsExport_getParentDentry,
.get_name = FhgfsOpsExport_getName,
};
// placed here to make it possible to inline it (compiler decision)
static int __FhgfsOpsExport_encodeNfsFileHandleV3(struct inode* inode, __u32* file_handle_buf,
int* max_len, const EntryInfo* parentInfo);
static bool __FhgfsOpsExport_iterateDirFindName(struct dentry* dentry, const char* entryID,
char* outName);
/**
* Single-byte handle type identifier that will be returned by _encodeNfsFileHandle and later be
* used by _nfsFileHandleToDentry to decode the handle.
*
* note: in-tree file systems use "enum fid_type" in linux/exportfs.h; this is our own version of
* it to let _nfsFileHandleToDentry know how to decode the FhgfsNfsFileHandle... structure
* (V1, V2 or V3).
* note: according to linux/exportfs.h, "the filesystem must not use the value '0' or '0xff'" as
* valid types; 0xff means given handle buffer size is too small.
* note: these values are handed out to the kernel as part of file handles, so existing values
* must not be changed.
*/
enum FhgfsNfsHandleType
{
FhgfsNfsHandle_STANDARD_V1 = 0xf1, /* some arbitrary number that doesn't conflict with others
in linux/exportfs.h (though that wouldn't be a real
conflict) to identify our standard valid handle. */
FhgfsNfsHandle_STANDARD_V2 = 0xf2, // Adds the parentOwnerNodeID
FhgfsNfsHandle_STANDARD_V3 = 0xf3, /* Adds isBuddyMirrored */
FhgfsNfsHandle_INVALID = 0xfe, /* special meaning: error occurred, invalid handle
(this is only a hint for _nfsFileHandleToDentry,
because _encodeNfsFileHandle has no way to return an
error to the calling kernel code). */
FhgfsNfsHandle_BUFTOOSMALL = 0xff, /* special meaning for callers: given handle buffer
was too small */
};
typedef enum FhgfsNfsHandleType FhgfsNfsHandleType;
/**
 * Encode a file handle (typically for NFS) that can later be used to lookup an inode via
 * _nfsFileHandleToDentry().
 *
 * This is only the kernel-version-dependent entry point; the actual encoding is done by
 * __FhgfsOpsExport_encodeNfsFileHandleV3().
 *
 * @param max_len file_handle_buf array length (=> length in 4-byte words), will be set to
 *    actually used or desired length.
 * @param parent_inode/connectable if set, this means we should try to create a connectable file
 *    handle, so that we can later do a parent dir lookup from it. (The EntryInfo of the parent
 *    is passed on to the encoder then, NULL otherwise.)
 *
 * @return FhgfsNfsHandleType_...
 */
#ifndef KERNEL_HAS_ENCODE_FH_INODE
int FhgfsOpsExport_encodeNfsFileHandle(struct dentry* dentry, __u32* file_handle_buf, int* max_len,
   int connectable)
{
   struct inode* inode = dentry->d_inode;
   struct inode* parent_inode = dentry->d_parent->d_inode;
   const EntryInfo* parentInfo = NULL;

   if (connectable)
      parentInfo = FhgfsInode_getEntryInfo(BEEGFS_INODE(parent_inode) );

   return __FhgfsOpsExport_encodeNfsFileHandleV3(inode, file_handle_buf, max_len, parentInfo);
}
#else // LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
int FhgfsOpsExport_encodeNfsFileHandle(struct inode* inode, __u32* file_handle_buf, int* max_len,
   struct inode* parent_inode)
{
   const EntryInfo* parentInfo = NULL;

   if (parent_inode)
      parentInfo = FhgfsInode_getEntryInfo(BEEGFS_INODE(parent_inode) );

   return __FhgfsOpsExport_encodeNfsFileHandleV3(inode, file_handle_buf, max_len, parentInfo);
}
#endif // KERNEL_HAS_ENCODE_FH_INODE
/**
 * Serialize the EntryInfo of the given inode as V3 nfs handle into the given buffer.
 *
 * @param file_handle_buf buffer for the handle
 * @param max_len in: length of file_handle_buf in 4-byte words; out: handle length in 4-byte
 *    words (rounded up), also set if the given buffer is too small
 * @param parentInfo EntryInfo of the parent dir to store the parent owner node in the handle;
 *    may be NULL, in which case the parent owner node is zeroed
 *
 * @return FhgfsNfsHandle_STANDARD_V3 on success, FhgfsNfsHandle_BUFTOOSMALL if the given buffer
 *    cannot hold the handle, FhgfsNfsHandle_INVALID if parsing of an entryID failed
 */
int __FhgfsOpsExport_encodeNfsFileHandleV3(struct inode* inode, __u32* file_handle_buf,
   int* max_len, const EntryInfo* parentInfo)
{
   FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
   struct FhgfsNfsFileHandleV3* handle = (void*)file_handle_buf;
   __u8* handleBytes = (__u8 *)handle; // byte-wise view of the buffer, used for padding

   const size_t handleLen = sizeof(struct FhgfsNfsFileHandleV3);
   const size_t bufLen = (*max_len) * sizeof(__u32);

   /* errors are assumed until everything was encoded (we have no real way to return an error,
      so the handle is only marked as invalid) */
   FhgfsNfsHandleType retVal = FhgfsNfsHandle_INVALID;

   const EntryInfo* entryInfo;
   size_t padIdx;

   /* tell the caller about the desired/used length (in 4-byte words; "+sizeof(u32)-1" rounds
      up) */
   *max_len = (handleLen + sizeof(__u32)-1) / sizeof(__u32);

   // the given buf has to be large enough for our handle
   if(bufLen < handleLen)
      return FhgfsNfsHandle_BUFTOOSMALL;

   /* the entryInfo is serialized in a special small format (normal string-based serialization
      would use too much buffer space) */

   FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo

   entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);

   if(unlikely(!__FhgfsOpsExport_parseEntryIDForNfsHandle(entryInfo->parentEntryID,
      &handle->parentEntryIDCounter, &handle->parentEntryIDTimestamp,
      &handle->parentEntryIDNodeID) ) )
      goto unlock_and_exit;

   if(unlikely(!__FhgfsOpsExport_parseEntryIDForNfsHandle(entryInfo->entryID,
      &handle->entryIDCounter, &handle->entryIDTimestamp,
      &handle->entryIDNodeID) ) )
      goto unlock_and_exit;

   handle->ownerNodeID = entryInfo->owner.node;
   handle->entryType = entryInfo->entryType;
   handle->isBuddyMirrored = EntryInfo_getIsBuddyMirrored(entryInfo);

   // NOTE: parentInfo does not need to be locked for this, as it is not a char* value
   if (parentInfo)
      handle->parentOwnerNodeID = parentInfo->owner.node;
   else
      handle->parentOwnerNodeID = (NumNodeID){0};

   // zero everything between the end of the real handle and the end of the given buffer
   for (padIdx = handleLen; padIdx < bufLen; padIdx++)
      handleBytes[padIdx] = 0x00;

   retVal = FhgfsNfsHandle_STANDARD_V3;

unlock_and_exit:
   FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo

   return retVal;
}
/**
* Lookup an inode based on a file handle that was previously created via _encodeNfsFileHandle().
*
* Handles of the older types V1 and V2 are accepted as well; they are converted step by step
* (V1 => V2 => V3) into a V3 handle on the stack before the lookup.
*
* @param fid file handle buffer
* @param fh_len fid buffer length in 4-byte words
* @param fileid_type FhgfsNfsHandleType_... (as returned by _encodeNfsFileHandle).
* @return result of __FhgfsOpsExport_lookupDentryFromNfsHandle() or NULL if the handle type is
* unknown or the handle is too short for its type
*/
struct dentry* FhgfsOpsExport_nfsFileHandleToDentry(struct super_block *sb, struct fid *fid,
int fh_len, int fileid_type)
{
App* app = FhgfsOps_getApp(sb);
const char* logContext = "NFS-handle-to-dentry";
FhgfsNfsFileHandleV2 fhgfsNfsHandleV2; // conversion target for V1 handles
struct FhgfsNfsFileHandleV3 fhgfsNfsHandleV3; // conversion target for V2 handles
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle = (struct FhgfsNfsFileHandleV3*) fid->raw;
FhgfsNfsHandleType handleType = (FhgfsNfsHandleType)fileid_type;
void* rawHandle = fid->raw; // conversion source: given buffer or the converted V2 handle
size_t fhgfsNfsHandleLen;
size_t givenHandleByteLength = fh_len * sizeof(__u32); // fh_len is in 4-byte words
static bool isFirstCall = true; // true if this method is called for the first time
FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, NULL, logContext);
// NOTE(review): isFirstCall is a function-level static that is read and written without locking
if (unlikely(isFirstCall) )
{
/* Our getattr functions assumes it is called after a lookup-intent and therefore does not
* validate the inode by default. However, this assumption is not true for NFS, and as result
* inode updates are never detected. NFS even caches readdir results and without inode updates
* even deleted entries are not detected. So we need to disable the getattr optimization for
* nfs exports.
*/
Config* cfg = App_getConfig(app);
Logger* log = App_getLogger(app);
Config_setTuneRefreshOnGetAttr(cfg);
Logger_logFormatted(log, Log_DEBUG, logContext,
"nfs export detected: auto enabling refresh-on-getattr");
isFirstCall = false;
}
// check if this handle is valid
if (unlikely(handleType != FhgfsNfsHandle_STANDARD_V1 &&
handleType != FhgfsNfsHandle_STANDARD_V2 &&
handleType != FhgfsNfsHandle_STANDARD_V3))
{
printk_fhgfs_debug(KERN_INFO, "%s: Called with invalid handle type: 0x%x\n",
__func__, fileid_type);
return NULL;
}
// the minimum required length depends on the handle type
if (likely(handleType == FhgfsNfsHandle_STANDARD_V3))
fhgfsNfsHandleLen = sizeof(struct FhgfsNfsFileHandleV3);
else if (handleType == FhgfsNfsHandle_STANDARD_V2)
fhgfsNfsHandleLen = sizeof(FhgfsNfsFileHandleV2);
else
fhgfsNfsHandleLen = sizeof(FhgfsNfsFileHandleV1);
// check if given handle length is valid
if(unlikely(givenHandleByteLength < fhgfsNfsHandleLen) )
{
printk_fhgfs_debug(KERN_INFO, "%s: Called with too small handle length: "
"%d bytes; (handle type: 0x%x)\n",
__func__, (int)givenHandleByteLength, fileid_type);
return NULL;
}
if (unlikely(handleType == FhgfsNfsHandle_STANDARD_V1))
{ // old handle, generated before the update that creates V2 handles, convert V1 to V2
FhgfsNfsFileHandleV1* handleV1 = (FhgfsNfsFileHandleV1*) rawHandle;
fhgfsNfsHandleV2.entryIDCounter = handleV1->entryIDCounter;
fhgfsNfsHandleV2.entryIDNodeID = handleV1->entryIDNodeID;
fhgfsNfsHandleV2.entryIDTimestamp = handleV1->entryIDTimestamp;
fhgfsNfsHandleV2.ownerNodeID = handleV1->ownerNodeID;
fhgfsNfsHandleV2.parentEntryIDCounter = handleV1->parentEntryIDCounter;
fhgfsNfsHandleV2.parentEntryIDNodeID = handleV1->parentEntryIDNodeID;
fhgfsNfsHandleV2.parentEntryIDTimestamp = handleV1->parentEntryIDTimestamp;
fhgfsNfsHandleV2.entryType = handleV1->entryType;
fhgfsNfsHandleV2.parentOwnerNodeID = 0; // only available in V2 handles
// continue with the converted handle, so that the V2 => V3 conversion below picks it up
rawHandle = &fhgfsNfsHandleV2;
handleType = FhgfsNfsHandle_STANDARD_V2;
}
if (unlikely(handleType == FhgfsNfsHandle_STANDARD_V2))
{ // V2 handle (given or converted from V1), convert V2 to V3
FhgfsNfsFileHandleV2* handleV2 = (FhgfsNfsFileHandleV2*) rawHandle;
fhgfsNfsHandleV3.entryIDCounter = handleV2->entryIDCounter;
fhgfsNfsHandleV3.entryIDNodeID.value = handleV2->entryIDNodeID;
fhgfsNfsHandleV3.entryIDTimestamp = handleV2->entryIDTimestamp;
fhgfsNfsHandleV3.ownerNodeID.value = handleV2->ownerNodeID;
fhgfsNfsHandleV3.parentEntryIDCounter = handleV2->parentEntryIDCounter;
fhgfsNfsHandleV3.parentEntryIDNodeID.value = handleV2->parentEntryIDNodeID;
fhgfsNfsHandleV3.parentEntryIDTimestamp = handleV2->parentEntryIDTimestamp;
fhgfsNfsHandleV3.entryType = handleV2->entryType;
fhgfsNfsHandleV3.parentOwnerNodeID.value = handleV2->parentOwnerNodeID;
fhgfsNfsHandleV3.isBuddyMirrored = 0; // only available in V3 handles
fhgfsNfsHandle = &fhgfsNfsHandleV3;
handleType = FhgfsNfsHandle_STANDARD_V3;
}
// fhgfsNfsHandle points to the given buffer (V3) or to the converted handle on the stack
return __FhgfsOpsExport_lookupDentryFromNfsHandle(sb, fhgfsNfsHandle, false);
}
/**
* Lookup an inode based on a file handle that was previously created via _encodeNfsFileHandle().
*
* @param fid file handle buffer
* @param fh_len fid buffer length in 4-byte words
* @param fileid_type FhgfsNfsHandleType_... (as returned by _encodeNfsFileHandle).
*
* Note: Always a V2 handle
*/
struct dentry* FhgfsOpsExport_nfsFileHandleToParent(struct super_block *sb, struct fid *fid,
int fh_len, int fileid_type)
{
App* app = FhgfsOps_getApp(sb);
const char* logContext = "NFS-handle-to-parent";
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle = (struct FhgfsNfsFileHandleV3*)fid->raw;
FhgfsNfsHandleType handleType = (FhgfsNfsHandleType)fileid_type;
size_t fhgfsNfsHandleLen = sizeof(struct FhgfsNfsFileHandleV3);
size_t givenHandleByteLength = fh_len * sizeof(__u32); // fh_len is in 4-byte words
FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, NULL, logContext);
// check if this handle is valid
if (unlikely(handleType != FhgfsNfsHandle_STANDARD_V2
&& handleType != FhgfsNfsHandle_STANDARD_V3))
{ // V1 handles didn't include the parentOwnerNodeID
#ifdef BEEGFS_DEBUG
if (handleType != FhgfsNfsHandle_STANDARD_V1)
printk_fhgfs_debug(KERN_INFO, "%s: Called with invalid handle type: 0x%x\n",
__func__, fileid_type);
#endif
return NULL;
}
// check if given handle length is valid
if(unlikely(givenHandleByteLength < fhgfsNfsHandleLen) )
{
printk_fhgfs_debug(KERN_INFO, "%s: Called with too small handle length: "
"%d bytes; (handle type: 0x%x)\n",
__func__, (int)givenHandleByteLength, fileid_type);
return NULL;
}
return __FhgfsOpsExport_lookupDentryFromNfsHandle(sb, fhgfsNfsHandle, true);
}
/**
* Check whether a dentry/inode for the nfs handle exists in the local cache and try server lookup
* otherwise.
*
* @param isParent if true the parent will be looked up.
*/
struct dentry* __FhgfsOpsExport_lookupDentryFromNfsHandle(struct super_block *sb,
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle, bool lookupParent)
{
bool entryRes;
char* entryID = NULL;
size_t entryIDLen;
char* parentEntryID = NULL;
App* app = FhgfsOps_getApp(sb);
Logger* log = App_getLogger(app);
const char* logContext =
lookupParent ? "NFS-decode-handle-to-parent-dentry" : "NFS-decode-handle-to-dentry";
ino_t inodeHash; // (simply the inode number for us)
struct inode* inode;
FhgfsInodeComparisonInfo comparisonInfo;
EntryInfo entryInfo;
struct dentry* resDentry;
// generate entryID string
if (!lookupParent)
entryRes = __FhgfsOpsExport_entryIDFromNfsHandle(fhgfsNfsHandle->entryIDCounter,
fhgfsNfsHandle->entryIDTimestamp, fhgfsNfsHandle->entryIDNodeID, &entryID);
else
entryRes = __FhgfsOpsExport_entryIDFromNfsHandle(fhgfsNfsHandle->parentEntryIDCounter,
fhgfsNfsHandle->parentEntryIDTimestamp, fhgfsNfsHandle->parentEntryIDNodeID, &entryID);
if(unlikely(!entryRes) )
goto err_cleanup_entryids;
entryIDLen = strlen(entryID);
if (strncmp(entryID, META_ROOTDIR_ID_STR, entryIDLen) == 0)
inodeHash = BEEGFS_INODE_ROOT_INO;
else
inodeHash = FhgfsInode_generateInodeID(sb, entryID, entryIDLen);
comparisonInfo.inodeHash = inodeHash;
comparisonInfo.entryID = entryID;
inode = ilookup5(sb, inodeHash, __FhgfsOps_compareInodeID,
&comparisonInfo); // (ilookup5 calls iget() on match)
if(!inode)
{ // not found in cache => try to get it from mds
App* app = FhgfsOps_getApp(sb);
fhgfs_stat fhgfsStat;
struct kstat kstat;
FhgfsOpsErr statRes = FhgfsOpsErr_SUCCESS;
NumNodeID ownerNodeID;
FhgfsIsizeHints iSizeHints;
NumNodeID parentNodeID;
char* statParentEntryID;
unsigned int metaVersion;
if (!lookupParent)
{
// generate parentEntryID string
bool parentRes = __FhgfsOpsExport_entryIDFromNfsHandle(
fhgfsNfsHandle->parentEntryIDCounter, fhgfsNfsHandle->parentEntryIDTimestamp,
fhgfsNfsHandle->parentEntryIDNodeID, &parentEntryID);
if(unlikely(!parentRes) )
goto err_cleanup_entryids;
ownerNodeID = fhgfsNfsHandle->ownerNodeID;
}
else
{ // parentDir
ownerNodeID = fhgfsNfsHandle->parentOwnerNodeID;
/* Note: Needs to be special value to tell fhgfs-meta it is unknown!
* Must not be empty, as this would tell fhgfs-meta it is the root ID. */
parentEntryID = StringTk_strDup(EntryInfo_PARENT_ID_UNKNOWN);
}
FhgfsInode_initIsizeHints(NULL, &iSizeHints);
// init entry info
if (fhgfsNfsHandle->isBuddyMirrored)
EntryInfo_init(&entryInfo, NodeOrGroup_fromGroup(ownerNodeID.value), parentEntryID,
entryID, StringTk_strDup("<nfs_fh>"), fhgfsNfsHandle->entryType, 0);
else
EntryInfo_init(&entryInfo, NodeOrGroup_fromNode(ownerNodeID), parentEntryID, entryID,
StringTk_strDup("<nfs_fh>"), fhgfsNfsHandle->entryType, 0);
// communicate
statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &entryInfo, &fhgfsStat,
&parentNodeID, &statParentEntryID);
// the lookup of parent information may have failed. this can happen if the entry info we
// tried to stat describes a directory inode that is mirrored, but whose parent directory is
// not mirrored. similarly, an unmirrored directory with a mirrored parent directory can fail
// here. try again with the other choice of mirroring flag.
if (lookupParent && statRes == FhgfsOpsErr_PATHNOTEXISTS)
{
entryInfo.featureFlags ^= ENTRYINFO_FEATURE_BUDDYMIRRORED;
statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &entryInfo, &fhgfsStat,
&parentNodeID, &statParentEntryID);
}
if(statRes != FhgfsOpsErr_SUCCESS)
goto err_cleanup_entryinfo;
// entry found => create inode
metaVersion = fhgfsStat.metaVersion;
OsTypeConv_kstatFhgfsToOs(&fhgfsStat, &kstat);
kstat.ino = inodeHash;
inode = __FhgfsOps_newInode(sb, &kstat, 0, &entryInfo, &iSizeHints, metaVersion);
if (likely(inode) )
{
if (parentNodeID.value)
{
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
FhgfsInode_setParentNodeID(fhgfsInode, parentNodeID);
}
if (statParentEntryID)
{ // update the parentEntryID
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
// make absolute sure we use the right entryInfo
FhgfsInode_entryInfoWriteLock(fhgfsInode); // L O C K
EntryInfo_updateSetParentEntryID(&fhgfsInode->entryInfo, statParentEntryID);
FhgfsInode_entryInfoWriteUnlock(fhgfsInode); // U N L O C K
}
}
}
else
SAFE_KFREE(entryID); // ilookup5 found an existing inode, free the comparison entryID
if (unlikely(!inode) )
goto err_cleanup_entryinfo;
// (d_obtain_alias can also handle pointer error codes and NULL)
resDentry = d_obtain_alias(inode);
if (resDentry && !IS_ERR(resDentry) )
{
#ifndef KERNEL_HAS_S_D_OP
resDentry->d_op = &fhgfs_dentry_ops;
#endif // KERNEL_HAS_S_D_OP
if (unlikely(Logger_getLogLevel(log) >= Log_SPAM) )
{
FhgfsInode* fhgfsInode = BEEGFS_INODE(resDentry->d_inode);
const EntryInfo* entryInfo;
FhgfsInode_entryInfoReadLock(fhgfsInode); // L O C K fhgfsInode
entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);
Logger_logFormatted(log, Log_SPAM, logContext, "result dentry inode id: %s",
entryInfo->entryID);
FhgfsInode_entryInfoReadUnlock(fhgfsInode); // U N L O C K fhgfsInode
}
}
return resDentry;
err_cleanup_entryids:
SAFE_KFREE(parentEntryID);
SAFE_KFREE(entryID);
return ERR_PTR(-ESTALE);
err_cleanup_entryinfo:
EntryInfo_uninit(&entryInfo);
return ERR_PTR(-ESTALE);
}
/**
 * Split an entryID string into its counter, timestamp and node ID components.
 *
 * Note: META_ROOTDIR_ID_STR is handled as a special case: all three outputs are set to 0.
 *
 * @param entryID string in the form "<hex>-<hex>-<hex>" or META_ROOTDIR_ID_STR
 * @return false if the string could not be parsed (outNodeID is not written in that case)
 */
bool __FhgfsOpsExport_parseEntryIDForNfsHandle(const char* entryID, uint32_t* outCounter,
   uint32_t* outTimestamp, NumNodeID* outNodeID)
{
   const int expectedNumComponents = 3; // a valid entryID string consists of 3 hex components
   uint32_t parsedNodeID; // tmp target for sscanf %X, converted to NumNodeID afterwards
   int numParsed;

   if(strcmp(META_ROOTDIR_ID_STR, entryID) == 0)
   { // the root ID doesn't have the usual three components => report all of them as 0
      *outCounter = 0;
      *outTimestamp = 0;
      *outNodeID = (NumNodeID){0};

      return true;
   }

   numParsed = sscanf(entryID, "%X-%X-%X", outCounter, outTimestamp, &parsedNodeID);

   if(unlikely(numParsed != expectedNumComponents) )
   { // at least one component is missing or malformed
      printk_fhgfs_debug(KERN_INFO, "%s: Parsing of entryID failed. entryID: %s\n",
         __func__, entryID);

      return false;
   }

   *outNodeID = (NumNodeID){parsedNodeID};

   return true;
}
/**
 * Build the entryID string that belongs to the given NFS handle components.
 *
 * Note: If all three components are 0, the result is a copy of META_ROOTDIR_ID_STR.
 *
 * @param outEntryID will be allocated on success and needs to be kfree'd by the caller; set to
 *    NULL if the allocation failed
 * @return false on error
 */
bool __FhgfsOpsExport_entryIDFromNfsHandle(uint32_t counter, uint32_t timestamp,
   NumNodeID nodeID, char** outEntryID)
{
   bool isRootID = (counter == 0) && (timestamp == 0) && (nodeID.value == 0);

   *outEntryID = isRootID ?
      StringTk_strDup(META_ROOTDIR_ID_STR) :
      StringTk_kasprintf("%X-%X-%X", counter, timestamp, (uint32_t)nodeID.value);

   return (*outEntryID != NULL); // (NULL means the string could not be allocated)
}
/**
 * getParentDentry - export_operations->get_parent function
 *
 * Get the directory dentry (and inode) that has childDentry.
 *
 * Note: childDentry's EntryInfo is copied while holding the inode's entryInfo read lock; all
 * further work is done on that private copy.
 *
 * @return result of d_obtain_alias() for the parent inode; ERR_PTR(-EINVAL) if the child has an
 *    empty parentEntryID; ERR_PTR(-ESTALE) if the parent could not be stat'ed on the server.
 */
struct dentry* FhgfsOpsExport_getParentDentry(struct dentry* childDentry)
{
   int retVal = -ESTALE;

   struct super_block* superBlock = childDentry->d_sb;
   App* app = FhgfsOps_getApp(superBlock);
   Logger* log = App_getLogger(app);
   const char* logContext = "Export_getParentDentry";

   struct inode* parentInode;
   struct inode* childInode = childDentry->d_inode;
   FhgfsInode* fhgfsChildInode = BEEGFS_INODE(childInode);
   EntryInfo childEntryInfoCopy;

   FhgfsInodeComparisonInfo comparisonInfo;
   size_t parentIDLen;
   const char* parentEntryID; // (points into childEntryInfoCopy)

   struct dentry* parentDentry = NULL;

   FhgfsInode_entryInfoReadLock(fhgfsChildInode); // L O C K childInode

   EntryInfo_dup(FhgfsInode_getEntryInfo(fhgfsChildInode), &childEntryInfoCopy);

   FhgfsInode_entryInfoReadUnlock(fhgfsChildInode); // U N L O C K childInode

   parentEntryID = EntryInfo_getParentEntryID(&childEntryInfoCopy);
   parentIDLen = strlen(parentEntryID);

   /* NOTE: IS_ROOT() would not work, as any childDentry is not connected to its parent
    *       and IS_ROOT() would *always* be true here. */
   if (parentIDLen == 0)
   {
      /* This points to a bug, as we should never be called for the root dentry and so this means
       * either root was not correctly identified or setting the parentEntryID failed */
      Logger_logErrFormatted(log, Log_ERR, logContext,
         "Bug: Root does not have parentEntryID set!");

      retVal = -EINVAL;
      goto outErr;
   }

   comparisonInfo.entryID = parentEntryID;

   if (strncmp(parentEntryID, META_ROOTDIR_ID_STR, parentIDLen) == 0)
      comparisonInfo.inodeHash = BEEGFS_INODE_ROOT_INO; // root inode
   else
      comparisonInfo.inodeHash = FhgfsInode_generateInodeID(superBlock,
         parentEntryID, parentIDLen);

   Logger_logFormatted(log, Log_SPAM, logContext, "Find inode for ID: %s inodeHash: %lu",
      comparisonInfo.entryID, comparisonInfo.inodeHash);

   parentInode = ilookup5(superBlock, comparisonInfo.inodeHash, __FhgfsOps_compareInodeID,
      &comparisonInfo); // (ilookup5 calls iget() on match)

   if(!parentInode)
   { // not found in cache => try to get it from mds
      fhgfs_stat fhgfsStat;
      struct kstat kstat;

      FhgfsOpsErr statRes = FhgfsOpsErr_SUCCESS;

      const char* fileName = StringTk_strDup("<nfs_fh>");
      EntryInfo parentInfo;
      unsigned int metaVersion;

      // stat out-values: owner node and entryID of the parent's own parent directory
      NumNodeID parentNodeID = (NumNodeID){0};
      char* statParentEntryID = NULL;

      NumNodeID parentOwnerNodeID;
      FhgfsIsizeHints iSizeHints;

      /* Note: Needs to be (any) special value to tell fhgfs-meta it is unknown!
       *       Must not be empty, as this would tell fhgfs-meta it is the root ID. */
      char* grandParentID = StringTk_strDup(EntryInfo_PARENT_ID_UNKNOWN);

      parentOwnerNodeID = FhgfsInode_getParentNodeID(fhgfsChildInode);

      if (parentOwnerNodeID.value == 0)
      { /* Hmm, so we don't know the real ownerNodeId, we try the childs ID, but if that fails
         * with FhgfsOpsErr_NOTOWNER, we would need to cycle through all meta-targets, which
         * is too slow and we therefore don't do that, but hope the client can
         * recover -ESTALE itself */
         parentOwnerNodeID = childEntryInfoCopy.owner.node;
      }

      // init the parent's entry info (mirroring flag is guessed from the child)
      if (EntryInfo_getIsBuddyMirrored(&childEntryInfoCopy))
         EntryInfo_init(&parentInfo, NodeOrGroup_fromGroup(parentOwnerNodeID.value),
            grandParentID, StringTk_strDup(comparisonInfo.entryID), fileName,
            DirEntryType_DIRECTORY, 0);
      else
         EntryInfo_init(&parentInfo, NodeOrGroup_fromNode(parentOwnerNodeID), grandParentID,
            StringTk_strDup(comparisonInfo.entryID), fileName, DirEntryType_DIRECTORY, 0);

      // communicate
      FhgfsInode_initIsizeHints(NULL, &iSizeHints);

      statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &parentInfo, &fhgfsStat,
         &parentNodeID, &statParentEntryID);

      // the lookup of parent information may have failed. this can happen if the entry info we
      // tried to stat describes a directory inode that is mirrored, but whose parent directory
      // is not mirrored. similarly, an unmirrored directory with a mirrored parent directory
      // can fail here. try again with the other choice of mirroring flag.
      if (statRes == FhgfsOpsErr_PATHNOTEXISTS)
      {
         parentInfo.featureFlags ^= ENTRYINFO_FEATURE_BUDDYMIRRORED;

         statRes = FhgfsOpsRemoting_statAndGetParentInfo(app, &parentInfo, &fhgfsStat,
            &parentNodeID, &statParentEntryID);
      }

      if(statRes != FhgfsOpsErr_SUCCESS)
      {
         EntryInfo_uninit(&parentInfo);
         goto outErr;
      }

      // entry found => create inode
      metaVersion = fhgfsStat.metaVersion;

      OsTypeConv_kstatFhgfsToOs(&fhgfsStat, &kstat);

      kstat.ino = comparisonInfo.inodeHash;

      parentInode = __FhgfsOps_newInodeWithParentID(superBlock, &kstat, 0, &parentInfo,
         parentNodeID, &iSizeHints, metaVersion);

      if (likely(parentInode) )
      {
         if (statParentEntryID)
         { // update the parentEntryID
            FhgfsInode* parentFhgfsInode = BEEGFS_INODE(parentInode);

            // make absolute sure we use the right entryInfo
            FhgfsInode_entryInfoWriteLock(parentFhgfsInode); // L O C K
            EntryInfo_updateSetParentEntryID(&parentFhgfsInode->entryInfo, statParentEntryID);
            FhgfsInode_entryInfoWriteUnlock(parentFhgfsInode); // U N L O C K
         }
      }
      else
      { // no inode took over the entryID string returned by the stat => don't leak it
         SAFE_KFREE(statParentEntryID);
      }
   }

   // (d_obtain_alias can also handle pointer error codes and NULL)
   parentDentry = d_obtain_alias(parentInode);

   if (parentDentry && !IS_ERR(parentDentry) )
   {
#ifndef KERNEL_HAS_S_D_OP
      parentDentry->d_op = &fhgfs_dentry_ops;
#endif // KERNEL_HAS_S_D_OP
   }

   // printk(KERN_INFO "%s: d_obtain_alias(inode) res: %ld\n", __func__, IS_ERR(parentDentry) );

   EntryInfo_uninit(&childEntryInfoCopy);

   return parentDentry;

outErr:
   EntryInfo_uninit(&childEntryInfoCopy);

   return ERR_PTR(retVal);
}
/**
 * getName - export_operations->get_name function
 *
 * Searches dirDentry for the entry that has the same entryID as the child and copies the name
 * of that entry to outName (the search itself is done by __FhgfsOpsExport_iterateDirFindName() ).
 *
 * @param dirDentry the directory in which to find a name
 * @param outName a pointer to a %NAME_MAX+1 char buffer to store the name
 * @param child the dentry for the child directory.
 * @return 0 on success; -ENOTDIR if dirDentry has no inode or is not a directory; -EINVAL if
 *    the directory inode has no file operations; -ESTALE if no matching entry was found.
 */
int FhgfsOpsExport_getName(struct dentry* dirDentry, char *outName, struct dentry *child)
{
   struct inode* parentDirInode = dirDentry->d_inode;
   FhgfsInode* fhgfsChild = BEEGFS_INODE(child->d_inode);

   EntryInfo childInfoCopy;
   int result;

   if (!parentDirInode || !S_ISDIR(parentDirInode->i_mode))
      return -ENOTDIR;

   if (!parentDirInode->i_fop)
      return -EINVAL;

   // take a private copy of the child's EntryInfo, so we can search without holding its lock
   FhgfsInode_entryInfoReadLock(fhgfsChild); // LOCK childInode
   EntryInfo_dup(FhgfsInode_getEntryInfo(fhgfsChild), &childInfoCopy);
   FhgfsInode_entryInfoReadUnlock(fhgfsChild); // UNLOCK childInode

   if (__FhgfsOpsExport_iterateDirFindName(dirDentry,
      EntryInfo_getEntryID(&childInfoCopy), outName) )
      result = 0;
   else
      result = -ESTALE;

   EntryInfo_uninit(&childInfoCopy);

   return result;
}
/**
* Find the name of an entry with the given entryID in the directory dirDentry.
*
* Note: This uses a rather slow client-side readdir() to find the entry.
* Maybe we should add another NetMsg and do it directly on the server!
*/
bool __FhgfsOpsExport_iterateDirFindName(struct dentry* dirDentry, const char* entryID,
char* outName)
{
struct super_block* superBlock = dirDentry->d_sb;
App* app = FhgfsOps_getApp(superBlock);
Logger* log = App_getLogger(app);
const char* logContext = "FhgfsOpsExport_readdirFindName";
bool retVal = false;
struct inode* dirInode = dirDentry->d_inode;
FhgfsInode* fhgfsDirInode = BEEGFS_INODE(dirInode);
size_t contentsPos = 0;
size_t contentsLength;
StrCpyVec* dirContents;
StrCpyVec* dirContentIDs;
EntryInfo dirEntryInfoCopy;
FsDirInfo dirInfo;
FsDirInfo_init(&dirInfo, app);
dirContents = FsDirInfo_getDirContents(&dirInfo);
dirContentIDs = FsDirInfo_getEntryIDs(&dirInfo);
FhgfsInode_entryInfoReadLock(fhgfsDirInode); // L O C K EntryInfo
EntryInfo_dup(FhgfsInode_getEntryInfo(fhgfsDirInode), &dirEntryInfoCopy);
FhgfsInode_entryInfoReadUnlock(fhgfsDirInode); // U N L O C K EntryInfo
if(unlikely(Logger_getLogLevel(log) >= 5) )
{
const EntryInfo* dirInfo = FhgfsInode_getEntryInfo(fhgfsDirInode);
struct inode* inode = dirDentry->d_inode;
FhgfsOpsHelper_logOpMsg(Log_SPAM, app, dirDentry, inode, logContext,
"dir-id: %s searchID: %s", dirInfo->entryID, entryID);
}
for( ; ; ) // loop as long as we didn't find entryID and as long as the dir has entries
{
int refreshRes;
char* currentName;
const char* currentEntryID;
refreshRes = FhgfsOpsHelper_refreshDirInfoIncremental(app,
&dirEntryInfoCopy, &dirInfo, false);
if(unlikely(refreshRes) )
{ // error occurred
break;
}
contentsPos = FsDirInfo_getCurrentContentsPos(&dirInfo);
contentsLength = StrCpyVec_length(dirContents);
#if 0
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext,
"contentsPos: %lld/%lld, endOfDir: %s",
(long long)contentsPos, (long long)contentsLength,
FsDirInfo_getEndOfDir(&dirInfo) ? "yes" : "no");
#endif
// refreshDirInfoInc guarantees that we either have a valid range for current offset
// or that contentsLength is empty
if(!contentsLength)
{ // end of dir
LOG_DEBUG(log, Log_SPAM, logContext, "reached end of dir");
break;
}
currentName = StrCpyVec_at(dirContents, contentsPos);
currentEntryID = StrCpyVec_at(dirContentIDs, contentsPos);
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext,
"searchID: %s current dir-entry: %s entryID: %s ",
entryID, currentName, currentEntryID);
if(!strcmp(currentEntryID, entryID) )
{ // match found
// note: out name buf is guaranteed to be NAME_MAX+1 according to <linux/exportfs.h>
StringTk_strncpyTerminated(outName, currentName, NAME_MAX+1);
LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "Found childName: %s", outName);
retVal = true;
break;
}
FsDirInfo_setCurrentContentsPos(&dirInfo, contentsPos + 1);
} // end of for-loop
// clean-up
FsDirInfo_uninit((FsObjectInfo*) &dirInfo);
EntryInfo_uninit(&dirEntryInfoCopy);
return retVal;
}
#endif // LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)

View File

@@ -0,0 +1,115 @@
#ifndef FHGFSOPSEXPORT_H_
#define FHGFSOPSEXPORT_H_
#include <common/Common.h> // (placed up here for LINUX_VERSION_CODE definition)
/**
* NFS export is probably not worth the backport efforts for kernels before 2.6.29
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
#include <linux/exportfs.h>
struct FhgfsNfsFileHandleV1;
typedef struct FhgfsNfsFileHandleV1 FhgfsNfsFileHandleV1;
struct FhgfsNfsFileHandleV2;
typedef struct FhgfsNfsFileHandleV2 FhgfsNfsFileHandleV2;
struct FhgfsNfsFileHandleV3;
extern const struct export_operations fhgfs_export_ops;
#ifndef KERNEL_HAS_ENCODE_FH_INODE
int FhgfsOpsExport_encodeNfsFileHandle(struct dentry* dentry, __u32* file_handle_buf, int* max_len,
int connectable);
#else
int FhgfsOpsExport_encodeNfsFileHandle(struct inode* inode, __u32* file_handle_buf, int* max_len,
struct inode* parent_inode);
#endif
struct dentry* FhgfsOpsExport_nfsFileHandleToDentry(struct super_block *sb, struct fid *fid,
int fh_len, int fileid_type);
struct dentry* FhgfsOpsExport_nfsFileHandleToParent(struct super_block *sb, struct fid *fid,
int fh_len, int fileid_type);
int FhgfsOpsExport_getName(struct dentry* dirDentry, char *outName, struct dentry *child);
struct dentry* __FhgfsOpsExport_lookupDentryFromNfsHandle(struct super_block *sb,
struct FhgfsNfsFileHandleV3* fhgfsNfsHandle, bool isParent);
struct dentry* FhgfsOpsExport_getParentDentry(struct dentry* childDentry);
bool __FhgfsOpsExport_parseEntryIDForNfsHandle(const char* entryID, uint32_t* outCounter,
uint32_t* outTimestamp, NumNodeID* outNodeID);
bool __FhgfsOpsExport_entryIDFromNfsHandle(uint32_t counter, uint32_t timestamp,
NumNodeID nodeID, char** outEntryID);
/**
 * Version 1 of the handle structure that will be encoded by _encodeFileHandle().
 *
 * EntryID string format (except for "root"): <u32hex_counter>-<u32hex_timestamp>-<u16hex_nodeID>
 *
 * Note: The struct is packed, so the order and the sizes of the fields directly define the byte
 * layout of the handle.
 * Note: V1 does not contain a parentOwnerNodeID (that field was added with V2).
 */
struct FhgfsNfsFileHandleV1
{
// note: fields not in natural order for better alignment and packed size
uint32_t parentEntryIDCounter; // from EntryInfo::parentEntryID
uint32_t parentEntryIDTimestamp; // from EntryInfo::parentEntryID
uint32_t entryIDCounter; // from EntryInfo::entryID
uint32_t entryIDTimestamp; // from EntryInfo::entryID
uint16_t parentEntryIDNodeID; // from EntryInfo::parentEntryID
uint16_t entryIDNodeID; // from EntryInfo::entryID
uint16_t ownerNodeID; // from EntryInfo::ownerNodeID
char entryType; // from EntryInfo::entryType
} __attribute__((packed));
/**
 * Version 2 of the handle structure that will be encoded by _encodeFileHandle().
 *
 * EntryID string format (except for "root"): <u32hex_counter>-<u32hex_timestamp>-<u16hex_nodeID>
 *
 * Note: V2 adds parentOwnerNodeID; all other fields are the same as in V1.
 * Note: The struct is packed, so the order and the sizes of the fields directly define the byte
 * layout of the handle.
 */
struct FhgfsNfsFileHandleV2
{
// note: fields not in natural order for better alignment and packed size
uint32_t parentEntryIDCounter; // from EntryInfo::parentEntryID
uint32_t parentEntryIDTimestamp; // from EntryInfo::parentEntryID
uint32_t entryIDCounter; // from EntryInfo::entryID
uint32_t entryIDTimestamp; // from EntryInfo::entryID
uint16_t parentEntryIDNodeID; // from EntryInfo::parentEntryID
uint16_t entryIDNodeID; // from EntryInfo::entryID
uint16_t parentOwnerNodeID; // from EntryInfo::ownerNodeID -> parentEntryInfo!
uint16_t ownerNodeID; // from EntryInfo::ownerNodeID
char entryType; // from EntryInfo::entryType
} __attribute__((packed));
/**
 * Version 3 of the NFS file handle structure.
 *
 * Note: Compared to V2, all node IDs are stored as NumNodeID instead of uint16_t and the
 * isBuddyMirrored flag was added.
 * Note: The struct is packed, so the order and the sizes of the fields directly define the byte
 * layout of the handle.
 */
struct FhgfsNfsFileHandleV3
{
uint32_t parentEntryIDCounter; // from EntryInfo::parentEntryID
uint32_t parentEntryIDTimestamp; // from EntryInfo::parentEntryID
uint32_t entryIDCounter; // from EntryInfo::entryID
uint32_t entryIDTimestamp; // from EntryInfo::entryID
NumNodeID parentEntryIDNodeID; // from EntryInfo::parentEntryID
NumNodeID entryIDNodeID; // from EntryInfo::entryID
NumNodeID parentOwnerNodeID; // from EntryInfo::ownerNodeID -> parentEntryInfo!
NumNodeID ownerNodeID; // from EntryInfo::ownerNodeID
char entryType; // from EntryInfo::entryType
uint8_t isBuddyMirrored; // from EntryInfo::isBuddyMirrored (non-zero: owner is a mirror group)
} __attribute__((packed));
#endif // LINUX_VERSION_CODE
#endif /* FHGFSOPSEXPORT_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,205 @@
#ifndef FHGFSOPSFILE_H_
#define FHGFSOPSFILE_H_
#include <app/App.h>
#include <common/threading/Mutex.h>
#include <common/toolkit/Time.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <filesystem/FsDirInfo.h>
#include <filesystem/FsFileInfo.h>
#include <toolkit/NoAllocBufferStore.h>
#include <common/toolkit/MetadataTk.h>
#include <common/Common.h>
#include "FhgfsOps_versions.h"
#include "FhgfsOpsInode.h"
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#ifndef SEEK_SET
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
#endif
// forward declaration
struct App;
extern struct file_operations fhgfs_file_buffered_ops;
extern struct file_operations fhgfs_file_pagecache_ops;
extern struct file_operations fhgfs_dir_ops;
extern struct address_space_operations fhgfs_address_ops;
extern struct address_space_operations fhgfs_address_pagecache_ops;
extern loff_t FhgfsOps_llseekdir(struct file *file, loff_t offset, int origin);
extern loff_t FhgfsOps_llseek(struct file *file, loff_t offset, int origin);
extern int FhgfsOps_opendirIncremental(struct inode* inode, struct file* file);
extern int FhgfsOps_releasedir(struct inode* inode, struct file* file);
// exactly one of the two variants is declared, depending on KERNEL_HAS_ITERATE_DIR
#ifdef KERNEL_HAS_ITERATE_DIR
// variant that takes a struct dir_context
extern int FhgfsOps_iterateIncremental(struct file* file, struct dir_context* ctx);
#else
// variant that takes a buffer and a filldir_t callback
extern int FhgfsOps_readdirIncremental(struct file* file, void* buf, filldir_t filldir);
#endif // KERNEL_HAS_ITERATE_DIR
extern int FhgfsOps_open(struct inode* inode, struct file* file);
extern int FhgfsOps_openReferenceHandle(App* app, struct inode* inode, struct file* file,
unsigned openFlags, LookupIntentInfoOut* lookupInfo, uint32_t* outVersion);
extern int FhgfsOps_release(struct inode* inode, struct file* file);
/* FhgfsOps_fsync is declared with one of three signatures, selected by the feature macros
   below (presumably to match the fsync file operation of the kernel we build for). */
#ifdef KERNEL_HAS_FSYNC_RANGE /* added in vanilla 3.1 */
int FhgfsOps_fsync(struct file* file, loff_t start, loff_t end, int datasync);
#elif !defined(KERNEL_HAS_FSYNC_DENTRY)
int FhgfsOps_fsync(struct file* file, int datasync);
#else
/* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34) */
int FhgfsOps_fsync(struct file* file, struct dentry* dentry, int datasync);
#endif // KERNEL_HAS_FSYNC_RANGE / KERNEL_HAS_FSYNC_DENTRY
extern int __FhgfsOps_flush(App* app, struct file *file, bool discardCacheOnError,
bool forceRemoteFlush, bool checkSession, bool isClose);
extern int FhgfsOps_flock(struct file* file, int cmd, struct file_lock* fileLock);
extern int FhgfsOps_lock(struct file* file, int cmd, struct file_lock* fileLock);
extern int FhgfsOps_readlink(struct dentry* dentry, char __user* buf, int size);
extern ssize_t FhgfsOps_read(struct file* file, char __user *buf, size_t size,
loff_t* offsetPointer);
extern ssize_t FhgfsOps_write(struct file* file, const char __user *buf, size_t size,
loff_t* offsetPointer);
ssize_t FhgfsOps_read_iter(struct kiocb *iocb, struct iov_iter *to);
ssize_t FhgfsOps_write_iter(struct kiocb *iocb, struct iov_iter *from);
extern int FhgfsOps_mmap(struct file *, struct vm_area_struct *);
extern ssize_t FhgfsOps_directIO(struct kiocb *iocb, struct iov_iter *iter);
extern int FhgfsOps_releaseCancelLocks(struct inode* inode, struct file* file);
extern ssize_t __FhgfsOps_readSparse(struct file* file, struct iov_iter *iter, size_t size,
loff_t offset);
// getters & setters
static inline FsObjectInfo* __FhgfsOps_getObjectInfo(struct file* file);
static inline FsDirInfo* __FhgfsOps_getDirInfo(struct file* file);
static inline void __FhgfsOps_setDirInfo(FsDirInfo* dirInfo, struct file* outFile);
static inline FsFileInfo* __FhgfsOps_getFileInfo(struct file* file);
static inline void __FhgfsOps_setFileInfo(FsFileInfo* fileInfo, struct file* outFile);
static inline int __FhgfsOps_getCurrentLockPID(void);
static inline int64_t __FhgfsOps_getCurrentLockFD(struct file* file);
FsObjectInfo* __FhgfsOps_getObjectInfo(struct file* file)
{
return (FsObjectInfo*)file->private_data;
}
FsDirInfo* __FhgfsOps_getDirInfo(struct file* file)
{
return (FsDirInfo*)file->private_data;
}
/**
 * Store dirInfo as the private_data pointer of outFile.
 */
void __FhgfsOps_setDirInfo(FsDirInfo* dirInfo, struct file* outFile)
{
   outFile->private_data = (void*)dirInfo;
}
FsFileInfo* __FhgfsOps_getFileInfo(struct file* file)
{
return (FsFileInfo*)file->private_data;
}
/**
 * Store fileInfo as the private_data pointer of outFile.
 */
void __FhgfsOps_setFileInfo(FsFileInfo* fileInfo, struct file* outFile)
{
   outFile->private_data = (void*)fileInfo;
}
/**
 * @return the ID that is used as lock owner PID for the calling process; this is the tgid of
 *    "current" (and _not_ current->pid)
 */
int __FhgfsOps_getCurrentLockPID(void)
{
   /* note: tgid (not current->pid) is the actual equivalent of the number that getpid() returns
      to user-space and is also the thing that is assigned to fl_pid in "<kernel>/fs/locks.c" */
   int lockOwnerPID = current->tgid;

   return lockOwnerPID;
}
/**
 * @return virtual file descriptor for entry locking: the address of the given struct file
 *    (which is _not_ the user-space fd)
 */
int64_t __FhgfsOps_getCurrentLockFD(struct file* file)
{
   /* note: the user-space fd is not available here and the kernel entry locking routines in
      "<kernel>/fs/locks.c" also compare the struct file pointer instead, so the pointer value
      is what we hand out. */
   size_t fileAddress = (size_t)file;

   return fileAddress;
}
/* beegfs_pgfol_t is a struct folio* if KERNEL_WRITE_BEGIN_USES_FOLIO is defined and a
   struct page* otherwise; use beegfs_to_pgfol() and beegfs_get_page() to convert. */
#ifdef KERNEL_WRITE_BEGIN_USES_FOLIO
typedef struct folio* beegfs_pgfol_t;
#else
typedef struct page* beegfs_pgfol_t;
#endif
/* BEEGFS_HAS_WRITE_FLAGS is always defined: 1 if KERNEL_WRITE_BEGIN_HAS_FLAGS is defined and
   0 otherwise (so it can be used in normal expressions instead of #ifdef). */
#ifdef KERNEL_WRITE_BEGIN_HAS_FLAGS
#define BEEGFS_HAS_WRITE_FLAGS 1
#else
#define BEEGFS_HAS_WRITE_FLAGS 0
#endif
/**
 * Wraps a struct page* as beegfs_pgfol_t: the result is the folio of the page (page_folio() ) if
 * KERNEL_WRITE_BEGIN_USES_FOLIO is defined and the unchanged page pointer otherwise.
 */
static inline beegfs_pgfol_t beegfs_to_pgfol(struct page *page)
{
   beegfs_pgfol_t converted;

#ifdef KERNEL_WRITE_BEGIN_USES_FOLIO
   converted = page_folio(page);
#else
   converted = page;
#endif

   return converted;
}
/**
 * Unwraps a beegfs_pgfol_t to a struct page*: the address of the folio's "page" member if
 * KERNEL_WRITE_BEGIN_USES_FOLIO is defined and the unchanged page pointer otherwise.
 */
static inline struct page* beegfs_get_page(beegfs_pgfol_t pgfol)
{
   struct page* unwrapped;

#ifdef KERNEL_WRITE_BEGIN_USES_FOLIO
   unwrapped = &pgfol->page;
#else
   unwrapped = pgfol;
#endif

   return unwrapped;
}
/**
 * Calls grab_cache_page_write_begin() with or without the flags argument, depending on whether
 * KERNEL_WRITE_BEGIN_HAS_FLAGS is defined.
 *
 * @param flags only passed on if KERNEL_WRITE_BEGIN_HAS_FLAGS is defined; ignored otherwise
 * @return the result of grab_cache_page_write_begin()
 */
static inline struct page* beegfs_grab_cache_page(struct address_space* mapping,
                                                  pgoff_t index,
                                                  unsigned flags)
{
   struct page* cachePage;

#ifdef KERNEL_WRITE_BEGIN_HAS_FLAGS
   cachePage = grab_cache_page_write_begin(mapping, index, flags);
#else
   IGNORE_UNUSED_VARIABLE(flags);
   cachePage = grab_cache_page_write_begin(mapping, index);
#endif

   return cachePage;
}
#endif /*FHGFSOPSFILE_H_*/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
#ifndef FhgfsOpsFileNative_h_bI2n6XRVSaWCQHxNdwQA0I
#define FhgfsOpsFileNative_h_bI2n6XRVSaWCQHxNdwQA0I
#include <linux/fs.h>
extern const struct file_operations fhgfs_file_native_ops;
extern const struct address_space_operations fhgfs_addrspace_native_ops;
extern bool beegfs_native_init(void);
extern void beegfs_native_release(void);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
#ifndef FHGFSOPSHELPER_H_
#define FHGFSOPSHELPER_H_
#include <app/App.h>
#include <common/storage/StorageErrors.h>
#include <common/toolkit/MetadataTk.h>
#include <common/Common.h>
#include <components/InternodeSyncer.h>
#include <filesystem/FsDirInfo.h>
#include <filesystem/FsFileInfo.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#ifdef LOG_DEBUG_MESSAGES
/**
* Print debug messages. Used to trace functions which are called frequently; it therefore has
* to be explicitly enabled at compile time (by defining LOG_DEBUG_MESSAGES).
* Forwards all arguments to FhgfsOpsHelper_logOpMsg() with log level Log_SPAM.
* Has to be a macro due to usage of __VA_ARGS__.
*/
#define FhgfsOpsHelper_logOpDebug(app, dentry, inode, logContext, msgStr, ...) \
FhgfsOpsHelper_logOpMsg(Log_SPAM, app, dentry, inode, logContext, msgStr, ##__VA_ARGS__)
#else
// not a debug build, so these debug messages are disabled completely (macro expands to nothing)
#define FhgfsOpsHelper_logOpDebug(app, dentry, inode, logContext, msgStr, ...)
#endif // LOG_DEBUG_MESSAGES
extern void FhgfsOpsHelper_logOpMsg(int level, App* app, struct dentry* dentry, struct inode* inode,
const char *logContext, const char *msgStr, ...);
extern int FhgfsOpsHelper_refreshDirInfoIncremental(App* app, const EntryInfo* entryInfo,
FsDirInfo* dirInfo, bool forceUpdate);
extern FhgfsOpsErr FhgfsOpsHelper_flushCache(App* app, FhgfsInode* fhgfsInode,
bool discardCacheOnError);
extern FhgfsOpsErr FhgfsOpsHelper_flushCacheNoWait(App* app, FhgfsInode* fhgfsInode,
bool discardCacheOnError);
extern ssize_t FhgfsOpsHelper_writeCached(struct iov_iter *iter, size_t size,
loff_t offset, FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo);
extern ssize_t FhgfsOpsHelper_readCached(struct iov_iter *iter, size_t size,
loff_t offset, FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo);
extern FhgfsOpsErr __FhgfsOpsHelper_flushCacheUnlocked(App* app, FhgfsInode* fhgfsInode,
bool discardCacheOnError);
extern ssize_t __FhgfsOpsHelper_writeCacheFlushed(struct iov_iter *iter, size_t size,
loff_t offset, FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo);
extern ssize_t __FhgfsOpsHelper_readCacheFlushed(struct iov_iter *iter, size_t size,
loff_t offset, FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo);
extern void __FhgfsOpsHelper_discardCache(App* app, FhgfsInode* fhgfsInode);
extern FhgfsOpsErr FhgfsOpsHelper_getAppendLock(FhgfsInode* inode, RemotingIOInfo* ioInfo);
extern FhgfsOpsErr FhgfsOpsHelper_releaseAppendLock(FhgfsInode* inode, RemotingIOInfo* ioInfo);
ssize_t FhgfsOpsHelper_appendfileVecOffset(FhgfsInode* fhgfsInode, struct iov_iter *iter,
size_t count, RemotingIOInfo* ioInfo, loff_t offsetFromEnd, loff_t* outNewOffset);
extern FhgfsOpsErr FhgfsOpsHelper_readOrClearUser(App* app, struct iov_iter *iter, size_t size,
loff_t offset, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo);
extern void FhgfsOpsHelper_getRelativeLinkStr(const char* pathFrom, const char* pathTo,
char** pathRelativeTo);
extern int FhgfsOpsHelper_symlink(App* app, const EntryInfo* parentInfo, const char* to,
struct CreateInfo *createInfo, EntryInfo* outEntryInfo);
extern ssize_t FhgfsOpsHelper_readStateless(App* app, const EntryInfo* entryInfo,
struct iov_iter *iter, size_t size, loff_t offset);
extern ssize_t FhgfsOpsHelper_writeStateless(App* app, const EntryInfo* entryInfo,
struct iov_iter *iter, size_t size, loff_t offset, unsigned uid, unsigned gid);
// inliners
static inline void FhgfsOpsHelper_logOp(int level, App* app, struct dentry* dentry,
struct inode* inode, const char *logContext);
static inline FhgfsOpsErr FhgfsOpsHelper_closefileWithAsyncRetry(const EntryInfo* entryInfo,
RemotingIOInfo* ioInfo, struct FileEvent* event);
static inline FhgfsOpsErr FhgfsOpsHelper_unlockEntryWithAsyncRetry(const EntryInfo* entryInfo,
RWLock* eiRLock, RemotingIOInfo* ioInfo, int64_t clientFD);
static inline FhgfsOpsErr FhgfsOpsHelper_unlockRangeWithAsyncRetry(const EntryInfo* entryInfo,
RWLock* eiRLock, RemotingIOInfo* ioInfo, int ownerPID);
/**
 * Reads a symlink into a kernel buffer.
 *
 * The buffer is wrapped by STACK_ALLOC_BEEGFS_ITER_KVEC() and then read into via
 * FhgfsOpsHelper_readStateless() starting at offset 0.
 *
 * @param buf target buffer of at least "size" bytes
 * @return number of read bytes or negative linux error code
 */
static inline int FhgfsOpsHelper_readlink_kernel(App* app, const EntryInfo* entryInfo,
   char *buf, int size)
{
   struct iov_iter *kvecIter = STACK_ALLOC_BEEGFS_ITER_KVEC(buf, size, READ);
   int readRes = FhgfsOpsHelper_readStateless(app, entryInfo, kvecIter, size, 0);

   return readRes;
}
/**
 * Wrapper for FhgfsOpsHelper_logOpMsg() that logs without a message string (msgStr is NULL).
 *
 * Note: Returns without calling FhgfsOpsHelper_logOpMsg() if the given level is higher than the
 * current log level of the app's logger.
 */
void FhgfsOpsHelper_logOp(int level, App* app, struct dentry* dentry, struct inode* inode,
   const char *logContext)
{
   // the level is checked right here (and not only in the callee), as this function is inlined
   if (likely(level > Logger_getLogLevel(App_getLogger(app) ) ) )
      return;

   FhgfsOpsHelper_logOpMsg(level, app, dentry, inode, logContext, NULL);
}
/**
 * Call close remoting and add the close operation to the corresponding async retry queue if a
 * communication error occurred or the call was interrupted.
 *
 * If the operation is not queued for retry and an event was given, the event is uninitialized
 * here; otherwise the event is handed over to the delayed close queue.
 *
 * @param entryInfo will be copied
 * @param ioInfo will be copied
 * @param event may be NULL
 * @return remoting result (independent of whether the operation was added for later retry)
 */
FhgfsOpsErr FhgfsOpsHelper_closefileWithAsyncRetry(const EntryInfo* entryInfo,
   RemotingIOInfo* ioInfo, struct FileEvent* event)
{
   const FhgfsOpsErr remotingRes = FhgfsOpsRemoting_closefile(entryInfo, ioInfo, event);
   const bool needsRetry =
      (remotingRes == FhgfsOpsErr_COMMUNICATION) || (remotingRes == FhgfsOpsErr_INTERRUPTED);

   if(!needsRetry)
   { // final result => the event is no longer needed
      if(event)
         FileEvent_uninit(event);

      return remotingRes;
   }

   // comm error => add for later retry
   InternodeSyncer_delayedCloseAdd(App_getInternodeSyncer(ioInfo->app), entryInfo, ioInfo, event);

   return remotingRes;
}
/**
 * Send an entry lock request of type ENTRYLOCKTYPE_CANCEL for the given clientFD and add the
 * unlock to the corresponding async retry queue if a communication error occurred or the call
 * was interrupted.
 *
 * @return remoting result (independent of whether the operation was added for later retry)
 */
FhgfsOpsErr FhgfsOpsHelper_unlockEntryWithAsyncRetry(const EntryInfo* entryInfo, RWLock* eiRLock,
   RemotingIOInfo* ioInfo, int64_t clientFD)
{
   const FhgfsOpsErr remotingRes = FhgfsOpsRemoting_flockEntryEx(entryInfo, eiRLock, ioInfo->app,
      ioInfo->fileHandleID, clientFD, 0, ENTRYLOCKTYPE_CANCEL, true);
   const bool needsRetry =
      (remotingRes == FhgfsOpsErr_COMMUNICATION) || (remotingRes == FhgfsOpsErr_INTERRUPTED);

   if(needsRetry)
   { // comm error => add for later retry
      InternodeSyncer_delayedEntryUnlockAdd(App_getInternodeSyncer(ioInfo->app), entryInfo,
         ioInfo, clientFD);
   }

   return remotingRes;
}
/**
 * Send a range lock request of type ENTRYLOCKTYPE_CANCEL for the given ownerPID and add the
 * unlock to the corresponding async retry queue if a communication error occurred or the call
 * was interrupted.
 *
 * Note: the request is sent with the values 0 and ~0ULL (presumably start and end of the range,
 * i.e. the whole file - confirm against FhgfsOpsRemoting_flockRangeEx).
 *
 * @return remoting result (independent of whether the operation was added for later retry)
 */
FhgfsOpsErr FhgfsOpsHelper_unlockRangeWithAsyncRetry(const EntryInfo* entryInfo, RWLock* eiRLock,
   RemotingIOInfo* ioInfo, int ownerPID)
{
   const FhgfsOpsErr remotingRes = FhgfsOpsRemoting_flockRangeEx(entryInfo, eiRLock, ioInfo->app,
      ioInfo->fileHandleID, ownerPID, ENTRYLOCKTYPE_CANCEL, 0, ~0ULL, true);
   const bool needsRetry =
      (remotingRes == FhgfsOpsErr_COMMUNICATION) || (remotingRes == FhgfsOpsErr_INTERRUPTED);

   if(needsRetry)
   { // comm error => add for later retry
      InternodeSyncer_delayedRangeUnlockAdd(App_getInternodeSyncer(ioInfo->app), entryInfo,
         ioInfo, ownerPID);
   }

   return remotingRes;
}
#endif /*FHGFSOPSHELPER_H_*/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,513 @@
#ifndef FHGFSOPSINODE_H_
#define FHGFSOPSINODE_H_
#include <app/App.h>
#include <common/threading/Mutex.h>
#include <common/toolkit/Time.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <filesystem/FsDirInfo.h>
#include <filesystem/FsFileInfo.h>
#include <toolkit/NoAllocBufferStore.h>
#include <common/Common.h>
#include "FhgfsInode.h"
#include "FhgfsOps_versions.h"
#include <linux/fs.h>
#include <linux/vfs.h>
// forward declaration
struct App;
struct FhgfsInodeComparisonInfo;
typedef struct FhgfsInodeComparisonInfo FhgfsInodeComparisonInfo;
#if defined(KERNEL_HAS_CURRENT_TIME_SPEC64)
typedef struct timespec64 inode_timespec;
#else
typedef struct timespec inode_timespec;
#endif
#if ! defined(KERNEL_HAS_INODE_GET_SET_CTIME)

/* Fallback accessors for inode::i_ctime, adapted from kernel code to work for older kernels.
   The function signatures were introduced in version ~6.5 */

/** @return seconds part of the inode's ctime */
static inline time64_t inode_get_ctime_sec(const struct inode *inode)
{
   return inode->i_ctime.tv_sec;
}

/** @return nanoseconds part of the inode's ctime */
static inline long inode_get_ctime_nsec(const struct inode *inode)
{
   return inode->i_ctime.tv_nsec;
}

/** @return copy of the inode's ctime */
static inline inode_timespec inode_get_ctime(const struct inode *inode)
{
   return inode->i_ctime;
}

/**
 * Store ts as the inode's ctime.
 *
 * @return the value that was stored
 */
static inline inode_timespec inode_set_ctime_to_ts(struct inode *inode, inode_timespec ts)
{
   inode->i_ctime = ts;

   return ts;
}

/**
 * Store the given seconds/nanoseconds pair as the inode's ctime.
 *
 * @return the value that was stored
 */
static inline inode_timespec inode_set_ctime(struct inode *inode, time64_t sec, long nsec)
{
   return inode_set_ctime_to_ts(inode, (inode_timespec){ .tv_sec = sec, .tv_nsec = nsec } );
}

#endif
#if ! defined(KERNEL_HAS_INODE_GET_SET_CTIME_MTIME_ATIME)

/* Fallback accessors for inode::i_atime and inode::i_mtime, adapted from kernel code to work
   for older kernels. The function signatures were introduced in version ~6.6 */

/** @return seconds part of the inode's atime */
static inline time64_t inode_get_atime_sec(const struct inode *inode)
{
   return inode->i_atime.tv_sec;
}

/** @return nanoseconds part of the inode's atime */
static inline long inode_get_atime_nsec(const struct inode *inode)
{
   return inode->i_atime.tv_nsec;
}

/** @return copy of the inode's atime */
static inline inode_timespec inode_get_atime(const struct inode *inode)
{
   return inode->i_atime;
}

/**
 * Store ts as the inode's atime.
 *
 * @return the value that was stored
 */
static inline inode_timespec inode_set_atime_to_ts(struct inode *inode, inode_timespec ts)
{
   inode->i_atime = ts;

   return ts;
}

/**
 * Store the given seconds/nanoseconds pair as the inode's atime.
 *
 * @return the value that was stored
 */
static inline inode_timespec inode_set_atime(struct inode *inode, time64_t sec, long nsec)
{
   return inode_set_atime_to_ts(inode, (inode_timespec){ .tv_sec = sec, .tv_nsec = nsec } );
}

/** @return seconds part of the inode's mtime */
static inline time64_t inode_get_mtime_sec(const struct inode *inode)
{
   return inode->i_mtime.tv_sec;
}

/** @return nanoseconds part of the inode's mtime */
static inline long inode_get_mtime_nsec(const struct inode *inode)
{
   return inode->i_mtime.tv_nsec;
}

/** @return copy of the inode's mtime */
static inline inode_timespec inode_get_mtime(const struct inode *inode)
{
   return inode->i_mtime;
}

/**
 * Store ts as the inode's mtime.
 *
 * @return the value that was stored
 */
static inline inode_timespec inode_set_mtime_to_ts(struct inode *inode, inode_timespec ts)
{
   inode->i_mtime = ts;

   return ts;
}

/**
 * Store the given seconds/nanoseconds pair as the inode's mtime.
 *
 * @return the value that was stored
 */
static inline inode_timespec inode_set_mtime(struct inode *inode, time64_t sec, long nsec)
{
   return inode_set_mtime_to_ts(inode, (inode_timespec){ .tv_sec = sec, .tv_nsec = nsec } );
}

#endif
/**
 * Set both the mtime and the ctime of the inode to the same timestamp.
 */
static inline void inode_set_mc_time(struct inode *inode, inode_timespec ts)
{
   inode_set_mtime_to_ts(inode, ts);
   inode_set_ctime_to_ts(inode, ts);
}
#ifndef KERNEL_HAS_ATOMIC_OPEN
extern struct dentry* FhgfsOps_lookupIntent(struct inode* parentDir, struct dentry* dentry,
struct nameidata* nameidata);
#else
extern struct dentry* FhgfsOps_lookupIntent(struct inode* parentDir, struct dentry* dentry,
unsigned flags);
#endif // KERNEL_HAS_ATOMIC_OPEN
#ifdef KERNEL_HAS_STATX
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_getattr(struct mnt_idmap* idmap, const struct path* path,
struct kstat* kstat, u32 request_mask, unsigned int query_flags);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_getattr(struct user_namespace* ns, const struct path* path,
struct kstat* kstat, u32 request_mask, unsigned int query_flags);
#else
extern int FhgfsOps_getattr(const struct path* path, struct kstat* kstat, u32 request_mask,
unsigned int query_flags);
#endif
#else
extern int FhgfsOps_getattr(struct vfsmount* mnt, struct dentry* dentry, struct kstat* kstat);
#endif
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_setattr(struct mnt_idmap* idmap, struct dentry* dentry, struct iattr* iattr);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_setattr(struct user_namespace* ns, struct dentry* dentry, struct iattr* iattr);
#else
extern int FhgfsOps_setattr(struct dentry* dentry, struct iattr* iattr);
#endif
extern ssize_t FhgfsOps_listxattr(struct dentry* dentry, char* value, size_t size);
#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
extern ssize_t FhgfsOps_getxattr(struct dentry* dentry, const char* name, void* value, size_t size);
extern int FhgfsOps_setxattr(struct dentry* dentry, const char* name, const void* value,
size_t size, int flags);
#else
extern ssize_t FhgfsOps_getxattr(struct inode* inode, const char* name, void* value, size_t size);
extern int FhgfsOps_setxattr(struct inode* inode, const char* name, const void* value,
size_t size, int flags);
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER
extern int FhgfsOps_removexattr(struct dentry* dentry, const char* name);
extern int FhgfsOps_removexattrInode(struct inode* inode, const char* name);
#if defined(KERNEL_HAS_GET_INODE_ACL)
extern struct posix_acl* FhgfsOps_get_inode_acl(struct inode* inode, int type, bool rcu);
#endif
#ifdef KERNEL_HAS_GET_ACL
#if defined(KERNEL_HAS_POSIX_GET_ACL_IDMAP)
extern struct posix_acl * FhgfsOps_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type);
#elif defined(KERNEL_HAS_POSIX_GET_ACL_NS)
extern struct posix_acl * FhgfsOps_get_acl(struct user_namespace *userns, struct dentry *dentry, int type);
#elif defined(KERNEL_POSIX_GET_ACL_HAS_RCU)
extern struct posix_acl* FhgfsOps_get_acl(struct inode* inode, int type, bool rcu);
#else
extern struct posix_acl* FhgfsOps_get_acl(struct inode* inode, int type);
#endif
int FhgfsOps_aclChmod(struct iattr* iattr, struct dentry* dentry);
#endif
#if defined(KERNEL_HAS_SET_ACL)
#if defined(KERNEL_HAS_SET_ACL_DENTRY)
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_set_acl(struct mnt_idmap* mnt_userns, struct dentry* dentry,
struct posix_acl* acl, int type);
#else
extern int FhgfsOps_set_acl(struct user_namespace* mnt_userns, struct dentry* dentry,
struct posix_acl* acl, int type);
#endif
#else
#if defined(KERNEL_HAS_SET_ACL_NS_INODE)
extern int FhgfsOps_set_acl(struct user_namespace* mnt_userns, struct inode* inode,
struct posix_acl* acl, int type);
#else
extern int FhgfsOps_set_acl(struct inode* inode, struct posix_acl* acl, int type);
#endif
#endif //KERNEL_HAS_SET_ACL_DENTRY
#endif // KERNEL_HAS_SET_ACL
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_mkdir(struct mnt_idmap* idmap, struct inode* dir,
struct dentry* dentry, umode_t mode);
extern int FhgfsOps_mknod(struct mnt_idmap* idmap, struct inode* dir,
struct dentry* dentry, umode_t mode, dev_t dev);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_mkdir(struct user_namespace* mnt_userns, struct inode* dir,
struct dentry* dentry, umode_t mode);
extern int FhgfsOps_mknod(struct user_namespace* mnt_userns, struct inode* dir,
struct dentry* dentry, umode_t mode, dev_t dev);
#elif defined(KERNEL_HAS_UMODE_T)
extern int FhgfsOps_mkdir(struct inode* dir, struct dentry* dentry, umode_t mode);
extern int FhgfsOps_mknod(struct inode* dir, struct dentry* dentry, umode_t mode, dev_t dev);
#else
extern int FhgfsOps_mkdir(struct inode* dir, struct dentry* dentry, int mode);
extern int FhgfsOps_mknod(struct inode* dir, struct dentry* dentry, int mode, dev_t dev);
#endif
#if defined KERNEL_HAS_ATOMIC_OPEN
int FhgfsOps_atomicOpen(struct inode* dir, struct dentry* dentry, struct file* file,
unsigned openFlags, umode_t createMode
#ifndef FMODE_CREATED
, int* outOpenedFlags
#endif
);
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_createIntent(struct mnt_idmap* idmap, struct inode* dir,
struct dentry* dentry, umode_t mode, bool isExclusiveCreate);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_createIntent(struct user_namespace* mnt_userns, struct inode* dir,
struct dentry* dentry, umode_t mode, bool isExclusiveCreate);
#else
extern int FhgfsOps_createIntent(struct inode* dir, struct dentry* dentry, umode_t mode,
bool isExclusiveCreate);
#endif
#elif defined KERNEL_HAS_UMODE_T
extern int FhgfsOps_createIntent(struct inode* dir, struct dentry* dentry, umode_t mode,
struct nameidata* nameidata);
#else
extern int FhgfsOps_createIntent(struct inode* dir, struct dentry* dentry, int mode,
struct nameidata* nameidata);
#endif // KERNEL_HAS_ATOMIC_OPEN
extern int FhgfsOps_rmdir(struct inode* dir, struct dentry* dentry);
extern int FhgfsOps_unlink(struct inode* dir, struct dentry* dentry);
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_symlink(struct mnt_idmap* idmap, struct inode* dir,
struct dentry* dentry, const char* to);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_symlink(struct user_namespace* mnt_userns, struct inode* dir,
struct dentry* dentry, const char* to);
#else
extern int FhgfsOps_symlink(struct inode* dir, struct dentry* dentry, const char* to);
#endif
extern int FhgfsOps_link(struct dentry* dentryFrom, struct inode* inode, struct dentry* dentryTo);
extern int FhgfsOps_hardlinkAsSymlink(struct dentry* oldDentry, struct inode* dir,
struct dentry* newDentry);
#if defined KERNEL_HAS_GET_LINK
extern const char* FhgfsOps_get_link(struct dentry* dentry, struct inode* inode,
struct delayed_call* done);
#elif defined(KERNEL_HAS_FOLLOW_LINK_COOKIE)
extern const char* FhgfsOps_follow_link(struct dentry* dentry, void** cookie);
extern void FhgfsOps_put_link(struct inode* inode, void* cookie);
#else
extern void* FhgfsOps_follow_link(struct dentry* dentry, struct nameidata* nd);
extern void FhgfsOps_put_link(struct dentry* dentry, struct nameidata* nd, void* p);
#endif
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
extern int FhgfsOps_rename(struct mnt_idmap* idmap, struct inode* inodeDirFrom,
struct dentry* dentryFrom, struct inode* inodeDirTo, struct dentry* dentryTo, unsigned flags);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
extern int FhgfsOps_rename(struct user_namespace* mnt_userns, struct inode* inodeDirFrom,
struct dentry* dentryFrom, struct inode* inodeDirTo, struct dentry* dentryTo, unsigned flags);
#elif defined(KERNEL_HAS_RENAME_FLAGS)
extern int FhgfsOps_rename(struct inode* inodeDirFrom, struct dentry* dentryFrom,
struct inode* inodeDirTo, struct dentry* dentryTo, unsigned flags);
#else
extern int FhgfsOps_rename(struct inode* inodeDirFrom, struct dentry* dentryFrom,
struct inode* inodeDirTo, struct dentry* dentryTo);
#endif
extern int FhgfsOps_vmtruncate(struct inode* inode, loff_t offset);
extern bool FhgfsOps_initInodeCache(void);
extern void FhgfsOps_destroyInodeCache(void);
extern struct inode* FhgfsOps_alloc_inode(struct super_block* sb);
extern void FhgfsOps_destroy_inode(struct inode* inode);
extern struct inode* __FhgfsOps_newInodeWithParentID(struct super_block* sb, struct kstat* kstat,
dev_t dev, EntryInfo* entryInfo, NumNodeID parentNodeID, FhgfsIsizeHints* iSizeHints, unsigned int metaVersion);
extern int __FhgfsOps_instantiateInode(struct dentry* dentry, EntryInfo* entryInfo,
fhgfs_stat* fhgfsStat, FhgfsIsizeHints* iSizeHints);
int __FhgfsOps_compareInodeID(struct inode* cachedInode, void* newInodeInfo);
int __FhgfsOps_initNewInodeDummy(struct inode* newInode, void* newInodeInfo);
static inline int __FhgfsOps_refreshInode(App* app, struct inode* inode, fhgfs_stat* fhgfsStat,
FhgfsIsizeHints* iSizeHints);
extern int __FhgfsOps_doRefreshInode(App* app, struct inode* inode, fhgfs_stat* fhgfsStat,
FhgfsIsizeHints* iSizeHints, bool noFlush);
extern int __FhgfsOps_revalidateMapping(App* app, struct inode* inode);
extern int __FhgfsOps_flushInodeFileCache(App* app, struct inode* inode);
extern void __FhgfsOps_clearInodeStripePattern(App* app, struct inode* inode);
// inliners
static inline void __FhgfsOps_applyStatDataToInode(struct kstat* kstat, FhgfsIsizeHints* iSizeHints,
struct inode* outInode);
static inline void __FhgfsOps_applyStatDataToInodeUnlocked(struct kstat* kstat,
FhgfsIsizeHints* iSizeHints, struct inode* outInode);
static inline void __FhgfsOps_applyStatAttribsToInode(struct kstat* kstat, struct inode* outInode);
static inline void __FhgfsOps_applyStatSizeToInode(struct kstat* kstat,
FhgfsIsizeHints* iSizeHints, struct inode* inOutInode);
static inline struct inode* __FhgfsOps_newInode(struct super_block* sb, struct kstat* kstat,
dev_t dev, EntryInfo* entryInfo, FhgfsIsizeHints* iSizeHints, unsigned int metaVersion);
static inline bool __FhgfsOps_isPagedMode(struct super_block* sb);
/**
 * This structure is passed to _compareInodeID().
 *
 * Note: entryID is a plain pointer; whoever fills this structure keeps ownership of the string
 * (presumably it only needs to stay valid for the duration of the comparison - confirm against
 * the users of __FhgfsOps_compareInodeID).
 */
struct FhgfsInodeComparisonInfo
{
ino_t inodeHash; // (=> inode::i_ino)
const char* entryID; // entry ID string belonging to the inode that is being looked up
};
/**
 * Applies all stat data (attribs, size and blocks) from kstat to the inode.
 *
 * The attribs are applied before i_lock is taken; only the size/blocks update runs under the
 * lock.
 *
 * Note: acquires i_lock spinlock to protect i_size and i_blocks
 *
 * @param iSizeHints might be NULL (passed through to __FhgfsOps_applyStatSizeToInode)
 */
void __FhgfsOps_applyStatDataToInode(struct kstat* kstat, FhgfsIsizeHints* iSizeHints,
struct inode* outInode)
{
__FhgfsOps_applyStatAttribsToInode(kstat, outInode);
spin_lock(&outInode->i_lock); // I _ L O C K
__FhgfsOps_applyStatSizeToInode(kstat, iSizeHints, outInode);
spin_unlock(&outInode->i_lock); // I _ U N L O C K
}
/**
 * Applies all stat data (attribs, size and blocks) from kstat to the inode without taking
 * i_lock itself ("unlocked" refers to this function not doing the locking).
 *
 * Note: Caller must hold i_lock.
 *
 * @param iSizeHints might be NULL (passed through to __FhgfsOps_applyStatSizeToInode)
 */
void __FhgfsOps_applyStatDataToInodeUnlocked(struct kstat* kstat, FhgfsIsizeHints* iSizeHints,
struct inode* outInode)
{
__FhgfsOps_applyStatAttribsToInode(kstat, outInode);
__FhgfsOps_applyStatSizeToInode(kstat, iSizeHints, outInode);
}
/**
 * Copies mode, owner, timestamps and link count from kstat to the inode and sets the inode's
 * block bits from the config. Size and block count are not touched here.
 *
 * Note: Don't call this directly - use the _applyStatDataToInode... wrappers.
 */
void __FhgfsOps_applyStatAttribsToInode(struct kstat* kstat, struct inode* outInode)
{
   Config* cfg = App_getConfig(FhgfsOps_getApp(outInode->i_sb) );

   // remote attribs (received from nodes): mode and ownership
   outInode->i_mode = kstat->mode;
   outInode->i_uid = kstat->uid;
   outInode->i_gid = kstat->gid;

   // remote attribs: timestamps
   inode_set_atime_to_ts(outInode, kstat->atime);
   inode_set_mtime_to_ts(outInode, kstat->mtime);
   inode_set_ctime_to_ts(outInode, kstat->ctime);

   // remote attribs: link count
   set_nlink(outInode, kstat->nlink);

   // block bits come from the client config
   outInode->i_blkbits = Config_getTuneInodeBlockBits(cfg);
}
/**
 * Applies size and block count from kstat to the inode.
 *
 * For regular files that have the page-write flag set, a kstat size smaller than the current
 * i_size is ignored (the inode is left untouched) if one of the following holds:
 *  - the inode's no-isize-decrease flag is set
 *  - the write-back counter is non-zero
 *  - iSizeHints says to ignore the size
 *  - the last write-back/i_size write happened at or after iSizeHints->timeBeforeRemoteStat
 *
 * Note: Don't call this directly - use the _applyStatDataToInode... wrappers.
 * Note: Caller must hold i_lock. (In BEEGFS_DEBUG builds, i_lock is temporarily released and
 *    re-acquired around the warn-once log message.)
 *
 * @param iSizeHints might be NULL
 */
void __FhgfsOps_applyStatSizeToInode(struct kstat* kstat, FhgfsIsizeHints* iSizeHints,
   struct inode* inOutInode)
{
   FhgfsInode* fhgfsInode = BEEGFS_INODE(inOutInode);

   if (S_ISREG(inOutInode->i_mode) && FhgfsInode_getHasPageWriteFlag(fhgfsInode) )
   {
      loff_t oldSize = i_size_read(inOutInode);
      uint64_t lastWriteBackOrIsizeWriteTime;

      /* We don't allow to decrease the file size in paged mode, as
       * this may/will cause data corruption (zeros/holes instead of real data).
       * The detection when to apply the server i_size is rather complex. */
      if (oldSize > kstat->size)
      {
         if (FhgfsInode_getNoIsizeDecrease(fhgfsInode) )
            return;

         // there are current write-back threads running
         if (FhgfsInode_getWriteBackCounter(fhgfsInode) )
            return;

         // must be read after reading the write-back counter
         lastWriteBackOrIsizeWriteTime = FhgfsInode_getLastWriteBackOrIsizeWriteTime(fhgfsInode);

         if (iSizeHints)
         {
            if (iSizeHints->ignoreIsize)
               return; // iSizeHints tells us to ignore the value

            if (time_after_eq((unsigned long) lastWriteBackOrIsizeWriteTime,
                  (unsigned long) iSizeHints->timeBeforeRemoteStat) )
               return; /* i_size was updated or a writeback thread finished after we sent the
                        * remote stat call, so there were concurrent stat/isize updates and we
                        * need to ignore the remote value. */
         }

#ifdef BEEGFS_DEBUG
         {
            static bool didLog = false;

            struct super_block* sb = inOutInode->i_sb;
            App* app = FhgfsOps_getApp(sb);
            Logger* log = App_getLogger(app);

            if (!didLog)
            {
               // iSizeHints is optional (see above), so it must not be dereferenced blindly
               unsigned long long timeBeforeRemoteStat =
                  iSizeHints ? (unsigned long long) iSizeHints->timeBeforeRemoteStat : 0;

               // unlock, as the logger gets a mutex lock -> possible lock order issue
               spin_unlock(&inOutInode->i_lock);

               LOG_DEBUG_FORMATTED(log, Log_WARNING, __func__,
                  "Warn-once: (possibly valid) isize-shrink oldSize: %lld newSize: %lld "
                  "lastWBTime: %llu, timeBeforeRemote; %llu\n",
                  (long long) oldSize, (long long) kstat->size,
                  (unsigned long long) lastWriteBackOrIsizeWriteTime, timeBeforeRemoteStat);
               dump_stack();

               didLog = true;

               spin_lock(&inOutInode->i_lock);
            }
         }
#endif
      }
   }

   i_size_write(inOutInode, kstat->size);
   inOutInode->i_blocks = kstat->blocks;
}
/**
 * Wrapper for __FhgfsOps_newInodeWithParentID() that passes a zeroed NumNodeID as the parent
 * node ID. See __FhgfsOps_newInodeWithParentID for details.
 */
struct inode* __FhgfsOps_newInode(struct super_block* sb, struct kstat* kstat, dev_t dev,
   EntryInfo* entryInfo, FhgfsIsizeHints* iSizeHints, unsigned int metaVersion)
{
   const NumNodeID zeroParentNodeID = (NumNodeID){0};

   return __FhgfsOps_newInodeWithParentID(sb, kstat, dev, entryInfo, zeroParentNodeID,
      iSizeHints, metaVersion);
}
/**
 * @return true if the configured file cache type is either FILECACHETYPE_Paged or
 *    FILECACHETYPE_Native, false for all other cache types
 */
bool __FhgfsOps_isPagedMode(struct super_block* sb)
{
   Config* cfg = App_getConfig(FhgfsOps_getApp(sb) );

   return (Config_getTuneFileCacheTypeNum(cfg) == FILECACHETYPE_Paged) ||
      (Config_getTuneFileCacheTypeNum(cfg) == FILECACHETYPE_Native);
}
/**
 * Default inode refresh: calls __FhgfsOps_doRefreshInode() with noFlush set to false.
 * See __FhgfsOps_doRefreshInode for details.
 */
int __FhgfsOps_refreshInode(App* app, struct inode* inode, fhgfs_stat* fhgfsStat,
   FhgfsIsizeHints* iSizeHints)
{
   // do not disable inode flushing from this (default) refreshInode function
   const bool noFlush = false;

   return __FhgfsOps_doRefreshInode(app, inode, fhgfsStat, iSizeHints, noFlush);
}
#endif /* FHGFSOPSINODE_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
#ifndef FHGFSOPS_IOCTL_H
#define FHGFSOPS_IOCTL_H
#include <asm/ioctl.h>
#include <linux/kernel.h>
#include <common/nodes/NumNodeID.h>
#include <uapi/beegfs_client.h>
#ifdef FS_IOC_GETVERSION
#define BEEGFS_IOC_GETVERSION_OLD FS_IOC_GETVERSION // predefined in linux/fs.h
#else // old kernels didn't have FS_IOC_GETVERSION
#define BEEGFS_IOC_GETVERSION_OLD _IOR('v', BEEGFS_IOCNUM_GETVERSION_OLD, long)
#endif
#ifdef CONFIG_COMPAT
#ifdef FS_IOC32_GETVERSION
#define BEEGFS_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION // predefined in linux/fs.h
#else // old kernels didn't have FS_IOC32_GETVERSION
#define BEEGFS_IOC32_GETVERSION_OLD _IOR('v', BEEGFS_IOCNUM_GETVERSION_OLD, int)
#endif
#endif
extern long FhgfsOpsIoctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
extern long FhgfsOpsIoctl_compatIoctl(struct file *file, unsigned int cmd, unsigned long arg);
#endif
#endif // FHGFSOPS_IOCTL_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,71 @@
/*
* fhgfs page cache methods
*
*/
#ifndef FHGFSOPSPAGES_H_
#define FHGFSOPSPAGES_H_
#include <toolkit/FhgfsPage.h>
#define BEEGFS_MAX_PAGE_LIST_SIZE (65535) // Allow a maximum page list size and therefore right now
// also IO size of 65535 pages, so just under 256 MiB (262140 KiB) with 4K pages
// NOTE: *MUST* be larger than INITIAL_FIND_PAGES
extern bool FhgfsOpsPages_initPageListVecCache(void);
extern void FhgfsOpsPages_destroyPageListVecCache(void);
extern int FhgfsOps_readpagesVec(struct file* file, struct address_space* mapping,
struct list_head* page_list, unsigned num_pages);
extern int FhgfsOpsPages_readpageSync(struct file* file, struct page* page);
#ifdef KERNEL_HAS_READ_FOLIO
extern int FhgfsOps_read_folio(struct file *file, struct folio *folio);
#else
extern int FhgfsOpsPages_readpage(struct file *file, struct page *page);
#endif
#ifdef KERNEL_HAS_FOLIO
extern void FhgfsOpsPages_readahead(struct readahead_control *ractl);
#else
extern int FhgfsOpsPages_readpages(struct file* file, struct address_space* mapping,
struct list_head* pageList, unsigned numPages);
#endif
extern int FhgfsOpsPages_writepage(struct page *page, struct writeback_control *wbc);
extern int FhgfsOpsPages_writepages(struct address_space* mapping, struct writeback_control* wbc);
static inline void FhgfsOpsPages_incInodeFileSizeOnPagedRead(struct inode* inode, loff_t offset,
ssize_t readRes);
extern void __FhgfsOpsPages_incInodeFileSizeOnPagedRead(struct inode* inode, loff_t offset,
size_t readRes);
extern bool FhgfsOpsPages_isShortRead(struct inode* inode, pgoff_t pageIndex,
bool needInodeRefresh);
extern void FhgfsOpsPages_endReadPage(Logger* log, struct inode* inode,
struct FhgfsPage* fhgfsPage, int readRes);
extern void FhgfsOpsPages_endWritePage(struct page* page, int writeRes, struct inode* inode);
extern int FhgfsOpsPages_writeBackPage(struct inode *inode, struct page *page);
/**
 * If the meta server has told us for some reason a wrong file-size (i_size) the caller would
 * wrongly discard data beyond i_size. So we are going to correct i_size here.
 *
 * Nothing is done for readRes <= 0. Otherwise, if the end of the read (offset + readRes) lies
 * beyond the current i_size, the work is handed over to the non-inline
 * __FhgfsOpsPages_incInodeFileSizeOnPagedRead().
 *
 * Note: This is the fast inline version, which should almost always only read i_size and then
 *    return. So mostly no need to increase the stack size.
 */
void FhgfsOpsPages_incInodeFileSizeOnPagedRead(struct inode* inode, loff_t offset, ssize_t readRes)
{
   loff_t currentSize = i_size_read(inode);
   loff_t readEnd;

   if (unlikely(readRes <= 0) )
      return; // error or nothing read => nothing to correct

   readEnd = offset + (loff_t)readRes;

   if (unlikely(readEnd > currentSize) )
      __FhgfsOpsPages_incInodeFileSizeOnPagedRead(inode, offset, readRes);
}
#endif /* FHGFSOPSPAGES_H_ */

View File

@@ -0,0 +1,425 @@
#include <app/log/Logger.h>
#include <app/App.h>
#include <app/config/Config.h>
#include <filesystem/ProcFs.h>
#include <os/OsCompat.h>
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/MetadataTk.h>
#include <common/Common.h>
#include <components/worker/RWPagesWork.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include "FhgfsOps_versions.h"
#include "FhgfsOpsSuper.h"
#include "FhgfsOpsInode.h"
#include "FhgfsOpsFile.h"
#include "FhgfsOpsDir.h"
#include "FhgfsOpsPages.h"
#include "FhgfsOpsExport.h"
#include "FhgfsXAttrHandlers.h"
static int __FhgfsOps_initApp(struct super_block* sb, char* rawMountOptions);
static void __FhgfsOps_uninitApp(App* app);
static int __FhgfsOps_constructFsInfo(struct super_block* sb, void* rawMountOptions);
static void __FhgfsOps_destructFsInfo(struct super_block* sb);
/* read-ahead size is limited by BEEGFS_DEFAULT_READAHEAD_PAGES, so this is the maximum already
 * going to the server. 32MiB read-ahead also seems to be a good number. It still may be reduced
 * by setting /sys/class/bdi/fhgfs-${number}/read_ahead_kb
 *
 * NOTE(review): BEEGFS_MAX_PAGE_LIST_SIZE is 65535 pages, which is roughly 256MiB with 4K pages,
 * so the 32MiB figure above looks outdated - confirm. */
#define BEEGFS_DEFAULT_READAHEAD_PAGES BEEGFS_MAX_PAGE_LIST_SIZE
/**
 * File system type descriptor; this is the object that is passed to register_filesystem() and
 * unregister_filesystem() by FhgfsOps_registerFilesystem()/FhgfsOps_unregisterFilesystem().
 *
 * Depending on KERNEL_HAS_GET_SB_NODEV, either the .get_sb or the .mount callback is set.
 */
static struct file_system_type fhgfs_fs_type =
{
.name = BEEGFS_MODULE_NAME_STR,
.owner = THIS_MODULE,
.kill_sb = FhgfsOps_killSB,
//.fs_flags = FS_BINARY_MOUNTDATA, // not required currently
#ifdef KERNEL_HAS_GET_SB_NODEV
.get_sb = FhgfsOps_getSB,
#else
.mount = FhgfsOps_mount, // basically the same thing as get_sb before
#endif
};
static struct super_operations fhgfs_super_ops =
{
.statfs = FhgfsOps_statfs,
.alloc_inode = FhgfsOps_alloc_inode,
.destroy_inode = FhgfsOps_destroy_inode,
.drop_inode = generic_drop_inode,
.put_super = FhgfsOps_putSuper,
.show_options = FhgfsOps_showOptions,
};
/**
 * Creates and initializes the per-mount application object.
 *
 * Parses the raw mount options into a MountConfig, initializes the App that belongs to the
 * super block with it and runs the App. If running fails, the App is stopped and uninitialized
 * again; on success, the procfs entries are created.
 *
 * @return APPCODE_NO_ERROR on success, APPCODE_INVALID_CONFIG if the mount options could not
 *    be parsed, otherwise the result of App_run()
 */
int __FhgfsOps_initApp(struct super_block* sb, char* rawMountOptions)
{
   App* app;
   int runRes;

   // create mountConfig (parse from mount options)
   MountConfig* mountConfig = MountConfig_construct();

   if(!MountConfig_parseFromRawOptions(mountConfig, rawMountOptions) )
   {
      MountConfig_destruct(mountConfig);
      return APPCODE_INVALID_CONFIG;
   }

   app = FhgfsOps_getApp(sb);
   App_init(app, mountConfig);

   runRes = App_run(app);
   if(runRes == APPCODE_NO_ERROR)
   { // app is up and running
      ProcFs_createEntries(app);
      return runRes;
   }

   // error occurred => clean up

   printk_fhgfs_debug(KERN_INFO, "Stopping App...\n");
   App_stop(app);

   printk_fhgfs_debug(KERN_INFO, "Cleaning up...\n");
   App_uninit(app);

   printk_fhgfs_debug(KERN_INFO, "App unitialized.\n");

   return runRes;
}
/**
 * Stops and destroys the per-mount application object.
 *
 * The order is: stop the app, remove the procfs entries, uninit the app (see the note in the
 * function body for why procfs entries are removed only after the stop).
 */
void __FhgfsOps_uninitApp(App* app)
{
App_stop(app);
/* note: some of the procfs entries (e.g. remove_node) won't work anymore after app components
have been stopped, but others are still useful for finding reasons why app stop is delayed
in some cases (so we remove procfs after App_stop() ). */
ProcFs_removeEntries(app);
App_uninit(app);
}
/**
 * Registers the file system type (fhgfs_fs_type) with the kernel.
 *
 * @return result of register_filesystem()
 */
int FhgfsOps_registerFilesystem(void)
{
return register_filesystem(&fhgfs_fs_type);
}
/**
 * Unregisters the file system type (fhgfs_fs_type) from the kernel.
 *
 * @return result of unregister_filesystem()
 */
int FhgfsOps_unregisterFilesystem(void)
{
return unregister_filesystem(&fhgfs_fs_type);
}
/**
 * Initialize sb->s_fs_info
 *
 * Allocates the (zeroed) FhgfsSuperBlockInfo, initializes and runs the App and (if the kernel
 * has a per-superblock bdi) sets up the backing dev info. On any failure, everything that was
 * set up here is torn down again and sb->s_fs_info is reset to NULL.
 *
 * @param rawMountOptions raw mount options string (passed on to __FhgfsOps_initApp)
 * @return 0 on success, negative linux error code otherwise
 */
int __FhgfsOps_constructFsInfo(struct super_block* sb, void* rawMountOptions)
{
   int res;
   int appRes;
   App* app;
   Logger* log;

   /* note: this condition must match the condition under which bdi is used below (and under
      which it is destroyed in __FhgfsOps_destructFsInfo). */
#if defined(KERNEL_HAS_SB_BDI) && \
   (!defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME) || defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER) )
   struct backing_dev_info* bdi;
#endif

   // use kzalloc to also zero the bdi
   FhgfsSuperBlockInfo* sbInfo = kzalloc(sizeof(FhgfsSuperBlockInfo), GFP_KERNEL);
   if (!sbInfo)
   {
      printk_fhgfs_debug(KERN_INFO, "Failed to allocate memory for FhgfsSuperBlockInfo");
      sb->s_fs_info = NULL;
      return -ENOMEM;
   }

   sb->s_fs_info = sbInfo;

   appRes = __FhgfsOps_initApp(sb, rawMountOptions);
   if(appRes)
   { // note: __FhgfsOps_initApp has already cleaned up the app in this case
      printk_fhgfs_debug(KERN_INFO, "Failed to initialize App object");
      res = -EINVAL;
      goto outFreeSB;
   }

   app = FhgfsOps_getApp(sb);
   log = App_getLogger(app);
   IGNORE_UNUSED_VARIABLE(log);

#if defined(KERNEL_HAS_SB_BDI)

   #if defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME) && !defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER)
   {
      static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

      res = super_setup_bdi_name(sb, BEEGFS_MODULE_NAME_STR "-%ld",
         atomic_long_inc_return(&bdi_seq));
   }
   #else
   bdi = &sbInfo->bdi;

   /* NOTE: The kernel expects a fully initialized bdi structure, so at a minimum it has to be
    * allocated by kzalloc() or memset(bdi, 0, sizeof(*bdi)).
    * we don't set the congest_* callbacks (like every other filesystem) because those are
    * intended for dm and md.
    */
   bdi->ra_pages = BEEGFS_DEFAULT_READAHEAD_PAGES;

      #if defined(KERNEL_HAS_BDI_CAP_MAP_COPY)
   res = bdi_setup_and_register(bdi, BEEGFS_MODULE_NAME_STR, BDI_CAP_MAP_COPY);
      #else
   res = bdi_setup_and_register(bdi, BEEGFS_MODULE_NAME_STR);
      #endif
   #endif

   if (res)
   {
      Logger_logFormatted(log, 2, __func__, "Failed to init super-block (bdi) information: %d",
         res);
      __FhgfsOps_uninitApp(app);
      goto outFreeSB;
   }

#endif

   // set root inode attribs to uninit'ed
   FhgfsOps_setHasRootEntryInfo(sb, false);
   FhgfsOps_setIsRootInited(sb, false);

   printk_fhgfs(KERN_INFO, "BeeGFS mount ready.\n");

   return 0; // all ok, res should be 0 here

outFreeSB:
   kfree(sbInfo);
   sb->s_fs_info = NULL;

   return res;
}
/**
 * Uninitialize the entire sb->s_fs_info object: destroy the bdi (where it was set up by us),
 * stop and uninit the App, free the memory and reset sb->s_fs_info to NULL.
 *
 * Does nothing if sb->s_fs_info is NULL (which might be the case if
 * __FhgfsOps_constructFsInfo() failed).
 */
void __FhgfsOps_destructFsInfo(struct super_block* sb)
{
   App* app;

   if (!sb->s_fs_info)
      return;

   app = FhgfsOps_getApp(sb);

   // call destroy iff not initialised/registered by super_setup_bdi_name
#if defined(KERNEL_HAS_SB_BDI)
   #if !defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME) || defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER)
   bdi_destroy(FhgfsOps_getBdi(sb) );
   #endif
#endif

   __FhgfsOps_uninitApp(app);

   kfree(sb->s_fs_info);
   sb->s_fs_info = NULL;

   printk_fhgfs(KERN_INFO, "BeeGFS unmounted.\n");
}
/**
* Fill the file system superblock (vfs object)
*/
int FhgfsOps_fillSuper(struct super_block* sb, void* rawMountOptions, int silent)
{
App* app = NULL;
Config* cfg = NULL;
struct inode* rootInode;
struct dentry* rootDentry;
struct kstat kstat;
EntryInfo entryInfo;
FhgfsIsizeHints iSizeHints;
// init per-mount app object
if(__FhgfsOps_constructFsInfo(sb, rawMountOptions) )
return -ECANCELED;
app = FhgfsOps_getApp(sb);
cfg = App_getConfig(app);
// set up super block data
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = BEEGFS_MAGIC;
sb->s_op = &fhgfs_super_ops;
sb->s_time_gran = 1000000000; // granularity of c/m/atime in ns
#ifdef KERNEL_HAS_SB_NODIRATIME
sb->s_flags |= SB_NODIRATIME;
#else
sb->s_flags |= MS_NODIRATIME;
#endif
if (Config_getSysXAttrsEnabled(cfg ) )
sb->s_xattr = fhgfs_xattr_handlers_noacl; // handle only user xattrs
#ifdef KERNEL_HAS_GET_ACL
if (Config_getSysACLsEnabled(cfg) )
{
sb->s_xattr = fhgfs_xattr_handlers; // replace with acl-capable xattr handlers
#ifdef SB_POSIXACL
sb->s_flags |= SB_POSIXACL;
#else
sb->s_flags |= MS_POSIXACL;
#endif
}
#endif // KERNEL_HAS_GET_ACL
if (Config_getSysXAttrsCheckCapabilities(cfg) != CHECKCAPABILITIES_Always)
#if defined(SB_NOSEC)
sb->s_flags |= SB_NOSEC;
#else
sb->s_flags |= MS_NOSEC;
#endif
/* MS_ACTIVE is rather important as it marks the super block being successfully initialized and
* allows the vfs to keep important inodes in the cache. However, it seems it is already
* initialized in vfs generic mount functions.
sb->s_flags |= MS_ACTIVE; // used in iput_final() */
// NFS kernel export is probably not worth the backport efforts for kernels before 2.6.29
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
sb->s_export_op = &fhgfs_export_ops;
#endif
#if defined(KERNEL_HAS_SB_BDI)
sb->s_bdi = FhgfsOps_getBdi(sb);
#endif
// init root inode
memset(&kstat, 0, sizeof(struct kstat) );
kstat.ino = BEEGFS_INODE_ROOT_INO;
kstat.mode = S_IFDIR | 0777; // allow access for everyone
kstat.atime = kstat.mtime = kstat.ctime = current_fs_time(sb);
kstat.uid = current_fsuid();
kstat.gid = current_fsgid();
kstat.blksize = Config_getTuneInodeBlockSize(cfg);
kstat.nlink = 1;
// root entryInfo is always updated when someone asks for it (so we just set dummy values here)
EntryInfo_init(&entryInfo, NodeOrGroup_fromGroup(0), StringTk_strDup(""), StringTk_strDup(""),
StringTk_strDup(""), DirEntryType_DIRECTORY, 0);
rootInode = __FhgfsOps_newInode(sb, &kstat, 0, &entryInfo, &iSizeHints, 0);
if(!rootInode || IS_ERR(rootInode))
{
__FhgfsOps_destructFsInfo(sb);
return IS_ERR(rootInode) ? PTR_ERR(rootInode) : -ENOMEM;
}
rootDentry = d_make_root(rootInode);
if(!rootDentry)
{
__FhgfsOps_destructFsInfo(sb);
return -ENOMEM;
}
#ifdef KERNEL_HAS_S_D_OP
// linux 2.6.38 switched from individual per-dentry to default superblock d_ops.
/* note: Only set default dentry operations here, as we don't want those OPs set for the root
* dentry. In fact, setting as before would only slow down everything a bit, due to
* useless revalidation of our root dentry. */
sb->s_d_op = &fhgfs_dentry_ops;
#endif // KERNEL_HAS_S_D_OP
rootDentry->d_time = jiffies;
sb->s_root = rootDentry;
return 0;
}
/*
* Called by FhgfsOps_killSB()->kill_anon_super()->generic_shutdown_super()
*/
void FhgfsOps_putSuper(struct super_block* sb)
{
   App* superApp;

   // no fs info attached to this super block => nothing to destruct
   if (!sb->s_fs_info)
      return;

   superApp = FhgfsOps_getApp(sb);
   if (!superApp)
      return;

   __FhgfsOps_destructFsInfo(sb);
}
void FhgfsOps_killSB(struct super_block* sb)
{
App* app = FhgfsOps_getApp(sb);
if (app) // might be NULL on unsuccessful mount attempt
App_setConnRetriesEnabled(app, false); // faster umount on communication errors
RWPagesWork_flushWorkQueue();
#if defined(KERNEL_HAS_SB_BDI) && LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
/**
* s_fs_info might be NULL
*/
if (likely(sb->s_fs_info) )
{
struct backing_dev_info* bdi = FhgfsOps_getBdi(sb);
bdi_unregister(bdi);
}
#endif
kill_anon_super(sb);
}
/**
 * Writes the mount options of the given mount to the seq_file by delegating to
 * MountConfig_showOptions().
 *
 * @return always 0
 */
#ifdef KERNEL_HAS_SHOW_OPTIONS_DENTRY
extern int FhgfsOps_showOptions(struct seq_file* sf, struct dentry* dentry)
{
   struct super_block* sb = dentry->d_sb;
#else
extern int FhgfsOps_showOptions(struct seq_file* sf, struct vfsmount* vfs)
{
   struct super_block* sb = vfs->mnt_sb;
#endif

   // super block -> app -> mount config
   MountConfig_showOptions(App_getMountConfig(FhgfsOps_getApp(sb) ), sf);

   return 0;
}

View File

@@ -0,0 +1,124 @@
#ifndef FHGFSOPSSUPER_H_
#define FHGFSOPSSUPER_H_
#include <app/App.h>
#include <common/Common.h>
#include "FhgfsOps_versions.h"
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/backing-dev.h>
#include <linux/seq_file.h>
#define BEEGFS_MAGIC 0x19830326 /* some random number to identify fhgfs */
#define BEEGFS_STATFS_BLOCKSIZE_SHIFT 19 /* bit shift to compute reported block size in statfs() */
#define BEEGFS_STATFS_BLOCKSIZE (1UL << BEEGFS_STATFS_BLOCKSIZE_SHIFT)
struct FhgfsSuperBlockInfo;
typedef struct FhgfsSuperBlockInfo FhgfsSuperBlockInfo;
extern int FhgfsOps_registerFilesystem(void);
extern int FhgfsOps_unregisterFilesystem(void);
extern void FhgfsOps_putSuper(struct super_block* sb);
extern void FhgfsOps_killSB(struct super_block* sb);
#ifdef KERNEL_HAS_SHOW_OPTIONS_DENTRY
extern int FhgfsOps_showOptions(struct seq_file* sf, struct dentry* dentry);
#else
extern int FhgfsOps_showOptions(struct seq_file* sf, struct vfsmount* vfs);
#endif
extern int FhgfsOps_fillSuper(struct super_block* sb, void* rawMountOptions, int silent);
// getters & setters
static inline App* FhgfsOps_getApp(struct super_block* sb);
static inline struct backing_dev_info* FhgfsOps_getBdi(struct super_block* sb);
static inline bool FhgfsOps_getHasRootEntryInfo(struct super_block* sb);
static inline void FhgfsOps_setHasRootEntryInfo(struct super_block* sb, bool isInited);
static inline bool FhgfsOps_getIsRootInited(struct super_block* sb);
static inline void FhgfsOps_setIsRootInited(struct super_block* sb, bool isInited);
/**
 * Private per-super-block data. The inline getters/setters in this header access it through
 * sb->s_fs_info.
 */
struct FhgfsSuperBlockInfo
{
/* NOTE(review): some callers NULL-check the pointer returned by FhgfsOps_getApp(), which
   returns the address of this member; that can only be NULL (for a NULL s_fs_info) while this
   is the first member - confirm before reordering. */
App app;
#if !defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME)
// only embedded here if the kernel switch above is not set; see FhgfsOps_getBdi()
struct backing_dev_info bdi;
#endif
bool haveRootEntryInfo; // false until the root EntryInfo is set in root-FhgfsInode
bool isRootInited; /* false until root inode attrs have been fetched/initialized in
_lookup() (because kernel does not automatically lookup/revalidate the root inode) */
};
/* Note: Functions below rely on sb->s_fs_info. Umount operations / super-block destroy operations
* in FhgfsOpsSuper.c might be more obvious if we would test here if sb->s_fs_info is NULL.
* However, the functions below are frequently called from various parts of the file system
* and additional checks might cause an overhead. Therefore those checks are not done
* here.
*/
/**
* NOTE: Make sure sb->s_fs_info is initialized!
*/
App* FhgfsOps_getApp(struct super_block* sb)
{
FhgfsSuperBlockInfo* sbInfo = sb->s_fs_info;
return &(sbInfo->app);
}
/**
* NOTE: Make sure sb->s_fs_info is initialized!
*/
struct backing_dev_info* FhgfsOps_getBdi(struct super_block* sb)
{
#if defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME)
return sb->s_bdi;
#else
FhgfsSuperBlockInfo* sbInfo = sb->s_fs_info;
return &(sbInfo->bdi);
#endif
}
/**
* Note: Do not get it (reliably) without also locking fhgfsInode->entryInfoLock!
*/
bool FhgfsOps_getHasRootEntryInfo(struct super_block* sb)
{
FhgfsSuperBlockInfo* sbInfo = sb->s_fs_info;
return sbInfo->haveRootEntryInfo;
}
/**
 * Stores the haveRootEntryInfo flag in the super block info.
 *
 * Note: Do not set it without also locking fhgfsInode->entryInfoLock! Exception is during the
 * mount process.
 */
void FhgfsOps_setHasRootEntryInfo(struct super_block* sb, bool haveRootEntryInfo)
{
   ( (FhgfsSuperBlockInfo*)sb->s_fs_info)->haveRootEntryInfo = haveRootEntryInfo;
}
bool FhgfsOps_getIsRootInited(struct super_block* sb)
{
FhgfsSuperBlockInfo* sbInfo = sb->s_fs_info;
return sbInfo->isRootInited;
}
/**
 * Stores the isRootInited flag in the super block info.
 */
void FhgfsOps_setIsRootInited(struct super_block* sb, bool isInited)
{
   ( (FhgfsSuperBlockInfo*)sb->s_fs_info)->isRootInited = isInited;
}
#endif /* FHGFSOPSSUPER_H_ */

View File

@@ -0,0 +1,319 @@
#include <app/log/Logger.h>
#include <app/App.h>
#include <common/Common.h>
#include <filesystem/FhgfsOpsHelper.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <os/OsCompat.h>
#include <toolkit/NoAllocBufferStore.h>
#include <toolkit/StatFsCache.h>
#include "FhgfsOps_versions.h"
#include "FhgfsOpsFile.h"
#include "FhgfsOpsDir.h"
#include "FhgfsOpsInode.h"
#include "FhgfsOpsSuper.h"
/**
 * A basic permission() method. Only required to tell the VFS we do not support RCU path walking.
 *
 * The signature is selected at compile time by the KERNEL_HAS_* switches below; the body only
 * uses "inode", "mask" and (in the IPERM_FLAG_RCU case) "flags".
 *
 * @return -ECHILD if called with MAY_NOT_BLOCK / IPERM_FLAG_RCU set; otherwise the result of
 *    os_generic_permission() (0 on success, negative linux error code otherwise)
 */
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
int FhgfsOps_permission(struct mnt_idmap* idmap, struct inode *inode, int mask)
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
int FhgfsOps_permission(struct user_namespace* mnt_userns, struct inode *inode, int mask)
#elif defined(KERNEL_HAS_PERMISSION_2)
int FhgfsOps_permission(struct inode *inode, int mask)
#elif defined(KERNEL_HAS_PERMISSION_FLAGS)
int FhgfsOps_permission(struct inode *inode, int mask, unsigned int flags)
#else
/* <= 2.6.26 */
int FhgfsOps_permission(struct inode *inode, int mask, struct nameidata *nd)
#endif
{
/* note: 2.6.38 introduced rcu-walk mode, which is inappropriate for us, because we need the
parent. the code below tells vfs to call this again in old ref-walk mode.
(see Documentation/filesystems/vfs.txt:d_revalidate) */
#ifdef MAY_NOT_BLOCK
// rcu-walk is signalled through the mask here
if(mask & MAY_NOT_BLOCK)
return -ECHILD;
#elif defined(IPERM_FLAG_RCU)
// rcu-walk is signalled through the separate flags argument here
if(flags & IPERM_FLAG_RCU)
return -ECHILD;
#endif // LINUX_VERSION_CODE
// the actual check is delegated to the os compat wrapper
return os_generic_permission(inode, mask);
}
/**
 * Fills the given kstatfs with static file system properties and with the total/free space
 * retrieved through the StatFsCache.
 *
 * @return FhgfsOpsErr_toSysErr() of the StatFsCache result; the block counters are only
 *    assigned on success
 */
int FhgfsOps_statfs(struct dentry* dentry, struct kstatfs* kstatfs)
{
   const char* logContext = __func__;

   App* app = FhgfsOps_getApp(dentry->d_sb);
   Logger* log = App_getLogger(app);
   StatFsCache* statfsCache = App_getStatFsCache(app);

   const unsigned long roundUpAdd = BEEGFS_STATFS_BLOCKSIZE - 1; // for rounding up to blocks

   FhgfsOpsErr statRes;
   int sysErr;
   int64_t sizeTotal = 0;
   int64_t sizeFree = 0;

   FhgfsOpsHelper_logOp(Log_SPAM, app, NULL, NULL, logContext);

   // static values first
   memset(kstatfs, 0, sizeof(struct kstatfs) );

   kstatfs->f_type = BEEGFS_MAGIC;
   kstatfs->f_namelen = NAME_MAX;

   kstatfs->f_files = 0; // total used file nodes (not supported currently)
   kstatfs->f_ffree = 0; // free file nodes (not supported currently)

   /* f_frsize should be same as f_bsize, as some versions of glibc do not correctly handle
      frsize!=bsize (both are used inconsistently as a unit for f_blocks & co) */
   kstatfs->f_bsize = BEEGFS_STATFS_BLOCKSIZE;
   kstatfs->f_frsize = BEEGFS_STATFS_BLOCKSIZE;

   // dynamic values
   statRes = StatFsCache_getFreeSpace(statfsCache, &sizeTotal, &sizeFree);

   LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "remoting complete. result: %d", (int)statRes);
   IGNORE_UNUSED_VARIABLE(log);

   sysErr = FhgfsOpsErr_toSysErr(statRes);

   if(statRes != FhgfsOpsErr_SUCCESS)
      return sysErr;

   // success => assign received values
   // note: f_blocks, f_bfree, and f_bavail are reported in units of f_bsize.
   kstatfs->f_blocks = (sizeTotal + roundUpAdd) >> BEEGFS_STATFS_BLOCKSIZE_SHIFT;
   kstatfs->f_bfree = (sizeFree + roundUpAdd) >> BEEGFS_STATFS_BLOCKSIZE_SHIFT; // for superuser
   kstatfs->f_bavail = (sizeFree + roundUpAdd) >> BEEGFS_STATFS_BLOCKSIZE_SHIFT; // non-superuser

   return sysErr;
}
#ifdef KERNEL_HAS_GET_SB_NODEV
/**
 * Variant for kernels with get_sb_nodev(): forwards to it with FhgfsOps_fillSuper as the
 * fill-super callback. dev_name is not used here.
 *
 * @return result of get_sb_nodev()
 */
int FhgfsOps_getSB(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
return get_sb_nodev(fs_type, flags, data, FhgfsOps_fillSuper, mnt);
}
#else
/* kernel 2.6.39 switched from get_sb() to mount(), which provides similar functionality from our
point of view. */
/**
 * Variant for kernels without get_sb_nodev(): forwards to mount_nodev() with FhgfsOps_fillSuper
 * as the fill-super callback. dev_name is not used here.
 *
 * @return result of mount_nodev()
 */
struct dentry* FhgfsOps_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_nodev(fs_type, flags, data, FhgfsOps_fillSuper);
}
#endif // LINUX_VERSION_CODE
/**
 * Logs the operation and forwards to __FhgfsOps_flush().
 *
 * Note: Called by close(2) system call before the actual FhgfsOps_close().
 *
 * @return result of __FhgfsOps_flush()
 */
int FhgfsOps_flush(struct file *file, fl_owner_t id)
{
   const char* logContext = "FhgfsOps_flush";

   struct inode* fileInode = file_inode(file);
   struct dentry* fileDentry = file_dentry(file);
   App* app = FhgfsOps_getApp(fileDentry->d_sb);

   FhgfsOpsHelper_logOp(Log_SPAM, app, fileDentry, fileInode, logContext);

   /* note: if a buffer cannot be flushed here, we still have the inode in the InodeRefStore, so
      that the flusher can write it asynchronously */

   return __FhgfsOps_flush(app, file, false, false, true, true);
}
/**
 * Constructor callback for inode cache objects: initializes the FhGFS part
 * (FhgfsInode_initOnce() ) and the VFS part (inode_init_once() ) of the object.
 *
 * The signature depends on the kernel (see the switches below).
 *
 * Note: Called for new cache objects (see FhgfsOps_initInodeCache() )
 */
#ifdef SLAB_CTOR_CONSTRUCTOR
void FhgfsOps_initInodeOnce(void* inode, struct kmem_cache* cache, unsigned long flags)
{
// note: no, this is not a typo. since kernel version 2.6.22, we're no longer checking
// the slab_... flags (even though the argument still exists in the method signature).
// NOTE(review): this branch (SLAB_CTOR_CONSTRUCTOR defined) does check the flags; the remark
// above seems to describe the variants in the #else branch - confirm.
if( (flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR) ) == SLAB_CTOR_CONSTRUCTOR)
{ // fresh construction => initialize object
FhgfsInode* fhgfsInode = (FhgfsInode*)inode;
FhgfsInode_initOnce(fhgfsInode);
inode_init_once(&fhgfsInode->vfs_inode);
}
}
#else
#if defined(KERNEL_HAS_KMEMCACHE_CACHE_FLAGS_CTOR)
void FhgfsOps_initInodeOnce(void* inode, struct kmem_cache* cache, unsigned long flags)
#elif defined(KERNEL_HAS_KMEMCACHE_CACHE_CTOR)
void FhgfsOps_initInodeOnce(struct kmem_cache* cache, void* inode)
#else
void FhgfsOps_initInodeOnce(void* inode)
#endif // LINUX_VERSION_CODE
{
// unconditional initialization; only "inode" is used, any other arguments are ignored
FhgfsInode* fhgfsInode = (FhgfsInode*)inode;
FhgfsInode_initOnce(fhgfsInode);
inode_init_once(&fhgfsInode->vfs_inode);
}
#endif // LINUX_VERSION_CODE
// emulation of generic_file_llseek_unlocked() for kernels that do not provide it
#ifndef KERNEL_HAS_GENERIC_FILE_LLSEEK_UNLOCKED
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26)
/**
* Note: This is just an exact copy of the kernel method (from kernel 2.6.30), which was introduced
* in 2.6.27.
*
* generic_file_llseek_unlocked - lockless generic llseek implementation
* @file: file structure to seek on
* @offset: file offset to seek to
* @origin: type of seek
*
* Updates the file offset to the value specified by @offset and @origin.
* Locking must be provided by the caller.
*
* Returns the resulting offset, or -EINVAL if it would be negative or larger than the
* super block's s_maxbytes.
*/
loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
{
struct inode *inode = file->f_mapping->host;
// any origin other than SEEK_END/SEEK_CUR leaves offset untouched, i.e. it is taken as absolute
switch (origin)
{
case SEEK_END:
offset += inode->i_size;
break;
case SEEK_CUR:
/*
* Here we special-case the lseek(fd, 0, SEEK_CUR)
* position-querying operation. Avoid rewriting the "same"
* f_pos value back to the file because a concurrent read(),
* write() or lseek() might have altered it
*/
if (offset == 0)
return file->f_pos;
offset += file->f_pos;
break;
}
if (offset < 0 || offset > (loff_t) inode->i_sb->s_maxbytes)
return -EINVAL;
/* Special lock needed here? */
if (offset != file->f_pos)
{
file->f_pos = offset;
file->f_version = 0;
}
return offset;
}
#else
/**
* Note: The lock (to which the _unlocked suffix refers) was dropped in linux 3.2, so we can now
* just call the kernel method without the _unlocked suffix.
*/
loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
{
return generic_file_llseek(file, offset, origin);
}
#endif // LINUX_VERSION_CODE
#endif
#ifndef KERNEL_HAS_DENTRY_PATH_RAW
/**
 * Get the relative path of a dentry to the mount point
 *
 * @param dentry The dentry we want the path for
 * @param buf A preallocated buffer
 * @param buflen The size of buf
 * @return pointer to the start of the path inside buf (the path is built at the end of buf),
 *    or ERR_PTR(-ENAMETOOLONG) if buf is too small
 *
 * This is a compatibility function for kernels before 2.6.38.
 * Alternatively, using d_path() is not easy, as we do not have
 * struct vfsmnt. Unfortunately, the kernel interface to get vfsmnt got frequently modified,
 * which would require us to have even more compatibility code.
 * NOTE: We use the vfs global dcache_lock here, which is rather slow and which will lock up all
 *       file systems if something goes wrong
 * How it works:
 * We build up the path backwards from the end of char * buf, put a "/" before it and return
 * the pointer to "/"
 */
char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
   const ssize_t storeBufLen = buflen;
   ssize_t bufLenLeft = storeBufLen; // remaining length
   char* currentBufStart;
   int nameLen;

   // we need room for at least the terminating zero, otherwise we would write in front of buf
   if (bufLenLeft < 1)
      return ERR_PTR(-ENAMETOOLONG);

   currentBufStart = buf + bufLenLeft;

   *--currentBufStart = '\0';
   bufLenLeft--;

   spin_lock(&dcache_lock);

   while(!IS_ROOT(dentry) )
   {
      nameLen = dentry->d_name.len;

      bufLenLeft -= nameLen + 1; // +1 for the leading "/"
      if (bufLenLeft < 0)
         goto err_too_long_unlock;

      currentBufStart -= nameLen;
      memcpy(currentBufStart, dentry->d_name.name, nameLen);

      *--currentBufStart = '/';

      dentry = dentry->d_parent;
   }

   spin_unlock(&dcache_lock);

   // return "/" instead of an empty string for the root directory
   if(bufLenLeft == (storeBufLen-1) ) // -1 for the terminating zero
   { // dentry is (mount) root directory
      if (bufLenLeft < 1)
         return ERR_PTR(-ENAMETOOLONG); // buf has only room for the terminating zero

      *--currentBufStart = '/';
   }

   return currentBufStart;

err_too_long_unlock:
   spin_unlock(&dcache_lock);

   return ERR_PTR(-ENAMETOOLONG);
}
#endif // LINUX_VERSION_CODE (2.6.38)

View File

@@ -0,0 +1,109 @@
#ifndef FHGFSOPS_VERSIONS_H_
#define FHGFSOPS_VERSIONS_H_
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
int FhgfsOps_permission(struct mnt_idmap* idmap, struct inode *inode, int mask);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
int FhgfsOps_permission(struct user_namespace* mnt_userns, struct inode *inode, int mask);
#elif defined(KERNEL_HAS_PERMISSION_2)
int FhgfsOps_permission(struct inode *inode, int mask);
#elif defined(KERNEL_HAS_PERMISSION_FLAGS)
int FhgfsOps_permission(struct inode *inode, int mask, unsigned int flags);
#else
/* <= 2.6.26 */
int FhgfsOps_permission(struct inode *inode, int mask, struct nameidata *nd);
#endif
#ifdef KERNEL_HAS_GET_SB_NODEV
extern int FhgfsOps_getSB(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt);
#else
extern struct dentry* FhgfsOps_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data);
#endif // LINUX_VERSION_CODE
extern int FhgfsOps_statfs(struct dentry* dentry, struct kstatfs* kstatfs);
extern int FhgfsOps_flush(struct file *file, fl_owner_t id);
#if defined(KERNEL_HAS_KMEMCACHE_CACHE_FLAGS_CTOR)
extern void FhgfsOps_initInodeOnce(void* inode, struct kmem_cache* cache, unsigned long flags);
#elif defined(KERNEL_HAS_KMEMCACHE_CACHE_CTOR)
extern void FhgfsOps_initInodeOnce(struct kmem_cache* cache, void* inode);
#else
extern void FhgfsOps_initInodeOnce(void* inode);
#endif // LINUX_VERSION_CODE
////////////// start of kernel method emulators //////////////
#ifndef KERNEL_HAS_GENERIC_FILE_LLSEEK_UNLOCKED
extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin);
#endif // LINUX_VERSION_CODE
#ifndef KERNEL_HAS_SET_NLINK
static inline void set_nlink(struct inode *inode, unsigned int nlink);
#endif // LINUX_VERSION_CODE
#ifndef KERNEL_HAS_DENTRY_PATH_RAW
char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen);
#endif
#ifndef KERNEL_HAS_FILE_INODE
static inline struct inode *file_inode(struct file *f);
#endif // KERNEL_HAS_FILE_INODE
#ifndef KERNEL_HAS_SET_NLINK
/**
 * Sets the link count of the given inode by assigning inode->i_nlink directly.
 *
 * Note: This is just an emulator that does the job for old kernels.
 *
 * @param inode inode to modify
 * @param nlink new link count
 */
void set_nlink(struct inode *inode, unsigned int nlink)
{
inode->i_nlink = nlink;
}
#endif // LINUX_VERSION_CODE
#ifndef KERNEL_HAS_IHOLD
/*
 * get additional reference to inode; caller must already hold one.
 *
 * Emulator for kernels without ihold(): increments inode->i_count and warns if the resulting
 * count is below 2 (i.e. if the caller did not already hold a reference).
 */
static inline void ihold(struct inode *inode)
{
// note: the increment is part of the WARN_ON expression
WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
#endif // KERNEL_HAS_IHOLD
#ifndef KERNEL_HAS_FILE_DENTRY
/**
 * Emulator for kernels without file_dentry().
 *
 * @return the dentry stored in file->f_path
 */
static inline struct dentry *file_dentry(const struct file *file)
{
return file->f_path.dentry;
}
#endif
/* some ofeds backport this too - and put a define around it ... */
#if !defined(KERNEL_HAS_FILE_INODE) && !defined(file_inode)
struct inode *file_inode(struct file *f)
{
return file_dentry(f)->d_inode;
}
#endif // KERNEL_HAS_FILE_INODE
#endif /*FHGFSOPS_VERSIONS_H_*/

View File

@@ -0,0 +1,461 @@
#include <linux/fs.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include "common/Common.h"
#include "FhgfsOpsInode.h"
#include "FhgfsOpsHelper.h"
#include "FhgfsXAttrHandlers.h"
#define FHGFS_XATTR_USER_PREFIX "user."
#define FHGFS_XATTR_SECURITY_PREFIX "security."
#ifdef KERNEL_HAS_GET_ACL
/**
 * Called when an ACL Xattr is set. Responsible for setting the mode bits corresponding to the
 * ACL mask.
 *
 * For the access ACL, the file mode is updated through FhgfsOps_setattr() before the xattr is
 * stored; for the default ACL, the value is only checked for correctness. In both cases the
 * xattr is removed instead of set if there is nothing to store (value becomes NULL).
 *
 * @param name has to be an empty string (""), otherwise -EINVAL is returned.
 * @param value raw xattr value of the ACL.
 * @param size length of value.
 * @return 0 on success (also if the xattr to be removed did not exist), negative linux error
 *    code otherwise.
 */
#if defined(KERNEL_HAS_XATTR_HANDLERS_INODE_ARG)
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
static int FhgfsXAttrSetACL(const struct xattr_handler* handler, struct mnt_idmap* id_map,
   struct dentry* dentry, struct inode* inode, const char* name, const void* value, size_t size,
   int flags)
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
static int FhgfsXAttrSetACL(const struct xattr_handler* handler, struct user_namespace* mnt_userns,
   struct dentry* dentry, struct inode* inode, const char* name, const void* value, size_t size,
   int flags)
#else
static int FhgfsXAttrSetACL(const struct xattr_handler* handler, struct dentry* dentry,
   struct inode* inode, const char* name, const void* value, size_t size, int flags)
#endif
{
   int handler_flags = handler->flags;
#elif defined(KERNEL_HAS_XATTR_HANDLER_PTR_ARG)
static int FhgfsXAttrSetACL(const struct xattr_handler* handler, struct dentry* dentry,
   const char* name, const void* value, size_t size, int flags)
{
   int handler_flags = handler->flags;
   struct inode* inode = dentry->d_inode;
#else
static int FhgfsXAttrSetACL(struct dentry *dentry, const char *name, const void *value, size_t size,
   int flags, int handler_flags)
{
   struct inode* inode = dentry->d_inode;
#endif
   char* attrName;

   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(dentry->d_sb), dentry, NULL, __func__, "Called.");

   // Enforce an empty name here (which means the name of the Xattr has to be
   // fully given by the POSIX_ACL_XATTR_... defines)
   if(strcmp(name, "") )
      return -EINVAL;

   if(!os_inode_owner_or_capable(inode) )
      return -EPERM;

   if(S_ISLNK(inode->i_mode) )
      return -EOPNOTSUPP;

   if(handler_flags == ACL_TYPE_ACCESS)
   {
      struct posix_acl* acl;
      struct iattr attr;
      int aclEquivRes;
      int setAttrRes;

      // if we set the access ACL, we also need to update the file mode permission bits.
      attr.ia_mode = inode->i_mode;
      attr.ia_valid = ATTR_MODE;

      acl = os_posix_acl_from_xattr(value, size);
      if(IS_ERR(acl) )
         return PTR_ERR(acl);

      aclEquivRes = posix_acl_equiv_mode(acl, &attr.ia_mode);
      if(aclEquivRes == 0) // ACL can be exactly represented by file mode permission bits
      {
         value = NULL;
      }
      else if(aclEquivRes < 0)
      {
         posix_acl_release(acl);
         return -EINVAL;
      }

#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
      setAttrRes = FhgfsOps_setattr(&nop_mnt_idmap, dentry, &attr);
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
      setAttrRes = FhgfsOps_setattr(&init_user_ns, dentry, &attr);
#else
      setAttrRes = FhgfsOps_setattr(dentry, &attr);
#endif

      /* the acl object is not needed any more from here on, so release it before looking at the
         setattr result (otherwise it would be leaked in the error case) */
      posix_acl_release(acl);

      if(setAttrRes < 0)
         return setAttrRes;

      // Name of the Xattr to be set later
      attrName = XATTR_NAME_POSIX_ACL_ACCESS;
   }
   else if(handler_flags == ACL_TYPE_DEFAULT)
   {
      // Note: The default acl is not reflected in any file mode permission bits.
      // Just check for correctness here, and delete the xattr if the acl is empty.
      struct posix_acl* acl;

      acl = os_posix_acl_from_xattr(value, size);
      if (IS_ERR(acl))
         return PTR_ERR(acl);

      if (acl == NULL)
         value = NULL;
      else
         posix_acl_release(acl);

      attrName = XATTR_NAME_POSIX_ACL_DEFAULT;
   }
   else
      return -EOPNOTSUPP;

   if(value)
      return FhgfsOps_setxattr(dentry, attrName, value, size, flags);
   else // value == NULL: Remove the ACL extended attribute.
   {
      int removeRes = FhgfsOps_removexattr(dentry, attrName);

      if (removeRes == 0 || removeRes == -ENODATA) // If XA didn't exist anyway, return 0.
         return 0;
      else
         return removeRes;
   }
}
/**
 * The get-function of the xattr handler which handles the XATTR_NAME_POSIX_ACL_ACCESS and
 * XATTR_NAME_POSIX_ACL_DEFAULT xattrs. The ACL type (taken from the handler flags) selects
 * which of the two attributes is read via FhgfsOps_getxattr().
 *
 * @param name has to be a pointer to an empty string ("").
 * @return result of FhgfsOps_getxattr(); -EINVAL for a non-empty name; -EOPNOTSUPP for an
 *    unknown ACL type.
 */
#if defined(KERNEL_HAS_XATTR_HANDLERS_INODE_ARG)
static int FhgfsXAttrGetACL(const struct xattr_handler* handler, struct dentry* dentry,
   struct inode* inode, const char* name, void* value, size_t size)
{
   int handler_flags = handler->flags;
#elif defined(KERNEL_HAS_XATTR_HANDLER_PTR_ARG)
static int FhgfsXAttrGetACL(const struct xattr_handler* handler, struct dentry* dentry,
   const char* name, void* value, size_t size)
{
   int handler_flags = handler->flags;
#else
int FhgfsXAttrGetACL(struct dentry* dentry, const char* name, void* value, size_t size,
   int handler_flags)
{
#endif
   char* aclXAttrName;

   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(dentry->d_sb), dentry, NULL, __func__, "Called.");

   // For simplicity we enforce an empty name here (which means the name of the Xattr has to be
   // fully given by the POSIX_ACL_XATTR_... defines)
   if(strcmp(name, "") != 0)
      return -EINVAL;

   switch(handler_flags)
   {
      case ACL_TYPE_ACCESS:
         aclXAttrName = XATTR_NAME_POSIX_ACL_ACCESS;
         break;

      case ACL_TYPE_DEFAULT:
         aclXAttrName = XATTR_NAME_POSIX_ACL_DEFAULT;
         break;

      default:
         return -EOPNOTSUPP;
   }

   return FhgfsOps_getxattr(dentry, aclXAttrName, value, size);
}
#endif // KERNEL_HAS_GET_ACL
/**
 * The get-function which is used for all the user.* xattrs.
 *
 * Re-adds the "user." prefix to the given name and forwards to FhgfsOps_getxattr().
 *
 * @param name attribute name without the "user." prefix.
 * @param value buffer for the attribute value.
 * @param size size of the value buffer.
 * @return result of FhgfsOps_getxattr(), or -ENOMEM if the prefixed name could not be
 *    allocated.
 */
#if defined(KERNEL_HAS_XATTR_HANDLERS_INODE_ARG)
static int FhgfsXAttr_getUser(const struct xattr_handler* handler, struct dentry* dentry,
   struct inode* inode, const char* name, void* value, size_t size)
#elif defined(KERNEL_HAS_XATTR_HANDLER_PTR_ARG)
static int FhgfsXAttr_getUser(const struct xattr_handler* handler, struct dentry* dentry,
   const char* name, void* value, size_t size)
#elif defined(KERNEL_HAS_DENTRY_XATTR_HANDLER)
static int FhgfsXAttr_getUser(struct dentry* dentry, const char* name, void* value, size_t size,
   int handler_flags)
#else
static int FhgfsXAttr_getUser(struct inode* inode, const char* name, void* value, size_t size)
#endif
{
   int res; // linux error code or value length (not a FhgfsOpsErr), matches our return type

   char* prefixedName = os_kmalloc(strlen(name) + sizeof(FHGFS_XATTR_USER_PREFIX) );
   // Note: strlen does not count the terminating '\0', but sizeof does. So we have space for
   // exactly one '\0' which coincidentally is just what we need.

#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(dentry->d_sb), dentry, NULL, __func__,
      "(name: %s; size: %zu)", name, size);
#else
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(inode->i_sb), NULL, inode, __func__,
      "(name: %s; size: %zu)", name, size);
#endif

   // add name prefix which has been removed by the generic function
   if(!prefixedName)
      return -ENOMEM;

   strcpy(prefixedName, FHGFS_XATTR_USER_PREFIX);
   strcpy(prefixedName + sizeof(FHGFS_XATTR_USER_PREFIX) - 1, name); // sizeof-1 to remove the '\0'

#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
   res = FhgfsOps_getxattr(dentry, prefixedName, value, size);
#else
   res = FhgfsOps_getxattr(inode, prefixedName, value, size);
#endif

   kfree(prefixedName);

   return res;
}
/**
 * The set-function which is used for all the user.* xattrs.
 *
 * Re-adds the "user." prefix to the given name and then either sets the attribute
 * (FhgfsOps_setxattr() ) or, if value is NULL, removes it (FhgfsOps_removexattr() ).
 *
 * @param name attribute name without the "user." prefix.
 * @param value new attribute value; NULL to remove the attribute.
 * @param size length of value.
 * @return result of the set/remove call, or -ENOMEM if the prefixed name could not be
 *    allocated.
 */
#if defined(KERNEL_HAS_XATTR_HANDLERS_INODE_ARG)
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
static int FhgfsXAttr_setUser(const struct xattr_handler* handler, struct mnt_idmap* id_map,
   struct dentry* dentry, struct inode* inode, const char* name, const void* value, size_t size,
   int flags)
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
static int FhgfsXAttr_setUser(const struct xattr_handler* handler, struct user_namespace* mnt_userns,
   struct dentry* dentry, struct inode* inode, const char* name, const void* value, size_t size,
   int flags)
#else
static int FhgfsXAttr_setUser(const struct xattr_handler* handler, struct dentry* dentry,
   struct inode* inode, const char* name, const void* value, size_t size, int flags)
#endif
#elif defined(KERNEL_HAS_XATTR_HANDLER_PTR_ARG)
static int FhgfsXAttr_setUser(const struct xattr_handler* handler, struct dentry* dentry,
   const char* name, const void* value, size_t size, int flags)
#elif defined(KERNEL_HAS_DENTRY_XATTR_HANDLER)
static int FhgfsXAttr_setUser(struct dentry* dentry, const char* name, const void* value, size_t size,
   int flags, int handler_flags)
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER
{
   int res; // linux error code (not a FhgfsOpsErr), matches our return type

   // note: sizeof() includes the terminating '\0' of the prefix, so the buffer has room for
   // prefix + name + one '\0'
   char* prefixedName = os_kmalloc(strlen(name) + sizeof(FHGFS_XATTR_USER_PREFIX) );

#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(dentry->d_sb), dentry, NULL, __func__,
      "(name: %s)", name);
#else
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(inode->i_sb), NULL, inode, __func__,
      "(name: %s)", name);
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER

   // add name prefix which has been removed by the generic function
   if(!prefixedName)
      return -ENOMEM;

   strcpy(prefixedName, FHGFS_XATTR_USER_PREFIX);
   strcpy(prefixedName + sizeof(FHGFS_XATTR_USER_PREFIX) - 1, name); // sizeof-1 to remove the '\0'

   if (value)
   {
#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
      res = FhgfsOps_setxattr(dentry, prefixedName, value, size, flags);
#else
      res = FhgfsOps_setxattr(inode, prefixedName, value, size, flags);
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER
   }
   else
   { // no value given => remove the attribute
      res = FhgfsOps_removexattr(dentry, prefixedName);
   }

   kfree(prefixedName);

   return res;
}
/**
 * The get-function which is used for all the security.* xattrs.
 *
 * Re-adds the "security." prefix to the given name and forwards to FhgfsOps_getxattr().
 *
 * @param name attribute name without the "security." prefix.
 * @param value buffer for the attribute value.
 * @param size size of the value buffer.
 * @return result of FhgfsOps_getxattr(), or -ENOMEM if the prefixed name could not be
 *    allocated.
 */
#if defined(KERNEL_HAS_XATTR_HANDLERS_INODE_ARG)
static int FhgfsXAttr_getSecurity(const struct xattr_handler* handler, struct dentry* dentry,
   struct inode* inode, const char* name, void* value, size_t size)
#elif defined(KERNEL_HAS_XATTR_HANDLER_PTR_ARG)
static int FhgfsXAttr_getSecurity(const struct xattr_handler* handler, struct dentry* dentry,
   const char* name, void* value, size_t size)
#elif defined(KERNEL_HAS_DENTRY_XATTR_HANDLER)
static int FhgfsXAttr_getSecurity(struct dentry* dentry, const char* name, void* value, size_t size,
   int handler_flags)
#else
static int FhgfsXAttr_getSecurity(struct inode* inode, const char* name, void* value, size_t size)
#endif
{
   int res; // linux error code or value length (not a FhgfsOpsErr), matches our return type

   char* prefixedName = os_kmalloc(strlen(name) + sizeof(FHGFS_XATTR_SECURITY_PREFIX) );
   // Note: strlen does not count the terminating '\0', but sizeof does. So we have space for
   // exactly one '\0' which coincidentally is just what we need.

#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(dentry->d_sb), dentry, NULL, __func__,
      "(name: %s; size: %zu)", name, size);
#else
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(inode->i_sb), NULL, inode, __func__,
      "(name: %s; size: %zu)", name, size);
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER

   // add name prefix which has been removed by the generic function
   if(!prefixedName)
      return -ENOMEM;

   strcpy(prefixedName, FHGFS_XATTR_SECURITY_PREFIX);
   strcpy(prefixedName + sizeof(FHGFS_XATTR_SECURITY_PREFIX) - 1, name); // sizeof-1 to remove '\0'

#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
   res = FhgfsOps_getxattr(dentry, prefixedName, value, size);
#else
   res = FhgfsOps_getxattr(inode, prefixedName, value, size);
#endif // KERNEL_HAS_DENTRY_XATTR_HANDLER

   kfree(prefixedName);

   return res;
}
/**
 * The set-function which is used for all the security.* xattrs.
 *
 * Re-adds the "security." prefix to the given name and then either sets the attribute
 * (FhgfsOps_setxattr() ) or, if value is NULL, removes it (FhgfsOps_removexattr() ).
 *
 * @param name attribute name without the "security." prefix.
 * @param value new attribute value; NULL to remove the attribute.
 * @param size length of value.
 * @return result of the set/remove call, or -ENOMEM if the prefixed name could not be
 *    allocated.
 */
#if defined(KERNEL_HAS_XATTR_HANDLERS_INODE_ARG)
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
static int FhgfsXAttr_setSecurity(const struct xattr_handler* handler, struct mnt_idmap* idmap,
   struct dentry* dentry, struct inode* inode, const char* name, const void* value, size_t size,
   int flags)
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
static int FhgfsXAttr_setSecurity(const struct xattr_handler* handler, struct user_namespace* mnt_userns,
   struct dentry* dentry, struct inode* inode, const char* name, const void* value, size_t size,
   int flags)
#else
static int FhgfsXAttr_setSecurity(const struct xattr_handler* handler, struct dentry* dentry,
   struct inode* inode, const char* name, const void* value, size_t size, int flags)
#endif
#elif defined(KERNEL_HAS_XATTR_HANDLER_PTR_ARG)
static int FhgfsXAttr_setSecurity(const struct xattr_handler* handler, struct dentry* dentry,
   const char* name, const void* value, size_t size, int flags)
#elif defined(KERNEL_HAS_DENTRY_XATTR_HANDLER)
static int FhgfsXAttr_setSecurity(struct dentry* dentry, const char* name, const void* value, size_t size,
   int flags, int handler_flags)
#endif
{
   int res; // linux error code (not a FhgfsOpsErr), matches our return type

   // note: sizeof() includes the terminating '\0' of the prefix, so the buffer has room for
   // prefix + name + one '\0'
   char* prefixedName = os_kmalloc(strlen(name) + sizeof(FHGFS_XATTR_SECURITY_PREFIX) );

#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(dentry->d_sb), dentry, NULL, __func__,
      "(name: %s)", name);
#else
   FhgfsOpsHelper_logOpDebug(FhgfsOps_getApp(inode->i_sb), NULL, inode, __func__,
      "(name: %s)", name);
#endif

   // add name prefix which has been removed by the generic function
   if(!prefixedName)
      return -ENOMEM;

   strcpy(prefixedName, FHGFS_XATTR_SECURITY_PREFIX);
   strcpy(prefixedName + sizeof(FHGFS_XATTR_SECURITY_PREFIX) - 1, name); // sizeof-1 to remove '\0'

   if (value)
   {
#ifdef KERNEL_HAS_DENTRY_XATTR_HANDLER
      res = FhgfsOps_setxattr(dentry, prefixedName, value, size, flags);
#else
      res = FhgfsOps_setxattr(inode, prefixedName, value, size, flags);
#endif
   }
   else
   { // no value given => remove the attribute
      res = FhgfsOps_removexattr(dentry, prefixedName);
   }

   kfree(prefixedName);

   return res;
}
#ifdef KERNEL_HAS_GET_ACL
/**
 * Handler for the XATTR_NAME_POSIX_ACL_ACCESS xattr. The ACL type in .flags is what
 * FhgfsXAttrGetACL() / FhgfsXAttrSetACL() evaluate as handler_flags.
 */
struct xattr_handler fhgfs_xattr_acl_access_handler =
{
#ifdef KERNEL_HAS_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_ACCESS,
#else
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
#endif
.flags = ACL_TYPE_ACCESS,
.list = NULL,
.get = FhgfsXAttrGetACL,
.set = FhgfsXAttrSetACL,
};
/**
 * Handler for the XATTR_NAME_POSIX_ACL_DEFAULT xattr; shares the get/set functions with the
 * access ACL handler and differs only in name and ACL type.
 */
struct xattr_handler fhgfs_xattr_acl_default_handler =
{
#ifdef KERNEL_HAS_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
#else
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
#endif
.flags = ACL_TYPE_DEFAULT,
.list = NULL,
.get = FhgfsXAttrGetACL,
.set = FhgfsXAttrSetACL,
};
#endif // KERNEL_HAS_GET_ACL
/**
 * Handler for all xattrs with the "user." prefix (FHGFS_XATTR_USER_PREFIX).
 */
struct xattr_handler fhgfs_xattr_user_handler =
{
.prefix = FHGFS_XATTR_USER_PREFIX,
.list = NULL,
.set = FhgfsXAttr_setUser,
.get = FhgfsXAttr_getUser,
};
/**
 * Handler for all xattrs with the "security." prefix (FHGFS_XATTR_SECURITY_PREFIX).
 */
struct xattr_handler fhgfs_xattr_security_handler =
{
.prefix = FHGFS_XATTR_SECURITY_PREFIX,
.list = NULL,
.set = FhgfsXAttr_setSecurity,
.get = FhgfsXAttr_getSecurity,
};
/**
 * NULL-terminated handler table including the ACL handlers (if KERNEL_HAS_GET_ACL is set)
 * in addition to the user.* and security.* handlers. The constness of the array type depends
 * on the kernel (see switches below).
 */
#if defined(KERNEL_HAS_CONST_XATTR_CONST_PTR_HANDLER)
const struct xattr_handler* const fhgfs_xattr_handlers[] =
#elif defined(KERNEL_HAS_CONST_XATTR_HANDLER)
const struct xattr_handler* fhgfs_xattr_handlers[] =
#else
struct xattr_handler* fhgfs_xattr_handlers[] =
#endif
{
#ifdef KERNEL_HAS_GET_ACL
&fhgfs_xattr_acl_access_handler,
&fhgfs_xattr_acl_default_handler,
#endif
&fhgfs_xattr_user_handler,
&fhgfs_xattr_security_handler,
NULL
};
/**
 * NULL-terminated handler table without the ACL handlers: only the user.* and security.*
 * handlers. The constness of the array type depends on the kernel (see switches below).
 */
#if defined(KERNEL_HAS_CONST_XATTR_CONST_PTR_HANDLER)
const struct xattr_handler* const fhgfs_xattr_handlers_noacl[] =
#elif defined(KERNEL_HAS_CONST_XATTR_HANDLER)
const struct xattr_handler* fhgfs_xattr_handlers_noacl[] =
#else
struct xattr_handler* fhgfs_xattr_handlers_noacl[] =
#endif
{
&fhgfs_xattr_user_handler,
&fhgfs_xattr_security_handler,
NULL
};

View File

@@ -0,0 +1,29 @@
#ifndef FHGFSXATTRHANDLERS_H_
#define FHGFSXATTRHANDLERS_H_
#include <linux/xattr.h>
/* Declarations of the NULL-terminated xattr handler tables. The three variants differ only in
   const-qualification, selected by the KERNEL_HAS_CONST_XATTR_* feature macros.
   fhgfs_xattr_handlers (the table with ACL handlers) is only declared when KERNEL_HAS_GET_ACL is
   defined. */
#if defined(KERNEL_HAS_CONST_XATTR_CONST_PTR_HANDLER)
#ifdef KERNEL_HAS_GET_ACL
extern const struct xattr_handler* const fhgfs_xattr_handlers[];
#endif
extern const struct xattr_handler* const fhgfs_xattr_handlers_noacl[];
#elif defined(KERNEL_HAS_CONST_XATTR_HANDLER)
#ifdef KERNEL_HAS_GET_ACL
extern const struct xattr_handler* fhgfs_xattr_handlers[];
#endif
extern const struct xattr_handler* fhgfs_xattr_handlers_noacl[];
#else
#ifdef KERNEL_HAS_GET_ACL
extern struct xattr_handler* fhgfs_xattr_handlers[];
#endif
extern struct xattr_handler* fhgfs_xattr_handlers_noacl[];
#endif
#endif /* FHGFSXATTRHANDLERS_H_ */

View File

@@ -0,0 +1,141 @@
#ifndef FSDIRINFO_H_
#define FSDIRINFO_H_
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/vector/Int64CpyVec.h>
#include <common/toolkit/vector/StrCpyVec.h>
#include <common/toolkit/vector/UInt8Vec.h>
#include <common/Common.h>
#include "FsObjectInfo.h"
/* forward declaration and prototypes; all functions are defined inline further below in this
   header */
struct FsDirInfo;
typedef struct FsDirInfo FsDirInfo;
// construction / destruction
static inline void FsDirInfo_init(FsDirInfo* this, App* app);
static inline FsDirInfo* FsDirInfo_construct(App* app);
static inline void FsDirInfo_uninit(FsObjectInfo* this);
// getters & setters
static inline loff_t FsDirInfo_getServerOffset(FsDirInfo* this);
static inline void FsDirInfo_setServerOffset(FsDirInfo* this, int64_t serverOffset);
static inline size_t FsDirInfo_getCurrentContentsPos(FsDirInfo* this);
static inline void FsDirInfo_setCurrentContentsPos(FsDirInfo* this, size_t currentContentsPos);
static inline struct StrCpyVec* FsDirInfo_getDirContents(FsDirInfo* this);
static inline struct StrCpyVec* FsDirInfo_getEntryIDs(FsDirInfo* this);
static inline struct UInt8Vec* FsDirInfo_getDirContentsTypes(FsDirInfo* this);
static inline struct Int64CpyVec* FsDirInfo_getServerOffsets(FsDirInfo* this);
static inline void FsDirInfo_setEndOfDir(FsDirInfo* this, bool endOfDir);
static inline bool FsDirInfo_getEndOfDir(FsDirInfo* this);
/**
 * Directory listing state. The FsObjectInfo base must stay the first member, because the code in
 * this header casts between FsDirInfo* and FsObjectInfo*.
 */
struct FsDirInfo
{
FsObjectInfo fsObjectInfo;
StrCpyVec dirContents; // entry names
UInt8Vec dirContentsTypes; // DirEntryType elements matching dirContents vector
StrCpyVec entryIDs; // entryID elements matching dirContents vector
Int64CpyVec serverOffsets; // dir entry offsets for telldir() matching dirContents vector
int64_t serverOffset; /* offset for the next incremental list request to the server
(equals last element of serverOffsets vector) */
size_t currentContentsPos; // current local pos in dirContents (>=0 && <dirContents_len)
bool endOfDir; // true if server reached end of dir entries during last query
};
/**
 * Initializes a caller-provided FsDirInfo: base part as FsObjectType_DIRECTORY, all vectors,
 * zeroed offsets/position and endOfDir=false.
 *
 * @param app passed through to FsObjectInfo_init
 */
void FsDirInfo_init(FsDirInfo* this, App* app)
{
   FsObjectInfo* base = (FsObjectInfo*)this;

   FsObjectInfo_init(base, app, FsObjectType_DIRECTORY);

   // assign virtual functions (must happen after the base init)
   base->uninit = FsDirInfo_uninit;

   // listing position state
   this->endOfDir = false;
   this->currentContentsPos = 0;
   this->serverOffset = 0;

   // per-entry vectors
   StrCpyVec_init(&this->dirContents);
   UInt8Vec_init(&this->dirContentsTypes);
   StrCpyVec_init(&this->entryIDs);
   Int64CpyVec_init(&this->serverOffsets);
}
/**
 * Allocates a FsDirInfo via os_kmalloc and initializes it.
 *
 * @return the new object or NULL if the allocation failed
 */
struct FsDirInfo* FsDirInfo_construct(App* app)
{
   FsDirInfo* dirInfo = (FsDirInfo*)os_kmalloc(sizeof(FsDirInfo) );

   if(unlikely(!dirInfo) )
      return dirInfo;

   FsDirInfo_init(dirInfo, app);

   return dirInfo;
}
/**
 * Virtual uninit implementation for FsDirInfo: uninits the four entry vectors. Does not free the
 * object itself.
 *
 * @param this must actually point to a FsDirInfo
 */
void FsDirInfo_uninit(FsObjectInfo* this)
{
   FsDirInfo* dirInfo = (FsDirInfo*)this;

   Int64CpyVec_uninit(&dirInfo->serverOffsets);
   StrCpyVec_uninit(&dirInfo->entryIDs);
   UInt8Vec_uninit(&dirInfo->dirContentsTypes);
   StrCpyVec_uninit(&dirInfo->dirContents);
}
/**
 * @return offset for the next incremental list request to the server
 */
loff_t FsDirInfo_getServerOffset(FsDirInfo* this)
{
   const int64_t nextOffset = this->serverOffset;

   return nextOffset;
}
/**
 * @param serverOffset offset for the next incremental list request to the server
 */
void FsDirInfo_setServerOffset(FsDirInfo* this, int64_t serverOffset)
{
this->serverOffset = serverOffset;
}
/**
 * @return current local position in the dirContents vector
 */
size_t FsDirInfo_getCurrentContentsPos(FsDirInfo* this)
{
   const size_t localPos = this->currentContentsPos;

   return localPos;
}
/**
 * @param currentContentsPos new local position in the dirContents vector (stored as is, not
 * range-checked here)
 */
void FsDirInfo_setCurrentContentsPos(FsDirInfo* this, size_t currentContentsPos)
{
this->currentContentsPos = currentContentsPos;
}
/**
 * @return pointer to the embedded vector of entry names (not a copy)
 */
StrCpyVec* FsDirInfo_getDirContents(FsDirInfo* this)
{
   StrCpyVec* entryNames = &this->dirContents;

   return entryNames;
}
/**
 * @return pointer to the embedded vector of entryIDs, matching dirContents vector (not a copy)
 */
StrCpyVec* FsDirInfo_getEntryIDs(FsDirInfo* this)
{
   StrCpyVec* ids = &this->entryIDs;

   return ids;
}
/**
 * @return pointer to the embedded vector of DirEntryType elements, matching dirContents vector
 */
UInt8Vec* FsDirInfo_getDirContentsTypes(FsDirInfo* this)
{
   UInt8Vec* entryTypes = &this->dirContentsTypes;

   return entryTypes;
}
/**
 * @return pointer to the embedded vector of dir entry offsets, matching dirContents vector
 */
Int64CpyVec* FsDirInfo_getServerOffsets(FsDirInfo* this)
{
   Int64CpyVec* offsets = &this->serverOffsets;

   return offsets;
}
/**
 * @param endOfDir true if the server reached the end of dir entries during the last query
 */
void FsDirInfo_setEndOfDir(FsDirInfo* this, bool endOfDir)
{
this->endOfDir = endOfDir;
}
/**
 * @return true if the server reached the end of dir entries during the last query
 */
bool FsDirInfo_getEndOfDir(FsDirInfo* this)
{
   const bool reachedEnd = this->endOfDir;

   return reachedEnd;
}
#endif /*FSDIRINFO_H_*/

View File

@@ -0,0 +1,156 @@
#include <common/storage/striping/StripePattern.h>
#include <filesystem/FsFileInfo.h>
#include <common/toolkit/StringTk.h>
/**
 * Per-open-file state. The FsObjectInfo base must stay the first member, because the code in this
 * file casts between FsFileInfo* and FsObjectInfo*.
 */
struct FsFileInfo
{
FsObjectInfo fsObjectInfo;
unsigned accessFlags; // fhgfs access flags (not OS flags)
FileHandleType handleType; // passed to FhgfsInode_getRefIOInfo() by FsFileInfo_getIOInfo()
bool appending;
ssize_t cacheHits; // hits/misses counter (min and max limited by thresholds)
bool allowCaching; // false when O_DIRECT or caching disabled in config
loff_t lastReadOffset; // offset after last read (to decide if IO would be a cache hit)
loff_t lastWriteOffset; // offset after last write (to decide if IO would be a cache hit)
bool usedEntryLocking; // true when entry lock methods were used (needed for cleanup)
};
/**
 * Initializes a caller-provided FsFileInfo: base part as FsObjectType_FILE, the given flags and
 * handle type, and defaults for all remaining fields (caching allowed, initial cache hits value,
 * zeroed offsets).
 *
 * @param accessFlags fhgfs access flags (not OS flags)
 */
void FsFileInfo_init(FsFileInfo* this, App* app, unsigned accessFlags, FileHandleType handleType)
{
   FsObjectInfo* base = (FsObjectInfo*)this;

   FsObjectInfo_init(base, app, FsObjectType_FILE);

   // assign virtual functions (must happen after the base init)
   base->uninit = FsFileInfo_uninit;

   // caller-provided values
   this->handleType = handleType;
   this->accessFlags = accessFlags;

   // defaults
   this->usedEntryLocking = false;
   this->appending = false;
   this->allowCaching = true;
   this->cacheHits = FSFILEINFO_CACHE_HITS_INITIAL;
   this->lastWriteOffset = 0;
   this->lastReadOffset = 0;
}
/**
 * Allocates a FsFileInfo via os_kmalloc and initializes it.
 *
 * @param accessFlags fhgfs access flags (not OS flags)
 * @return the new object or NULL if the allocation failed
 */
struct FsFileInfo* FsFileInfo_construct(App* app, unsigned accessFlags, FileHandleType handleType)
{
   FsFileInfo* fileInfo = (FsFileInfo*)os_kmalloc(sizeof(FsFileInfo) );

   if(unlikely(!fileInfo) )
      return fileInfo;

   FsFileInfo_init(fileInfo, app, accessFlags, handleType);

   return fileInfo;
}
/**
 * Virtual uninit implementation for FsFileInfo (assigned in FsFileInfo_init). Intentionally empty.
 */
void FsFileInfo_uninit(FsObjectInfo* this)
{
}
/**
 * Increase cache hits counter by one.
 *
 * Note: The counter is capped at FSFILEINFO_CACHE_HITS_THRESHOLD.
 */
void FsFileInfo_incCacheHits(FsFileInfo* this)
{
   if(this->cacheHits >= FSFILEINFO_CACHE_HITS_THRESHOLD)
      return; // already at upper limit

   this->cacheHits += 1;
}
/**
 * Decrease cache hits counter by one.
 *
 * Note: The counter does not drop below FSFILEINFO_CACHE_MISS_THRESHOLD.
 */
void FsFileInfo_decCacheHits(FsFileInfo* this)
{
   if(this->cacheHits <= FSFILEINFO_CACHE_MISS_THRESHOLD)
      return; // already at lower limit

   this->cacheHits -= 1;
}
/**
 * @return fhgfs access flags (not OS flags) given at init
 */
unsigned FsFileInfo_getAccessFlags(FsFileInfo* this)
{
   const unsigned flags = this->accessFlags;

   return flags;
}
/**
 * @return handle type given at init
 */
FileHandleType FsFileInfo_getHandleType(FsFileInfo* this)
{
   const FileHandleType type = this->handleType;

   return type;
}
/**
 * Sets the appending flag (initialized to false in FsFileInfo_init).
 */
void FsFileInfo_setAppending(FsFileInfo* this, bool appending)
{
this->appending = appending;
}
/**
 * @return current value of the appending flag
 */
bool FsFileInfo_getAppending(FsFileInfo* this)
{
   const bool isAppending = this->appending;

   return isAppending;
}
/**
 * @return current hits/misses counter value (may be negative)
 */
ssize_t FsFileInfo_getCacheHits(FsFileInfo* this)
{
   const ssize_t hits = this->cacheHits;

   return hits;
}
/**
 * Sets the allowCaching flag (initialized to true in FsFileInfo_init).
 */
void FsFileInfo_setAllowCaching(FsFileInfo* this, bool allowCaching)
{
this->allowCaching = allowCaching;
}
/**
 * @return current value of the allowCaching flag
 */
bool FsFileInfo_getAllowCaching(FsFileInfo* this)
{
   const bool cachingAllowed = this->allowCaching;

   return cachingAllowed;
}
/**
 * @param lastReadOffset offset after last read
 */
void FsFileInfo_setLastReadOffset(FsFileInfo* this, loff_t lastReadOffset)
{
this->lastReadOffset = lastReadOffset;
}
/**
 * @return offset after last read
 */
loff_t FsFileInfo_getLastReadOffset(FsFileInfo* this)
{
   const loff_t readOffset = this->lastReadOffset;

   return readOffset;
}
/**
 * @param lastWriteOffset offset after last write
 */
void FsFileInfo_setLastWriteOffset(FsFileInfo* this, loff_t lastWriteOffset)
{
this->lastWriteOffset = lastWriteOffset;
}
/**
 * @return offset after last write
 */
loff_t FsFileInfo_getLastWriteOffset(FsFileInfo* this)
{
   const loff_t writeOffset = this->lastWriteOffset;

   return writeOffset;
}
/**
 * Calls FhgfsInode_getRefIOInfo() on the given inode with this file's handle type and access
 * flags.
 *
 * @param outIOInfo passed through to FhgfsInode_getRefIOInfo
 */
void FsFileInfo_getIOInfo(FsFileInfo* this, struct FhgfsInode* fhgfsInode,
   struct RemotingIOInfo* outIOInfo)
{
   const FileHandleType type = this->handleType;
   const unsigned flags = this->accessFlags;

   FhgfsInode_getRefIOInfo(fhgfsInode, type, flags, outIOInfo);
}
/**
 * Marks that entry lock methods were used on this file (needed for cleanup). There is no way to
 * reset the flag other than re-initializing the object.
 */
void FsFileInfo_setUsedEntryLocking(FsFileInfo* this)
{
this->usedEntryLocking = true;
}
/**
 * @return true if FsFileInfo_setUsedEntryLocking() was called on this object
 */
bool FsFileInfo_getUsedEntryLocking(FsFileInfo* this)
{
   const bool usedLocking = this->usedEntryLocking;

   return usedLocking;
}

View File

@@ -0,0 +1,51 @@
#ifndef FSFILEINFO_H_
#define FSFILEINFO_H_
#include <common/Common.h>
#include <filesystem/FhgfsInode.h>
#include "FsObjectInfo.h"
/* limits and initial value for the FsFileInfo cacheHits counter, plus the slow-start read length */
#define FSFILEINFO_CACHE_HITS_INITIAL (3) /* some cache-optimistic initial hits value */
#define FSFILEINFO_CACHE_HITS_THRESHOLD (5) /* hits won't ever get higher than this number */
#define FSFILEINFO_CACHE_MISS_THRESHOLD (-5) /* hits won't ever get lower than this number */
#define FSFILEINFO_CACHE_SLOWSTART_READLEN (64*1024) /* smaller read-ahead for offset==0, e.g. if
some process looks only at file starts */
// NOTE(review): forward-declaring an enum is a GNU C extension, not ISO C
enum FileBufferType; // forward declaration
struct StripePattern; // forward declaration
struct FhgfsInode; // forward declaration
struct RemotingIOInfo; // forward declaration
// struct FsFileInfo is opaque to users of this header
struct FsFileInfo;
typedef struct FsFileInfo FsFileInfo;
// construction / destruction
extern void FsFileInfo_init(FsFileInfo* this, App* app, unsigned accessFlags,
FileHandleType handleType);
extern FsFileInfo* FsFileInfo_construct(App* app, unsigned accessFlags, FileHandleType handleType);
extern void FsFileInfo_uninit(FsObjectInfo* this);
// cache hits counter
extern void FsFileInfo_incCacheHits(FsFileInfo* this);
extern void FsFileInfo_decCacheHits(FsFileInfo* this);
// getters & setters
extern unsigned FsFileInfo_getAccessFlags(FsFileInfo* this);
extern FileHandleType FsFileInfo_getHandleType(FsFileInfo* this);
extern void FsFileInfo_setAppending(FsFileInfo* this, bool appending);
extern bool FsFileInfo_getAppending(FsFileInfo* this);
extern ssize_t FsFileInfo_getCacheHits(FsFileInfo* this);
extern void FsFileInfo_setAllowCaching(FsFileInfo* this, bool allowCaching);
extern bool FsFileInfo_getAllowCaching(FsFileInfo* this);
extern void FsFileInfo_setLastReadOffset(FsFileInfo* this, loff_t lastReadOffset);
extern loff_t FsFileInfo_getLastReadOffset(FsFileInfo* this);
extern void FsFileInfo_setLastWriteOffset(FsFileInfo* this, loff_t lastWriteOffset);
extern loff_t FsFileInfo_getLastWriteOffset(FsFileInfo* this);
extern void FsFileInfo_getIOInfo(FsFileInfo* this, struct FhgfsInode* fhgfsInode,
struct RemotingIOInfo* outIOInfo);
extern void FsFileInfo_setUsedEntryLocking(FsFileInfo* this);
extern bool FsFileInfo_getUsedEntryLocking(FsFileInfo* this);
#endif /*FSFILEINFO_H_*/

View File

@@ -0,0 +1,68 @@
#ifndef FSOBJECTINFO_H_
#define FSOBJECTINFO_H_
#include <common/Common.h>
#include <app/App.h>
/* type tag stored in FsObjectInfo to tell directory and file objects apart */
enum FsObjectType
{FsObjectType_DIRECTORY=1, FsObjectType_FILE=2};
typedef enum FsObjectType FsObjectType;
/**
* Note: Consider this to be an abstract class. Mind the virtual function pointers.
*/
struct FsObjectInfo;
typedef struct FsObjectInfo FsObjectInfo;
// all functions are defined inline further below in this header
static inline void FsObjectInfo_init(FsObjectInfo* this, App* app, FsObjectType objectType);
static inline void FsObjectInfo_virtualDestruct(FsObjectInfo* this);
// getters & setters
static inline App* FsObjectInfo_getApp(FsObjectInfo* this);
static inline FsObjectType FsObjectInfo_getObjectType(FsObjectInfo* this);
/* common base part of per-object info structs */
struct FsObjectInfo
{
App* app;
FsObjectType objectType;
// virtual functions
void (*uninit) (FsObjectInfo* this); // NULL after FsObjectInfo_init; called by virtualDestruct
};
/**
 * Initializes the base part: stores app and objectType and clears the virtual uninit pointer.
 *
 * Note: The uninit pointer must be assigned afterwards, before the object is passed to
 * FsObjectInfo_virtualDestruct().
 */
void FsObjectInfo_init(FsObjectInfo* this, App* app, FsObjectType objectType)
{
   // clear virtual function pointer
   this->uninit = NULL;

   this->objectType = objectType;
   this->app = app;
}
/**
 * Calls the virtual uninit method and kfrees the object.
 *
 * Note: The uninit pointer is called unconditionally, so it must have been assigned.
 */
void FsObjectInfo_virtualDestruct(FsObjectInfo* this)
{
   void (*uninitMethod)(FsObjectInfo*) = this->uninit;

   uninitMethod(this);

   kfree(this);
}
/**
 * @return the App pointer given at init
 */
App* FsObjectInfo_getApp(FsObjectInfo* this)
{
   App* app = this->app;

   return app;
}
/**
 * @return the object type given at init
 */
FsObjectType FsObjectInfo_getObjectType(FsObjectInfo* this)
{
   const FsObjectType type = this->objectType;

   return type;
}
#endif /*FSOBJECTINFO_H_*/

View File

@@ -0,0 +1,742 @@
#include <filesystem/ProcFsHelper.h>
#include "ProcFs.h"
#include <linux/fs.h>
#include <linux/stat.h>
// general parent dir (path as passed to proc_mkdir with a NULL parent)
#define BEEGFS_PROC_DIR_NAME "fs/" BEEGFS_MODULE_NAME_STR
// size of the buffers used to build proc path names
#define BEEGFS_PROC_NAMEBUF_LEN 4096
// names of the read-only entries
#define BEEGFS_PROC_ENTRY_CONFIG "config"
#define BEEGFS_PROC_ENTRY_STATUS ".status"
#define BEEGFS_PROC_ENTRY_FSUUID "fs_uuid"
#define BEEGFS_PROC_ENTRY_MGMTNODES "mgmt_nodes"
#define BEEGFS_PROC_ENTRY_METANODES "meta_nodes"
#define BEEGFS_PROC_ENTRY_STORAGENODES "storage_nodes"
#define BEEGFS_PROC_ENTRY_CLIENTINFO "client_info"
// names of the read+write entries
#define BEEGFS_PROC_ENTRY_RETRIESENABLED "conn_retries_enabled"
#define BEEGFS_PROC_ENTRY_NETBENCHENABLED "netbench_mode"
#define BEEGFS_PROC_ENTRY_DROPCONNS "drop_conns"
#define BEEGFS_PROC_ENTRY_LOGLEVELS "log_levels"
// names of further read-only entries
#define BEEGFS_PROC_ENTRY_METATARGETSTATES "meta_target_state"
#define BEEGFS_PROC_ENTRY_STORAGETARGETSTATES "storage_target_state"
// name of a further read+write entry
#define BEEGFS_PROC_ENTRY_REMAPCONNFAILURE "remap_connection_failure"
/**
* Initializer for read-only proc file ops
*
* Expands to the open/read/seek/release members; the member names differ between struct proc_ops
* (KERNEL_HAS_PROC_OPS) and struct file_operations. There is no trailing comma, so users can
* append further members.
*/
#if defined(KERNEL_HAS_PROC_OPS)
#define BEEGFS_PROC_FOPS_INITIALIZER \
.proc_open = __ProcFs_open, \
.proc_read = seq_read, \
.proc_lseek = seq_lseek, \
.proc_release = single_release
#else
#define BEEGFS_PROC_FOPS_INITIALIZER \
.open = __ProcFs_open, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = single_release
#endif
/* name of the write method member: "proc_write" in struct proc_ops, "write" in struct
   file_operations */
#if defined(KERNEL_HAS_PROC_OPS)
#define PROC_OPS_WRITE_MEMBER proc_write
#else
#define PROC_OPS_WRITE_MEMBER write
#endif
/**
* generic file ops for procfs entries
*
* (shared by all read-only entries; contains no write method)
*/
#if defined(KERNEL_HAS_PROC_OPS)
static const struct proc_ops fhgfs_proc_fops =
#else
static const struct file_operations fhgfs_proc_fops =
#endif
{
BEEGFS_PROC_FOPS_INITIALIZER
};
/*
* for read-only entries: contains name and "show method" for a procfs entry
*/
struct fhgfs_proc_file
{
char name[32]; // filename (an empty name terminates the table)
int (*show)(struct seq_file *, void *); // the "show method" of this file
};
/**
* all our read-only procfs entries (terminated by empty element)
*
* The show method of each element is stored as the data of its proc entry in
* ProcFs_createEntries() and is looked up again in __ProcFs_open().
*/
static const struct fhgfs_proc_file fhgfs_proc_files[] =
{
{ BEEGFS_PROC_ENTRY_CONFIG, &__ProcFs_readV2_config },
{ BEEGFS_PROC_ENTRY_STATUS, &__ProcFs_readV2_status },
{ BEEGFS_PROC_ENTRY_FSUUID, &__ProcFs_readV2_fsUUID },
{ BEEGFS_PROC_ENTRY_MGMTNODES, &__ProcFs_readV2_mgmtNodes },
{ BEEGFS_PROC_ENTRY_METANODES, &__ProcFs_readV2_metaNodes },
{ BEEGFS_PROC_ENTRY_STORAGENODES, &__ProcFs_readV2_storageNodes },
{ BEEGFS_PROC_ENTRY_CLIENTINFO, &__ProcFs_readV2_clientInfo },
{ BEEGFS_PROC_ENTRY_METATARGETSTATES, &__ProcFs_readV2_metaTargetStates },
{ BEEGFS_PROC_ENTRY_STORAGETARGETSTATES, &__ProcFs_readV2_storageTargetStates },
{ "", NULL } // last element must be empty (for loop termination)
};
/*
* for read+write entries: contains name, show method and write method for a procfs entry
*/
struct fhgfs_proc_file_rw
{
char name[32]; // filename (an empty name terminates the table)
int (*show)(struct seq_file *, void *); // the show method of this file
// (the write method is not a separate field; it is the write member of proc_fops below)
#if defined(KERNEL_HAS_PROC_OPS)
struct proc_ops proc_fops;
#else
struct file_operations proc_fops;
#endif
};
/**
* all our read+write procfs entries (terminated by empty element).
*
* unlike the read-only entries, these need different write methods to be assigned during proc
* entry registration, so we need individual file ops for each entry here.
*/
static const struct fhgfs_proc_file_rw fhgfs_proc_files_rw[] =
{
{ BEEGFS_PROC_ENTRY_RETRIESENABLED, &__ProcFs_readV2_connRetriesEnabled,
{
BEEGFS_PROC_FOPS_INITIALIZER,
.PROC_OPS_WRITE_MEMBER = &__ProcFs_writeV2_connRetriesEnabled,
},
},
{ BEEGFS_PROC_ENTRY_NETBENCHENABLED, &__ProcFs_readV2_netBenchModeEnabled,
{
BEEGFS_PROC_FOPS_INITIALIZER,
.PROC_OPS_WRITE_MEMBER = &__ProcFs_writeV2_netBenchModeEnabled,
},
},
// reading this entry yields nothing; it only exists for its write method
{ BEEGFS_PROC_ENTRY_DROPCONNS, &__ProcFs_readV2_nothing,
{
BEEGFS_PROC_FOPS_INITIALIZER,
.PROC_OPS_WRITE_MEMBER = &__ProcFs_writeV2_dropConns,
},
},
{ BEEGFS_PROC_ENTRY_LOGLEVELS, &__ProcFs_readV2_logLevels,
{
BEEGFS_PROC_FOPS_INITIALIZER,
.PROC_OPS_WRITE_MEMBER = &__ProcFs_writeV2_logLevels,
},
},
{ BEEGFS_PROC_ENTRY_REMAPCONNFAILURE, &__ProcFs_read_remapConnectionFailure,
{
BEEGFS_PROC_FOPS_INITIALIZER,
.PROC_OPS_WRITE_MEMBER = &__ProcFs_write_remapConnectionFailure,
},
},
// last element must be empty (for loop termination)
{{ 0 }}
};
/**
 * Creates the general parent dir (BEEGFS_PROC_DIR_NAME). Meant to be called only once at module
 * load.
 *
 * Note: A failure is only logged.
 */
void ProcFs_createGeneralDir(void)
{
   if(proc_mkdir(BEEGFS_PROC_DIR_NAME, NULL) )
      return; // created

   printk_fhgfs(KERN_INFO, "Failed to create proc dir: " BEEGFS_PROC_DIR_NAME "\n");
}
/**
* Removes the general parent dir (BEEGFS_PROC_DIR_NAME). Meant to be called only once at module
* unload.
*/
void ProcFs_removeGeneralDir(void)
{
remove_proc_entry(BEEGFS_PROC_DIR_NAME, NULL);
}
/**
 * Creates the dir and entries for a specific mountpoint.
 *
 * Note: Uses sessionID (the alias of the local node) to create a unique dir for the mountpoint.
 * Note: Failures are only logged. Creation stops at the first failure; entries created up to
 * that point are left in place (ProcFs_removeEntries() removes them by name).
 *
 * @param app stored as "->data" of the mountpoint dir, so that the file ops can find it
 */
void ProcFs_createEntries(App* app)
{
   NodeString sessionID;
   Node* localNode = App_getLocalNode(app);

   struct proc_dir_entry* procDir;

   const struct fhgfs_proc_file* procFile;
   const struct fhgfs_proc_file_rw* procFileRW;

   char* dirNameBuf = vmalloc(BEEGFS_PROC_NAMEBUF_LEN);
   if(unlikely(!dirNameBuf) )
   { // vmalloc can fail and we must not format the name into a NULL buffer
      printk_fhgfs(KERN_INFO, "Failed to alloc name buffer for proc dir\n");
      return;
   }

   // create unique directory for this clientID and store app pointer as "->data"

   Node_copyAlias(localNode, &sessionID);
   scnprintf(dirNameBuf, BEEGFS_PROC_NAMEBUF_LEN, BEEGFS_PROC_DIR_NAME "/%s", sessionID.buf);

   procDir = __ProcFs_mkDir(dirNameBuf, app);
   if(!procDir)
   {
      printk_fhgfs(KERN_INFO, "Failed to create proc dir: %s\n", dirNameBuf);
      goto clean_up;
   }

   // create entries

   /* note: linux-3.10 kills create_proc_(read_)entry and uses proc_create_data instead.
      proc_create_data existed for earlier linux versions already, so we use it there, too. */

   // create read-only proc files (the show method is stored as entry data for __ProcFs_open)

   for(procFile = fhgfs_proc_files; procFile->name[0]; procFile++)
   {
      struct proc_dir_entry* currentProcFsEntry = proc_create_data(
         procFile->name, S_IFREG | S_IRUGO, procDir, &fhgfs_proc_fops, procFile->show);

      if(!currentProcFsEntry)
      {
         printk_fhgfs(KERN_INFO, "Failed to create read-only proc entry in %s: %s\n",
            dirNameBuf, procFile->name);
         goto clean_up;
      }
   }

   // create read+write proc files (each entry brings its own file ops with its write method)

   for(procFileRW = fhgfs_proc_files_rw; procFileRW->name[0]; procFileRW++)
   {
      struct proc_dir_entry* currentProcFsEntry = proc_create_data(
         procFileRW->name, S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP, procDir, &procFileRW->proc_fops,
         procFileRW->show);

      if(!currentProcFsEntry)
      {
         printk_fhgfs(KERN_INFO, "Failed to create read+write proc entry in %s: %s\n",
            dirNameBuf, procFileRW->name);
         goto clean_up;
      }
   }

clean_up:
   SAFE_VFREE(dirNameBuf);
}
/**
 * Removes the dir and entries for a specific mountpoint.
 *
 * Note: Uses sessionID (the alias of the local node) for the unique dir of the mountpoint.
 * Note: The entry names are taken from the same tables that ProcFs_createEntries() uses, so the
 * two functions cannot get out of sync.
 * Note: If the name buffer cannot be allocated, this is logged and nothing is removed.
 */
void ProcFs_removeEntries(App* app)
{
   NodeString sessionID;
   Node* localNode = App_getLocalNode(app);

   const struct fhgfs_proc_file* procFile;
   const struct fhgfs_proc_file_rw* procFileRW;

   int dirNameLen; // length of the dir path at the start of nameBuf (excluding terminator)

   // single buffer: holds the dir path; "/<entryName>" is appended behind it per entry
   char* nameBuf = vmalloc(BEEGFS_PROC_NAMEBUF_LEN);
   if(unlikely(!nameBuf) )
   { // vmalloc can fail and we must not format the names into a NULL buffer
      printk_fhgfs(KERN_INFO, "Failed to alloc name buffer for removal of proc entries\n");
      return;
   }

   Node_copyAlias(localNode, &sessionID);
   dirNameLen = scnprintf(nameBuf, BEEGFS_PROC_NAMEBUF_LEN, BEEGFS_PROC_DIR_NAME "/%s",
      sessionID.buf);

   // remove read-only entries

   for(procFile = fhgfs_proc_files; procFile->name[0]; procFile++)
   {
      scnprintf(nameBuf + dirNameLen, BEEGFS_PROC_NAMEBUF_LEN - dirNameLen, "/%s",
         procFile->name);
      remove_proc_entry(nameBuf, NULL);
   }

   // remove read+write entries

   for(procFileRW = fhgfs_proc_files_rw; procFileRW->name[0]; procFileRW++)
   {
      scnprintf(nameBuf + dirNameLen, BEEGFS_PROC_NAMEBUF_LEN - dirNameLen, "/%s",
         procFileRW->name);
      remove_proc_entry(nameBuf, NULL);
   }

   // remove unique dir (cut off the last appended entry name to get the plain dir path again)

   nameBuf[dirNameLen] = '\0';
   remove_proc_entry(nameBuf, NULL);

   SAFE_VFREE(nameBuf);
}
/**
 * Called when a procfs file is being opened.
 *
 * Looks up the show method that was stored as data of this proc entry and opens a single-record
 * seq_file with it; the App is handed to single_open() as its data argument.
 *
 * @return result of single_open
 */
int __ProcFs_open(struct inode* inode, struct file* file)
{
   App* app = __ProcFs_getProcParentDirEntryDataField(inode); // (app is ->data in parent dir)
   int (*showMethod)(struct seq_file *, void *) = __ProcFs_getProcDirEntryDataField(inode);

   return single_open(file, showMethod, app);
}
/**
* Does not return anything to the reading process.
* Intended for proc entries that are write-only.
*
* @return always 0
*/
int __ProcFs_readV2_nothing(struct seq_file* file, void* p)
{
return 0;
}
/**
* Does not return anything to the reading process.
* Intended for proc entries that are write-only.
*
* Variant with the buf/start/offset/size/eof callback signature; not referenced by the entry
* tables in this file.
*
* @param eof set to 1
* @param data specified at entry creation
* @return always 0
*/
int ProcFs_read_nothing(char* buf, char** start, off_t offset, int size, int* eof, void* data)
{
*eof = 1;
return 0;
}
/**
 * Show method of the "config" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_config(struct seq_file* file, void* p)
{
   return ProcFsHelper_readV2_config(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_config(char* buf, char** start, off_t offset, int size, int* eof, void* data)
{
   App* app = data;

   return ProcFsHelper_read_config(buf, start, offset, size, eof, app);
}
/**
 * Show method of the ".status" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_status(struct seq_file* file, void* v)
{
   return ProcFsHelper_readV2_status(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_status(char* buf, char** start, off_t offset, int size, int* eof, void* data)
{
   App* app = data;

   return ProcFsHelper_read_status(buf, start, offset, size, eof, app);
}
/**
 * Show method of the "mgmt_nodes" entry: prints the App's mgmt node store.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_mgmtNodes(struct seq_file* file, void* p)
{
   App* app = file->private;

   return ProcFsHelper_readV2_nodes(file, app, App_getMgmtNodes(app) );
}
/**
 * Show method of the "fs_uuid" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_fsUUID(struct seq_file* file, void* p)
{
   return ProcFsHelper_readV2_fsUUID(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature; reads the App's mgmt node store.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_mgmtNodes(char* buf, char** start, off_t offset, int size, int* eof, void* data)
{
   App* app = data;
   NodeStoreEx* mgmtNodes = App_getMgmtNodes(app);

   return ProcFsHelper_read_nodes(buf, start, offset, size, eof, mgmtNodes);
}
/**
 * Show method of the "meta_nodes" entry: prints the App's meta node store.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_metaNodes(struct seq_file* file, void* p)
{
   App* app = file->private;

   return ProcFsHelper_readV2_nodes(file, app, App_getMetaNodes(app) );
}
/**
 * Variant with the buf/start/offset/size/eof callback signature; reads the App's meta node store.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_metaNodes(char* buf, char** start, off_t offset, int size, int* eof, void* data)
{
   App* app = data;
   NodeStoreEx* metaNodes = App_getMetaNodes(app);

   return ProcFsHelper_read_nodes(buf, start, offset, size, eof, metaNodes);
}
/**
 * Show method of the "storage_nodes" entry: prints the App's storage node store.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_storageNodes(struct seq_file* file, void* p)
{
   App* app = file->private;

   return ProcFsHelper_readV2_nodes(file, app, App_getStorageNodes(app) );
}
/**
 * Variant with the buf/start/offset/size/eof callback signature; reads the App's storage node
 * store.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_storageNodes(char* buf, char** start, off_t offset, int size, int* eof, void* data)
{
   App* app = data;
   NodeStoreEx* storageNodes = App_getStorageNodes(app);

   return ProcFsHelper_read_nodes(buf, start, offset, size, eof, storageNodes);
}
/**
 * Show method of the "client_info" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_clientInfo(struct seq_file* file, void* p)
{
   return ProcFsHelper_readV2_clientInfo(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_clientInfo(char* buf, char** start, off_t offset, int size, int* eof,
   void* data)
{
   App* app = data;

   return ProcFsHelper_read_clientInfo(buf, start, offset, size, eof, app);
}
/**
 * Show method of the "meta_target_state" entry: passes the App's meta state store and meta node
 * store to the helper, with its last argument set to true.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_metaTargetStates(struct seq_file* file, void* p)
{
   App* app = file->private;

   NodeStoreEx* metaNodes = App_getMetaNodes(app);
   TargetStateStore* stateStore = App_getMetaStateStore(app);

   return ProcFsHelper_readV2_targetStates(file, app, stateStore, metaNodes, true);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature; passes the App's meta state
 * store and meta node store to the helper, with its last argument set to true.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_metaTargetStates(char* buf, char** start, off_t offset, int size, int* eof,
   void* data)
{
   App* app = data;

   return ProcFsHelper_read_targetStates(buf, start, offset, size, eof, app,
      App_getMetaStateStore(app), App_getMetaNodes(app), true);
}
/**
 * Show method of the "storage_target_state" entry: passes the App's target state store and
 * storage node store to the helper, with its last argument set to false.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_storageTargetStates(struct seq_file* file, void* p)
{
   App* app = file->private;

   NodeStoreEx* storageNodes = App_getStorageNodes(app);
   TargetStateStore* stateStore = App_getTargetStateStore(app);

   return ProcFsHelper_readV2_targetStates(file, app, stateStore, storageNodes, false);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature; passes the App's target state
 * store and storage node store to the helper, with its last argument set to false.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_storageTargetStates(char* buf, char** start, off_t offset, int size, int* eof,
   void* data)
{
   App* app = data;

   return ProcFsHelper_read_targetStates(buf, start, offset, size, eof, app,
      App_getTargetStateStore(app), App_getStorageNodes(app), false);
}
/**
 * Show method of the "conn_retries_enabled" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_connRetriesEnabled(struct seq_file* file, void* p)
{
   return ProcFsHelper_readV2_connRetriesEnabled(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_connRetriesEnabled(char* buf, char** start, off_t offset, int size, int* eof,
   void* data)
{
   App* app = data;

   return ProcFsHelper_read_connRetriesEnabled(buf, start, offset, size, eof, app);
}
/**
 * Write method of the "conn_retries_enabled" entry.
 *
 * @param file used to find the App (stored as ->data of the parent proc dir)
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
ssize_t __ProcFs_writeV2_connRetriesEnabled(struct file *file, const char __user *buf,
   size_t count, loff_t *ppos)
{
   App* app = __ProcFs_getProcParentDirEntryDataField(file_inode(file) );

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_connRetriesEnabled(buf, count, app);

   return -EFAULT;
}
/**
 * Variant with the file/buf/count/data callback signature.
 *
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @param data specified at entry creation (used as App*)
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
int ProcFs_write_connRetriesEnabled(struct file* file, const char __user *buf,
   unsigned long count, void* data)
{
   App* app = data;

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_connRetriesEnabled(buf, count, app);

   return -EFAULT;
}
/**
 * Show method of the "remap_connection_failure" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_read_remapConnectionFailure(struct seq_file* file, void* p)
{
   return ProcFsHelper_read_remapConnectionFailure(file, file->private);
}
/**
 * Write method of the "remap_connection_failure" entry.
 *
 * @param file used to find the App (stored as ->data of the parent proc dir)
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
ssize_t __ProcFs_write_remapConnectionFailure(struct file *file, const char __user *buf,
   size_t count, loff_t *ppos)
{
   App* app = __ProcFs_getProcParentDirEntryDataField(file_inode(file) );

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_remapConnectionFailure(buf, count, app);

   return -EFAULT;
}
/**
 * Show method of the "netbench_mode" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_netBenchModeEnabled(struct seq_file* file, void* p)
{
   return ProcFsHelper_readV2_netBenchModeEnabled(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_netBenchModeEnabled(char* buf, char** start, off_t offset, int size, int* eof,
   void* data)
{
   App* app = data;

   return ProcFsHelper_read_netBenchModeEnabled(buf, start, offset, size, eof, app);
}
/**
 * Write method of the "netbench_mode" entry.
 *
 * @param file used to find the App (stored as ->data of the parent proc dir)
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
ssize_t __ProcFs_writeV2_netBenchModeEnabled(struct file *file, const char __user *buf,
   size_t count, loff_t *ppos)
{
   App* app = __ProcFs_getProcParentDirEntryDataField(file_inode(file) );

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_netBenchModeEnabled(buf, count, app);

   return -EFAULT;
}
/**
 * Variant with the file/buf/count/data callback signature.
 *
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @param data specified at entry creation (used as App*)
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
int ProcFs_write_netBenchModeEnabled(struct file* file, const char __user *buf,
   unsigned long count, void* data)
{
   App* app = data;

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_netBenchModeEnabled(buf, count, app);

   return -EFAULT;
}
/**
 * Write method of the "drop_conns" entry.
 *
 * @param file used to find the App (stored as ->data of the parent proc dir)
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
ssize_t __ProcFs_writeV2_dropConns(struct file *file, const char __user *buf,
   size_t count, loff_t *ppos)
{
   App* app = __ProcFs_getProcParentDirEntryDataField(file_inode(file) );

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_dropConns(buf, count, app);

   return -EFAULT;
}
/**
 * Variant with the file/buf/count/data callback signature.
 *
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @param data specified at entry creation (used as App*)
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
int ProcFs_write_dropConns(struct file* file, const char __user *buf,
   unsigned long count, void* data)
{
   App* app = data;

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_dropConns(buf, count, app);

   return -EFAULT;
}
/**
 * Show method of the "log_levels" entry.
 *
 * @param file its ->private is used as the App
 */
int __ProcFs_readV2_logLevels(struct seq_file* file, void* p)
{
   return ProcFsHelper_readV2_logLevels(file, file->private);
}
/**
 * Variant with the buf/start/offset/size/eof callback signature.
 *
 * @param data specified at entry creation (used as App*)
 */
int ProcFs_read_logLevels(char* buf, char** start, off_t offset, int size, int* eof,
   void* data)
{
   App* app = data;

   return ProcFsHelper_read_logLevels(buf, start, offset, size, eof, app);
}
/**
 * Write method of the "log_levels" entry.
 *
 * @param file used to find the App (stored as ->data of the parent proc dir)
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
ssize_t __ProcFs_writeV2_logLevels(struct file *file, const char __user *buf,
   size_t count, loff_t *ppos)
{
   App* app = __ProcFs_getProcParentDirEntryDataField(file_inode(file) );

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_logLevels(buf, count, app);

   return -EFAULT;
}
/**
 * Variant with the file/buf/count/data callback signature.
 *
 * @param buf user space buffer; checked for read access before it is handed to the helper
 * @param data specified at entry creation (used as App*)
 * @return -EFAULT if the buffer check fails, otherwise the result of the helper
 */
int ProcFs_write_logLevels(struct file* file, const char __user *buf,
   unsigned long count, void* data)
{
   App* app = data;

   // check user buffer
   if(likely(os_access_ok(VERIFY_READ, buf, count) ) )
      return ProcFsHelper_write_logLevels(buf, count, app);

   return -EFAULT;
}
/**
 * Create a proc dir.
 *
 * Note: This is actually just a compat method for proc_mkdir_data.
 *
 * @param name dir path (a NULL parent is passed to the kernel)
 * @param data arbitrary private value to be assigned to procDir->data
 * @return the new dir entry or NULL on failure
 */
struct proc_dir_entry* __ProcFs_mkDir(const char* name, void* data)
{
   /* newer kernels no longer export struct proc_dir_entry, so the data field is only
      accessible through special kernel methods. */

#if defined(KERNEL_HAS_PDE_DATA) || defined(KERNEL_HAS_NEW_PDE_DATA)
   return proc_mkdir_data(name, 0, NULL, data);
#else
   struct proc_dir_entry* newDir = proc_mkdir(name, NULL);

   if(!newDir)
      return NULL;

   newDir->data = data;

   return newDir;
#endif // KERNEL_HAS_PDE_DATA
}
/**
* Return the data field of a proc entry.
*
* Note: This is actually just a compat method for PDE_DATA.
*/
void* __ProcFs_getProcDirEntryDataField(const struct inode* procInode)
{
/* newer kernels do no longer export struct proc_dir_entry, so the data field is only
accessible through special kernel methods. */
#ifdef KERNEL_HAS_PDE_DATA
return PDE_DATA(procInode);
#elif defined(KERNEL_HAS_NEW_PDE_DATA)
return pde_data(procInode);
#else
struct proc_dir_entry* procEntry = PDE(procInode);
return procEntry->data; // (app is stored as ->data in parent dir)
#endif // KERNEL_HAS_PDE_DATA
}
/**
* Return the data field from the parent dir of a proc entry.
*
* Note: This is actually just a compat method for proc_get_parent_data.
*/
void* __ProcFs_getProcParentDirEntryDataField(const struct inode* procInode)
{
/* newer kernels do no longer export struct proc_dir_entry, so the ->parent and ->data fields are
only accessible through special kernel methods. */
#if defined(KERNEL_HAS_PDE_DATA) || defined(KERNEL_HAS_NEW_PDE_DATA)
return proc_get_parent_data(procInode);
#else
struct proc_dir_entry* procEntry = PDE(procInode);
return procEntry->parent->data; // (app is stored as ->data in parent dir)
#endif // KERNEL_HAS_PDE_DATA
}

View File

@@ -0,0 +1,90 @@
#ifndef PROCFS_H_
#define PROCFS_H_
#include <common/nodes/TargetStateStore.h>
#include <common/Common.h>
#include <app/App.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
// setup/teardown entry points (judging by the names: one general dir plus per-App entries)
extern void ProcFs_createGeneralDir(void);
extern void ProcFs_removeGeneralDir(void);
extern void ProcFs_createEntries(App* app);
extern void ProcFs_removeEntries(App* app);
// open handler; signature matches an (inode, file) open callback
extern int __ProcFs_open(struct inode* inode, struct file* file);
// read handlers that print into a seq_file; signature is (struct seq_file*, void*)
extern int __ProcFs_readV2_nothing(struct seq_file* file, void* p);
extern int __ProcFs_readV2_config(struct seq_file* file, void* p);
extern int __ProcFs_readV2_status(struct seq_file* file, void* p);
extern int __ProcFs_readV2_fsUUID(struct seq_file* file, void* p);
extern int __ProcFs_readV2_mgmtNodes(struct seq_file* file, void* p);
extern int __ProcFs_readV2_metaNodes(struct seq_file* file, void* p);
extern int __ProcFs_readV2_storageNodes(struct seq_file* file, void* p);
extern int __ProcFs_readV2_clientInfo(struct seq_file* file, void* p);
extern int __ProcFs_readV2_metaTargetStates(struct seq_file* file, void* p);
extern int __ProcFs_readV2_storageTargetStates(struct seq_file* file, void* p);
extern int __ProcFs_readV2_connRetriesEnabled(struct seq_file* file, void* p);
extern int __ProcFs_readV2_netBenchModeEnabled(struct seq_file* file, void* p);
extern int __ProcFs_readV2_logLevels(struct seq_file* file, void* p);
// write handlers; signature is (file, user space buffer, byte count, file position)
extern ssize_t __ProcFs_writeV2_connRetriesEnabled(struct file *file, const char __user *buf,
size_t count, loff_t *ppos);
extern ssize_t __ProcFs_writeV2_netBenchModeEnabled(struct file *file, const char __user *buf,
size_t count, loff_t *ppos);
extern ssize_t __ProcFs_writeV2_dropConns(struct file *file, const char __user *buf,
size_t count, loff_t *ppos);
extern ssize_t __ProcFs_writeV2_logLevels(struct file *file, const char __user *buf,
size_t count, loff_t *ppos);
// remapConnectionFailure uses the same seq_file read / (file, buf, count, ppos) write signatures
extern int __ProcFs_read_remapConnectionFailure(struct seq_file* file, void* p);
extern ssize_t __ProcFs_write_remapConnectionFailure(struct file *file, const char __user *buf,
size_t count, loff_t *ppos);
/* read handlers with the (buf, start, offset, size, eof, data) signature.
   NOTE(review): this looks like the old read_proc-style interface for kernels without seq_file
   based proc entries - confirm whether these are still defined and used. */
extern int ProcFs_read_nothing(
char* buf, char** start, off_t offset, int size, int* eof,void* data);
extern int ProcFs_read_config(
char* buf, char** start, off_t offset, int size, int* eof,void* data);
extern int ProcFs_read_status(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_mgmtNodes(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_metaNodes(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_storageNodes(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_clientInfo(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_metaTargetStates(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_storageTargetStates(
char* buf, char** start, off_t offset, int size, int* eof, void* data);
extern int ProcFs_read_connRetriesEnabled(char* buf, char** start, off_t offset, int size, int* eof,
void* data);
// note: returns int, like every other ProcFs_read_* handler with this signature
extern int ProcFs_read_remapConnectionFailure(char* buf, char** start, off_t offset, int size,
   int* eof, void* data);
// further read handlers with the (buf, start, offset, size, eof, data) signature
extern int ProcFs_read_netBenchModeEnabled(char* buf, char** start, off_t offset, int size,
int* eof, void* data);
extern int ProcFs_read_logLevels(char* buf, char** start, off_t offset, int size, int* eof,
void* data);
/* write handlers with the (file, user space buffer, count, data) signature.
   NOTE(review): looks like the old write_proc-style counterpart of the read handlers above -
   confirm whether these are still defined and used. */
extern int ProcFs_write_connRetriesEnabled(struct file* file, const char __user *buf,
unsigned long count, void* data);
extern int ProcFs_write_remapConnectionFailure(struct file* file, const char __user *buf,
unsigned long count, void* data);
extern int ProcFs_write_netBenchModeEnabled(struct file* file, const char __user *buf,
unsigned long count, void* data);
extern int ProcFs_write_dropConns(struct file* file, const char __user *buf,
unsigned long count, void* data);
extern int ProcFs_write_logLevels(struct file* file, const char __user *buf, unsigned long count,
void* data);
// kernel compat helpers: create a proc dir with a data pointer / read the data pointer back
extern struct proc_dir_entry* __ProcFs_mkDir(const char* name, void* data);
extern void* __ProcFs_getProcDirEntryDataField(const struct inode* procInode);
extern void* __ProcFs_getProcParentDirEntryDataField(const struct inode* procInode);
#endif /* PROCFS_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,55 @@
#ifndef PROCFSHELPER_H_
#define PROCFSHELPER_H_
#include <app/App.h>
#include <common/Common.h>
#include <nodes/NodeStoreEx.h>
#include <linux/seq_file.h>
// helpers that take a seq_file plus the App (and, where needed, the stores to print from)
extern int ProcFsHelper_readV2_config(struct seq_file* file, App* app);
extern int ProcFsHelper_readV2_status(struct seq_file* file, App* app);
extern int ProcFsHelper_readV2_fsUUID(struct seq_file* file, App* app);
extern int ProcFsHelper_readV2_nodes(struct seq_file* file, App* app, struct NodeStoreEx* nodes);
extern int ProcFsHelper_readV2_clientInfo(struct seq_file* file, App* app);
extern int ProcFsHelper_readV2_targetStates(struct seq_file* file, App* app,
struct TargetStateStore* targetStates, struct NodeStoreEx* nodes, bool isMeta);
extern int ProcFsHelper_readV2_connRetriesEnabled(struct seq_file* file, App* app);
extern int ProcFsHelper_readV2_netBenchModeEnabled(struct seq_file* file, App* app);
extern int ProcFsHelper_readV2_logLevels(struct seq_file* file, App* app);
/* helpers with the (buf, start, offset, size, eof, ...) read signature and the
   (user space buffer, count, app) write signature.
   NOTE(review): the read variants look like the old read_proc-style interface - confirm whether
   they are still defined and used. */
extern int ProcFsHelper_read_config(char* buf, char** start, off_t offset, int size, int* eof,
App* app);
extern int ProcFsHelper_read_status(char* buf, char** start, off_t offset, int size, int* eof,
App* app);
extern int ProcFsHelper_read_nodes(char* buf, char** start, off_t offset, int size, int* eof,
struct NodeStoreEx* nodes);
extern int ProcFsHelper_read_clientInfo(char* buf, char** start, off_t offset, int size, int* eof,
App* app);
extern int ProcFsHelper_read_targetStates(char* buf, char** start, off_t offset, int size, int* eof,
App* app, struct TargetStateStore* targetStates, struct NodeStoreEx* nodes, bool isMeta);
extern int ProcFsHelper_read_connRetriesEnabled(char* buf, char** start, off_t offset, int size,
int* eof, App* app);
extern int ProcFsHelper_write_connRetriesEnabled(const char __user *buf,
unsigned long count, App* app);
// note: despite the "read_" (not "readV2_") name, this one takes a seq_file
extern int ProcFsHelper_read_remapConnectionFailure(struct seq_file* file, App* app);
extern int ProcFsHelper_write_remapConnectionFailure(const char __user *buf, unsigned long count, App* app);
extern int ProcFsHelper_read_netBenchModeEnabled(char* buf, char** start, off_t offset, int size,
int* eof, App* app);
extern int ProcFsHelper_write_netBenchModeEnabled(const char __user *buf,
unsigned long count, App* app);
extern int ProcFsHelper_write_dropConns(const char __user *buf, unsigned long count, App* app);
extern int ProcFsHelper_read_logLevels(char* buf, char** start, off_t offset, int size, int* eof,
App* app);
extern int ProcFsHelper_write_logLevels(const char __user *buf, unsigned long count, App* app);
// internal print helpers; each exists in a seq_file variant (V2) and a buf/pcount/psize variant
extern void __ProcFsHelper_printGotRootV2(struct seq_file* file, Node* node, NodeStoreEx* nodes);
extern void __ProcFsHelper_printGotRoot(struct Node* node, struct NodeStoreEx* nodes, char* buf,
int* pcount, int* psize);
extern void __ProcFsHelper_printNodeConnsV2(struct seq_file* file, struct Node* node);
extern void __ProcFsHelper_printNodeConns(struct Node* node, char* buf, int* pcount, int* psize);
#endif /* PROCFSHELPER_H_ */

View File

@@ -0,0 +1,326 @@
/*
* Ioctl helper functions
*/
#include <app/log/Logger.h>
#include <common/toolkit/list/UInt16List.h>
#include "IoctlHelper.h"
/**
 * Duplicate a user space string into kernel memory via strndup_user() and store the result in
 * target.
 *
 * Note: This macro contains return statements, i.e. it leaves the *calling function*:
 *    - with -EINVAL if len is 0 (target is not touched in this case)
 *    - with the error code of strndup_user() if the copy failed (target is set to NULL before,
 *      so the caller's cleanup code never sees an error pointer)
 *
 * Note: Expects the variables "log" and "logContext" to exist in the scope of the caller (they
 * are passed to LOG_DEBUG_FORMATTED).
 *
 * @param target lvalue that receives the kernel copy (evaluated multiple times)
 * @param source user space pointer of the string to copy
 * @param len length that is passed on to strndup_user()
 * @param name string literal with the name of the field (used in the log message only)
 */
#define STRDUP_OR_RETURN(target, source, len, name) \
do { \
if ((len) == 0) \
return -EINVAL; \
(target) = strndup_user((const char __user *) (source), (len)); \
if (IS_ERR((target))) \
{ \
int error = PTR_ERR((target)); \
(target) = NULL; \
LOG_DEBUG_FORMATTED(log, Log_NOTICE, logContext, "Invalid " name " string"); \
return error; \
} \
} while (0)
/**
 * Copy struct BeegfsIoctl_MkFile_Arg from user to kernel space
 *
 * Note: For simplicity, the calling function needs to free alloced outFileInfo members, even in
 * case of errors. Therefore, *outFileInfo is supposed to be memset() with 0 by the caller.
 *
 * Note: parentIsBuddyMirrored and storagePoolId are not taken from user space by this version;
 * they are set to false and STORAGEPOOLID_INVALIDPOOLID.
 *
 * @param argp user space pointer to a struct BeegfsIoctl_MkFile_Arg
 * @param outFileInfo receives the integer fields and kernel space copies of all strings and of
 *    the prefTargets array
 * @return 0 on success, negative linux error code otherwise
 */
long IoctlHelper_ioctlCreateFileCopyFromUser(App* app, void __user *argp,
   struct BeegfsIoctl_MkFileV3_Arg* outFileInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = __func__;

   struct BeegfsIoctl_MkFile_Arg userFileInfo; // fileInfo still with pointers into user space

   memset(&userFileInfo, 0, sizeof(userFileInfo) ); // avoid clang warning

   if (copy_from_user(&userFileInfo, argp, sizeof(userFileInfo) ) )
      return -EFAULT;

   /* we cannot simply do: outFileInfo = userFileInfo, as that would overwrite all NULL pointers
    * and we use those NULL pointers to simplify free(), so we copy integers one by one */
   outFileInfo->ownerNodeID = userFileInfo.ownerNodeID;
   outFileInfo->parentParentEntryIDLen = userFileInfo.parentParentEntryIDLen;
   outFileInfo->parentEntryIDLen = userFileInfo.parentEntryIDLen;
   outFileInfo->parentNameLen = userFileInfo.parentNameLen;
   outFileInfo->entryNameLen = userFileInfo.entryNameLen;
   outFileInfo->symlinkToLen = userFileInfo.symlinkToLen;
   outFileInfo->mode = userFileInfo.mode;
   outFileInfo->uid = userFileInfo.uid;
   outFileInfo->gid = userFileInfo.gid;
   outFileInfo->numTargets = userFileInfo.numTargets;
   outFileInfo->prefTargetsLen = userFileInfo.prefTargetsLen;
   outFileInfo->fileType = userFileInfo.fileType;
   outFileInfo->parentIsBuddyMirrored = false;
   outFileInfo->storagePoolId = STORAGEPOOLID_INVALIDPOOLID;

   /* Now copy and alloc all char* to kernel space */
   STRDUP_OR_RETURN(outFileInfo->parentParentEntryID, userFileInfo.parentParentEntryID,
      outFileInfo->parentParentEntryIDLen, "parentParentEntryID");
   STRDUP_OR_RETURN(outFileInfo->parentEntryID, userFileInfo.parentEntryID,
      outFileInfo->parentEntryIDLen, "parentEntryID");
   STRDUP_OR_RETURN(outFileInfo->parentName, userFileInfo.parentName, outFileInfo->parentNameLen,
      "parentName");
   STRDUP_OR_RETURN(outFileInfo->entryName, userFileInfo.entryName, outFileInfo->entryNameLen,
      "entryName");

   if (userFileInfo.fileType == DT_LNK && userFileInfo.symlinkToLen > 0)
      STRDUP_OR_RETURN(outFileInfo->symlinkTo, userFileInfo.symlinkTo, outFileInfo->symlinkToLen,
         "symlinkTo");

   // copy prefTargets array and verify it...

   /* Note: try not to exceed a page, as kmalloc might fail for high order allocations. However,
    * that should only happen with very high number of stripes and we will limit the number
    * in fhgfs-ctl. */

   /* numTargets comes straight from user space: a negative value would pass the length check
    * below (e.g. -1 => required length 0) and would then be used as a negative array index for
    * the zero-termination check, so it has to be rejected here. */
   if(outFileInfo->numTargets < 0)
   {
      Logger_logFormatted(log, Log_WARNING, logContext,
         "Invalid negative numTargets(=%d)", outFileInfo->numTargets);

      return -EINVAL;
   }

   // check if given num targets actually fits inside given targets array
   // (note: +1 for terminating 0 element)
   // (note: computed in 64 bit, so that a huge numTargets cannot overflow the multiplication)
   if( (long long)outFileInfo->prefTargetsLen <
      ( ( (long long)outFileInfo->numTargets + 1) * (long long)sizeof(uint16_t) ) )
   {
      Logger_logFormatted(log, Log_WARNING, logContext,
         "prefTargetsLen(=%d) too small for given numTargets(=%d+1)",
         outFileInfo->prefTargetsLen,
         outFileInfo->numTargets);

      return -EINVAL;
   }

   // copy prefTargets to kernel buffer based on raw prefTargetsLen
   outFileInfo->prefTargets = memdup_user( (char __user *) userFileInfo.prefTargets,
      outFileInfo->prefTargetsLen);
   if (IS_ERR(outFileInfo->prefTargets) )
   {
      int error = PTR_ERR(outFileInfo->prefTargets);

      LOG_DEBUG_FORMATTED(log, Log_NOTICE, logContext, "Unable to copy prefTargets array");
      outFileInfo->prefTargets = NULL; // caller frees all members, so don't leave an error ptr
      return error;
   }

   // check if prefTargets given by user space has a terminating zero as last array element
   // (note: not +1, because numTargets count does not include terminating zero element)
   if(outFileInfo->prefTargets[outFileInfo->numTargets] != 0)
   {
      Logger_logFormatted(log, Log_WARNING, logContext, "prefTargets array is not zero-terminated");
      return -EINVAL;
   }

   return 0;
}
/**
 * Copy struct BeegfsIoctl_MkFileV2_Arg from user to kernel space
 *
 * Note: For simplicity, the calling function needs to free alloced outFileInfo members, even in
 * case of errors. Therefore, *outFileInfo is supposed to be memset() with 0 by the caller.
 *
 * Note: storagePoolId is not taken from user space by this version; it is set to
 * STORAGEPOOLID_INVALIDPOOLID.
 *
 * @param argp user space pointer to a struct BeegfsIoctl_MkFileV2_Arg
 * @param outFileInfo receives the integer fields and kernel space copies of all strings and of
 *    the prefTargets array
 * @return 0 on success, negative linux error code otherwise
 */
long IoctlHelper_ioctlCreateFileCopyFromUserV2(App* app, void __user *argp,
   struct BeegfsIoctl_MkFileV3_Arg* outFileInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = __func__;

   struct BeegfsIoctl_MkFileV2_Arg userFileInfo; // fileInfo still with pointers into user space

   memset(&userFileInfo, 0, sizeof(userFileInfo)); // avoid clang warning

   if (copy_from_user(&userFileInfo, argp, sizeof(userFileInfo)))
      return -EFAULT;

   /* we cannot simply do: outFileInfo = userFileInfo, as that would overwrite all NULL pointers
    * and we use those NULL pointers to simplify free(), so we copy integers one by one */
   outFileInfo->ownerNodeID = userFileInfo.ownerNodeID;
   outFileInfo->parentParentEntryIDLen = userFileInfo.parentParentEntryIDLen;
   outFileInfo->parentEntryIDLen = userFileInfo.parentEntryIDLen;
   outFileInfo->parentNameLen = userFileInfo.parentNameLen;
   outFileInfo->entryNameLen = userFileInfo.entryNameLen;
   outFileInfo->symlinkToLen = userFileInfo.symlinkToLen;
   outFileInfo->mode = userFileInfo.mode;
   outFileInfo->uid = userFileInfo.uid;
   outFileInfo->gid = userFileInfo.gid;
   outFileInfo->numTargets = userFileInfo.numTargets;
   outFileInfo->prefTargetsLen = userFileInfo.prefTargetsLen;
   outFileInfo->fileType = userFileInfo.fileType;
   outFileInfo->parentIsBuddyMirrored = userFileInfo.parentIsBuddyMirrored;
   outFileInfo->storagePoolId = STORAGEPOOLID_INVALIDPOOLID;

   /* Now copy and alloc all char* to kernel space */
   STRDUP_OR_RETURN(outFileInfo->parentParentEntryID, userFileInfo.parentParentEntryID,
      outFileInfo->parentParentEntryIDLen, "parentParentEntryID");
   STRDUP_OR_RETURN(outFileInfo->parentEntryID, userFileInfo.parentEntryID,
      outFileInfo->parentEntryIDLen, "parentEntryID");
   STRDUP_OR_RETURN(outFileInfo->parentName, userFileInfo.parentName, outFileInfo->parentNameLen,
      "parentName");
   STRDUP_OR_RETURN(outFileInfo->entryName, userFileInfo.entryName, outFileInfo->entryNameLen,
      "entryName");

   if (userFileInfo.fileType == DT_LNK && userFileInfo.symlinkToLen > 0)
      STRDUP_OR_RETURN(outFileInfo->symlinkTo, userFileInfo.symlinkTo, outFileInfo->symlinkToLen,
         "symlinkTo");

   // copy prefTargets array and verify it...

   /* Note: try not to exceed a page, as kmalloc might fail for high order allocations. However,
    * that should only happen with very high number of stripes and we will limit the number
    * in fhgfs-ctl. */

   /* numTargets comes straight from user space: a negative value would pass the length check
    * below (e.g. -1 => required length 0) and would then be used as a negative array index for
    * the zero-termination check, so it has to be rejected here. */
   if(outFileInfo->numTargets < 0)
   {
      Logger_logFormatted(log, Log_WARNING, logContext,
         "Invalid negative numTargets(=%d)", outFileInfo->numTargets);

      return -EINVAL;
   }

   // check if given num targets actually fits inside given targets array
   // (note: +1 for terminating 0 element)
   // (note: computed in 64 bit, so that a huge numTargets cannot overflow the multiplication)
   if( (long long)outFileInfo->prefTargetsLen <
      ( ( (long long)outFileInfo->numTargets + 1) * (long long)sizeof(uint16_t) ) )
   {
      Logger_logFormatted(log, Log_WARNING, logContext,
         "prefTargetsLen(=%d) too small for given numTargets(=%d+1)",
         outFileInfo->prefTargetsLen,
         outFileInfo->numTargets);

      return -EINVAL;
   }

   // copy prefTargets to kernel buffer based on raw prefTargetsLen
   outFileInfo->prefTargets = memdup_user( (char __user *) userFileInfo.prefTargets,
      outFileInfo->prefTargetsLen);
   if (IS_ERR(outFileInfo->prefTargets) )
   {
      int error = PTR_ERR(outFileInfo->prefTargets);

      LOG_DEBUG_FORMATTED(log, Log_NOTICE, logContext, "Unable to copy prefTargets array");
      outFileInfo->prefTargets = NULL; // caller frees all members, so don't leave an error ptr
      return error;
   }

   // check if prefTargets given by user space has a terminating zero as last array element
   // (note: not +1, because numTargets count does not include terminating zero element)
   if(outFileInfo->prefTargets[outFileInfo->numTargets] != 0)
   {
      Logger_logFormatted(log, Log_WARNING, logContext, "prefTargets array is not zero-terminated");
      return -EINVAL;
   }

   return 0;
}
/**
 * Copy struct BeegfsIoctl_MkFileV3_Arg from user to kernel space
 *
 * Note: For simplicity, the calling function needs to free alloced outFileInfo members, even in
 * case of errors. Therefore, *outFileInfo is supposed to be memset() with 0 by the caller.
 *
 * Note: In contrast to the older versions, parentIsBuddyMirrored and storagePoolId are both taken
 * from the user space struct.
 *
 * @param argp user space pointer to a struct BeegfsIoctl_MkFileV3_Arg
 * @param outFileInfo receives the integer fields and kernel space copies of all strings and of
 *    the prefTargets array
 * @return 0 on success, negative linux error code otherwise
 */
long IoctlHelper_ioctlCreateFileCopyFromUserV3(App* app, void __user *argp,
   struct BeegfsIoctl_MkFileV3_Arg* outFileInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = __func__;

   struct BeegfsIoctl_MkFileV3_Arg userFileInfo; // fileInfo still with pointers into user space

   memset(&userFileInfo, 0, sizeof(userFileInfo)); // avoid clang warning

   if (copy_from_user(&userFileInfo, argp, sizeof(userFileInfo)))
      return -EFAULT;

   /* we cannot simply do: outFileInfo = userFileInfo, as that would overwrite all NULL pointers
    * and we use those NULL pointers to simplify free(), so we copy integers one by one */
   outFileInfo->ownerNodeID = userFileInfo.ownerNodeID;
   outFileInfo->parentParentEntryIDLen = userFileInfo.parentParentEntryIDLen;
   outFileInfo->parentEntryIDLen = userFileInfo.parentEntryIDLen;
   outFileInfo->parentNameLen = userFileInfo.parentNameLen;
   outFileInfo->entryNameLen = userFileInfo.entryNameLen;
   outFileInfo->symlinkToLen = userFileInfo.symlinkToLen;
   outFileInfo->mode = userFileInfo.mode;
   outFileInfo->uid = userFileInfo.uid;
   outFileInfo->gid = userFileInfo.gid;
   outFileInfo->numTargets = userFileInfo.numTargets;
   outFileInfo->prefTargetsLen = userFileInfo.prefTargetsLen;
   outFileInfo->fileType = userFileInfo.fileType;
   outFileInfo->parentIsBuddyMirrored = userFileInfo.parentIsBuddyMirrored;
   outFileInfo->storagePoolId = userFileInfo.storagePoolId;

   /* Now copy and alloc all char* to kernel space */
   STRDUP_OR_RETURN(outFileInfo->parentParentEntryID, userFileInfo.parentParentEntryID,
      outFileInfo->parentParentEntryIDLen, "parentParentEntryID");
   STRDUP_OR_RETURN(outFileInfo->parentEntryID, userFileInfo.parentEntryID,
      outFileInfo->parentEntryIDLen, "parentEntryID");
   STRDUP_OR_RETURN(outFileInfo->parentName, userFileInfo.parentName, outFileInfo->parentNameLen,
      "parentName");
   STRDUP_OR_RETURN(outFileInfo->entryName, userFileInfo.entryName, outFileInfo->entryNameLen,
      "entryName");

   if (userFileInfo.fileType == DT_LNK && userFileInfo.symlinkToLen > 0)
      STRDUP_OR_RETURN(outFileInfo->symlinkTo, userFileInfo.symlinkTo, outFileInfo->symlinkToLen,
         "symlinkTo");

   // copy prefTargets array and verify it...

   /* Note: try not to exceed a page, as kmalloc might fail for high order allocations. However,
    * that should only happen with very high number of stripes and we will limit the number
    * in fhgfs-ctl. */

   /* numTargets comes straight from user space: a negative value would pass the length check
    * below (e.g. -1 => required length 0) and would then be used as a negative array index for
    * the zero-termination check, so it has to be rejected here. */
   if(outFileInfo->numTargets < 0)
   {
      Logger_logFormatted(log, Log_WARNING, logContext,
         "Invalid negative numTargets(=%d)", outFileInfo->numTargets);

      return -EINVAL;
   }

   // check if given num targets actually fits inside given targets array
   // (note: +1 for terminating 0 element)
   // (note: computed in 64 bit, so that a huge numTargets cannot overflow the multiplication)
   if( (long long)outFileInfo->prefTargetsLen <
      ( ( (long long)outFileInfo->numTargets + 1) * (long long)sizeof(uint16_t) ) )
   {
      Logger_logFormatted(log, Log_WARNING, logContext,
         "prefTargetsLen(=%d) too small for given numTargets(=%d+1)",
         outFileInfo->prefTargetsLen,
         outFileInfo->numTargets);

      return -EINVAL;
   }

   // copy prefTargets to kernel buffer based on raw prefTargetsLen
   outFileInfo->prefTargets = memdup_user( (char __user *) userFileInfo.prefTargets,
      outFileInfo->prefTargetsLen);
   if (IS_ERR(outFileInfo->prefTargets) )
   {
      int error = PTR_ERR(outFileInfo->prefTargets);

      LOG_DEBUG_FORMATTED(log, Log_NOTICE, logContext, "Unable to copy prefTargets array");
      outFileInfo->prefTargets = NULL; // caller frees all members, so don't leave an error ptr
      return error;
   }

   // check if prefTargets given by user space has a terminating zero as last array element
   // (note: not +1, because numTargets count does not include terminating zero element)
   if(outFileInfo->prefTargets[outFileInfo->numTargets] != 0)
   {
      Logger_logFormatted(log, Log_WARNING, logContext, "prefTargets array is not zero-terminated");
      return -EINVAL;
   }

   return 0;
}
/**
 * Build the outCreateInfo->preferredStorageTargets list from the fileInfo->prefTargets array.
 *
 * Exactly fileInfo->numTargets array elements are appended to the list; a zero element within
 * that range is treated as an error.
 *
 * @param fileInfo provides numTargets and the prefTargets array
 * @param outCreateInfo outCreateInfo->preferredStorageTargets is alloced here; the caller has to
 *    free it (if it is !=NULL), no matter whether this function succeeded or returned an error
 * @return 0 on success, negative linux error code otherwise
 */
int IoctlHelper_ioctlCreateFileTargetsToList(App* app, struct BeegfsIoctl_MkFileV3_Arg* fileInfo,
   struct CreateInfo* outCreateInfo)
{
   Logger* log = App_getLogger(app);
   const char* logContext = __func__;

   int targetIdx = 0;

   // alloc and init the (still empty) result list
   outCreateInfo->preferredStorageTargets =
      os_kmalloc(sizeof(*outCreateInfo->preferredStorageTargets) );

   if(unlikely(!outCreateInfo->preferredStorageTargets) )
      return -ENOMEM;

   UInt16List_init(outCreateInfo->preferredStorageTargets);

   // walk over the array and move each element to the list
   while(targetIdx < fileInfo->numTargets)
   {
      uint16_t target = (fileInfo->prefTargets)[targetIdx];

      if(unlikely(target == 0) )
      { // zero is not a valid target, so it must not show up before the end of the array
         Logger_logFormatted(log, Log_WARNING, logContext,
            "Invalid preferred target at this array index: %d (numTargets: %d)\n",
            targetIdx, fileInfo->numTargets);

         return -EINVAL;
      }

      UInt16List_append(outCreateInfo->preferredStorageTargets, target);

      targetIdx++;
   }

   return 0;
}

View File

@@ -0,0 +1,37 @@
/*
* Ioctl helper functions
*/
#ifndef IOCTLHELPER_H_
#define IOCTLHELPER_H_
#include <app/App.h>
#include <app/config/Config.h>
#include <os/OsCompat.h>
#include <filesystem/FhgfsOpsSuper.h>
#include <filesystem/FhgfsOpsHelper.h>
#include <filesystem/FhgfsOpsIoctl.h>
#ifdef CONFIG_COMPAT
#include <asm/compat.h>
#include <filesystem/FhgfsOpsInode.h>
#endif
#include <linux/mount.h>
/* copy the create-file ioctl argument from user space (argp) into *outFileInfo; there is one
   function per version of the user space struct, but all of them fill the V3 struct */
extern long IoctlHelper_ioctlCreateFileCopyFromUser(App* app, void __user *argp,
struct BeegfsIoctl_MkFileV3_Arg* outFileInfo);
extern long IoctlHelper_ioctlCreateFileCopyFromUserV2(App* app, void __user *argp,
struct BeegfsIoctl_MkFileV3_Arg* outFileInfo);
extern long IoctlHelper_ioctlCreateFileCopyFromUserV3(App* app, void __user *argp,
struct BeegfsIoctl_MkFileV3_Arg* outFileInfo);
// convert the prefTargets array of fileInfo into the preferredStorageTargets list of outCreateInfo
extern int IoctlHelper_ioctlCreateFileTargetsToList(App* app,
struct BeegfsIoctl_MkFileV3_Arg* fileInfo, struct CreateInfo* outCreateInfo);
#endif /* IOCTLHELPER_H_ */