2025-08-10 01:34:16 +02:00

1373 lines
43 KiB
C

#include <app/log/Logger.h>
#include <app/App.h>
#include <common/threading/AtomicInt.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/vector/StrCpyVec.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/Path.h>
#include <common/storage/StorageDefinitions.h>
#include <filesystem/FhgfsInode.h>
#include <filesystem/FhgfsOpsHelper.h>
#include <filesystem/FsDirInfo.h>
#include <toolkit/InodeRefStore.h>
#include <toolkit/NoAllocBufferStore.h>
#include <os/iov_iter.h>
#include "FhgfsOpsDir.h"
#include "FhgfsOpsHelper.h"
/**
 * Log file system operations and optionally additional messages.
 * Used to trace function calls and to print the path the function is to operate on.
 *
 * The logged line has the form "called. Path: <path>; EntryID: <id>[; <formatted msg>]".
 *
 * @param level The log level; nothing is done if it is above the logger's current level
 * @param app Provides the logger and the path buffer store
 * @param dentry Common vfs directory entry, may be NULL (path is then logged as "n/a")
 * @param inode Inode to take the entryID from, may be NULL (entryID is then logged as "n/a")
 * @param logContext Usually the name of the calling function
 * @param msgStr Optional printf-style message string, may be NULL
 * @param ... Optional arguments according to format given in msgStr
 */
void FhgfsOpsHelper_logOpMsg(int level, App* app, struct dentry* dentry, struct inode* inode,
   const char *logContext, const char *msgStr, ...)
{
   Logger* log = App_getLogger(app);
   NoAllocBufferStore* bufStore = App_getPathBufStore(app);

   char* pathStoreBuf = NULL; // NoAllocBufferStore_addBuf() can detect a wrong NULL...
   char* path = NULL;
   const char* entryID = NULL;

   const char* noPath = "n/a (no dentry)";
   const char* noEntryID = "n/a (no inode)";

   if(level > Logger_getLogLevel(log) )
      return;

   if(dentry)
   {
      path = __FhgfsOps_pathResolveToStoreBuf(bufStore, dentry, &pathStoreBuf);
      if(IS_ERR(path) )
         path = NULL; // fall back to the "n/a" placeholder
   }

   if(inode)
   { // get entryInfo lock for entryID (entryID is only used while the lock is held)
      FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
      const EntryInfo* entryInfo;

      FhgfsInode_entryInfoReadLock(fhgfsInode); // L O C K entryInfo

      entryInfo = FhgfsInode_getEntryInfo(fhgfsInode);
      entryID = EntryInfo_getEntryID(entryInfo);
   }

   if(msgStr)
   { // generate new msg string for given msg formatting
      va_list ap;
      char* newMsg;

      newMsg = kmalloc(LOGGER_LOGBUF_SIZE, GFP_NOFS);
      if(newMsg)
      {
         /* only the fixed prefix is written here; the caller's message is formatted exactly once
            by the vsnprintf() below (the raw format string must not be part of the prefix) */
         int prefixLen = snprintf(newMsg, LOGGER_LOGBUF_SIZE, "called. Path: %s; EntryID: %s; ",
            path ? path : noPath,
            entryID ? entryID : noEntryID);

         /* snprintf() reports the untruncated length, so clamp it before it is used as a buffer
            offset and for the remaining-size computation */
         if(prefixLen < 0)
            prefixLen = 0;
         else
         if(prefixLen >= LOGGER_LOGBUF_SIZE)
            prefixLen = LOGGER_LOGBUF_SIZE - 1;

         va_start(ap, msgStr);
         vsnprintf(newMsg + prefixLen, LOGGER_LOGBUF_SIZE - prefixLen, msgStr, ap);
         va_end(ap);

         // pass the finished text as an argument, so that '%' in paths cannot act as format
         Logger_logFormatted(log, level, logContext, "%s", newMsg);

         kfree(newMsg);
      }
      else
      { // alloc failed, still try to log the operation at least
         Logger_logFormatted(log, level, logContext, "called. Path: %s; EntryID: %s; (msg n/a)",
            path ? path : noPath,
            entryID ? entryID : noEntryID);
      }
   }
   else
      Logger_logFormatted(log, level, logContext, "called. Path: %s; EntryID: %s",
         path ? path : noPath,
         entryID ? entryID : noEntryID);

   if(inode)
   { // release entryInfo lock
      FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);

      FhgfsInode_entryInfoReadUnlock(fhgfsInode); // U N L O C K entryInfo
   }

   if(pathStoreBuf)
      NoAllocBufferStore_addBuf(bufStore, pathStoreBuf); // return path buffer to the store
}
/**
 * Write the data of an iov_iter relative to the current end of a file, protected by the MDS
 * append lock.
 *
 * The current file size is retrieved by a direct remote stat while the append lock is held; the
 * write offset is that size plus offsetFromEnd.
 *
 * @param count must be equal to iov_iter_count(iter) (checked via BUG_ON)
 * @param offsetFromEnd value added to the stat'ed file size to get the write offset
 * @param outNewOffset write offset plus number of written bytes; left untouched if locking or
 *    stat failed
 * @return number of bytes written or negative fhgfs error code
 */
ssize_t FhgfsOpsHelper_appendfileVecOffset(FhgfsInode* fhgfsInode, struct iov_iter *iter,
   size_t count, RemotingIOInfo* ioInfo, loff_t offsetFromEnd, loff_t* outNewOffset)
{
   App* app = ioInfo->app;
   fhgfs_stat remoteStat;
   FhgfsOpsErr appendLockRes;
   FhgfsOpsErr remoteStatRes;
   ssize_t result = 0;

   // step 1: MDS append lock
   appendLockRes = FhgfsOpsHelper_getAppendLock(fhgfsInode, ioInfo);
   if(unlikely(appendLockRes != FhgfsOpsErr_SUCCESS) )
      return -appendLockRes;

   // step 2: current file size from the servers
   FhgfsInode_entryInfoReadLock(fhgfsInode); // LOCK EntryInfo
   remoteStatRes = FhgfsOpsRemoting_statDirect(app, FhgfsInode_getEntryInfo(fhgfsInode),
      &remoteStat);
   FhgfsInode_entryInfoReadUnlock(fhgfsInode); // UNLOCK EntryInfo

   if(unlikely(remoteStatRes != FhgfsOpsErr_SUCCESS) )
   { // remote stat error
      result = -remoteStatRes;
      goto release_append_lock;
   }

   // step 3: remote write (writefileVec takes the length from the iter, not from count)
   BUG_ON(count != iov_iter_count(iter));

   remoteStat.size += offsetFromEnd;
   result = FhgfsOpsRemoting_writefileVec(iter, remoteStat.size, ioInfo, false);

   if(result >= 0)
      remoteStat.size += result;

   *outNewOffset = remoteStat.size;

release_append_lock:
   FhgfsOpsHelper_releaseAppendLock(fhgfsInode, ioInfo);

   return result;
}
/**
 * Append a kernel buffer to a file, protected by MDS locking.
 *
 * Note: Local file buffers are not flushed after the MDS lock was acquired, because this method
 * might itself be called during a file buffer flush. So callers must ensure that there are no
 * conflicting local file buffers.
 *
 * @param buf kernel buffer with the data to be appended
 * @param size buffer length to be appended
 * @param outNewOffset new file offset after append completes (only valid if no error returned)
 * @return number of bytes written or negative fhgfs error code
 */
static ssize_t FhgfsOpsHelper_appendfile_kernel(FhgfsInode* fhgfsInode, const char *buf, size_t size,
   RemotingIOInfo* ioInfo, loff_t* outNewOffset)
{
   // wrap the kernel buffer into an iter, so that the iter-based append can be reused
   struct iov_iter *kvecIter = STACK_ALLOC_BEEGFS_ITER_KVEC(buf, size, WRITE);

   return FhgfsOpsHelper_appendfileVecOffset(fhgfsInode, kvecIter, size, ioInfo,
      0 /* directly at end of file */, outNewOffset);
}
/**
 * Write the data of an iter either at a fixed offset (plain remoting write) or, if the offset
 * is -1, as a lock-protected append at the end of the file.
 *
 * @param offset offset in file, -1 for append
 * @return result of the delegated write call
 */
static ssize_t FhgfsOpsHelper_writefileEx(FhgfsInode* fhgfsInode,
   struct iov_iter *iter, size_t size, loff_t offset, RemotingIOInfo* ioInfo)
{
   if(offset != -1)
      return FhgfsOpsRemoting_writefileVec(iter, offset, ioInfo, false);

   // append: the resulting new offset is stored in our local copy and not reported any further
   return FhgfsOpsHelper_appendfileVecOffset(fhgfsInode, iter, size, ioInfo, 0, &offset);
}
/**
 * Kernel buffer variant of the write wrapper: plain remoting write for a fixed offset, or
 * lock-protected append if the offset is -1.
 *
 * @param offset offset in file, -1 for append
 * @return result of the delegated write call
 */
static ssize_t FhgfsOpsHelper_writefileEx_kernel(FhgfsInode* fhgfsInode,
   const char *buf, size_t size, loff_t offset, RemotingIOInfo* ioInfo)
{
   if(offset != -1)
      return FhgfsOpsRemoting_writefile_kernel(buf, size, offset, ioInfo);

   // append: the resulting new offset is stored in our local copy and not reported any further
   return FhgfsOpsHelper_appendfile_kernel(fhgfsInode, buf, size, ioInfo, &offset);
}
/**
 * Refresh the dirInfo by retrieving a new list of names from the nodes, but only if that is
 * required or forced.
 *
 * Note: On completion, either the local names range is valid for the current position or the
 * dirContents list is empty.
 *
 * @param entryInfo entry info of the directory
 * @param dirInfo holds the relevant serverOffset for the refresh
 * @param forceUpdate use this to force an update (e.g. when the user seeked)
 * @return negative linux error code on error, 0 otherwise
 */
int FhgfsOpsHelper_refreshDirInfoIncremental(App* app, const EntryInfo* entryInfo,
   FsDirInfo* dirInfo, bool forceUpdate)
{
   Logger* log = App_getLogger(app);
   const char* logContext = "FhgfsOpsHelper (refresh dir info incremental)";

   const unsigned maxNames = 100; // max number of retrieved names

   StrCpyVec* dirContents = FsDirInfo_getDirContents(dirInfo);
   size_t dirContentsLen = StrCpyVec_length(dirContents);
   size_t currentContentsPos = FsDirInfo_getCurrentContentsPos(dirInfo); // pos inside contents vec

   bool needRemoteList = false; // set if names have to be fetched from the server
   FhgfsOpsErr listRes;

   LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext, "called. EntryID: %s, Owner: %hu",
      entryInfo->entryID, entryInfo->owner.node.value);
   IGNORE_UNUSED_VARIABLE(log);
   IGNORE_UNUSED_VARIABLE(logContext);

   // decide whether the local contents are sufficient

   if(forceUpdate)
   { // user seeked backwards (or something else makes an update necessary)
      LOG_DEBUG(log, Log_SPAM, logContext, "forced update of dir contents");

      needRemoteList = true;
   }
   else
   if(FsDirInfo_getEndOfDir(dirInfo) && (currentContentsPos >= dirContentsLen) )
   { // end of dir and all local names consumed => only drop the local names
      LOG_DEBUG(log, Log_SPAM, logContext, "reached end of dir contents by offset");

      StrCpyVec_clear(FsDirInfo_getDirContents(dirInfo) );
   }
   else
   if(currentContentsPos < dirContentsLen)
   { // inside the local contents region => do nothing
      LOG_DEBUG_FORMATTED(log, Log_SPAM, logContext,
         "offset inside local contents region: %lld <= %lld",
         (long long)currentContentsPos, (long long)dirContentsLen);
   }
   else
   { // initial retrieval or offset outside current local contents region
      LOG_DEBUG(log, Log_SPAM, logContext, "retrieving by offset");

      needRemoteList = true;
   }

   if(!needRemoteList)
      return 0;

   // fetch the next names from the server

   listRes = FhgfsOpsRemoting_listdirFromOffset(entryInfo, dirInfo, maxNames);
   if(unlikely(listRes != FhgfsOpsErr_SUCCESS) )
   { // error => leave an empty list behind
      StrCpyVec_clear(FsDirInfo_getDirContents(dirInfo) );
      FsDirInfo_setCurrentContentsPos(dirInfo, 0);

      Logger_logFormatted(log, Log_DEBUG, logContext, "result: %s",
         FhgfsOpsErr_toErrString(listRes) );

      return FhgfsOpsErr_toSysErr(listRes);
   }

   return 0;
}
/**
 * Flush the cache buffer of a file and return it to the store, but do not wait for the cache
 * lock: if the exclusive lock cannot be acquired immediately, nothing is flushed.
 *
 * @param discardCacheOnError true to discard a write cache if the remote write
 *    was not successful; false to just release it to the store in this case
 * @return FhgfsOpsErr_INUSE if cache lock is contended and nothing has been flushed; result of
 *    the flush otherwise
 */
FhgfsOpsErr FhgfsOpsHelper_flushCacheNoWait(App* app, FhgfsInode* fhgfsInode,
   bool discardCacheOnError)
{
   FhgfsOpsErr flushRes;

   // trylock returns 1 only if we got the lock
   if(FhgfsInode_fileCacheExclusiveTryLock(fhgfsInode) != 1) // (T R Y) L O C K
      return FhgfsOpsErr_INUSE;

   flushRes = __FhgfsOpsHelper_flushCacheUnlocked(app, fhgfsInode, discardCacheOnError);

   FhgfsInode_fileCacheExclusiveUnlock(fhgfsInode); // U N L O C K

   return flushRes;
}
/**
* Buffered writing.
* Tries to add written data to an existing cache or flushes the cache (if any) and delegates to
* writeCacheFlushed().
*
* Note: This method also updates FsFileInfo cache hits counter.
* Note: If tuneCoherentBuffers is set and beegfs_hasMappings() reports mappings for the inode, the
* cache is bypassed and the data is written through directly.
*
* @param iter source of the data to be written
* @param size number of bytes to be written
* @param offset offset in file, -1 for append
* @param fhgfsInode inode whose file cache buffer is used (its cache lock is taken exclusively)
* @param fileInfo provides the cache hits counter and the last write offset
* @param ioInfo remoting info, also provides the app
* @return number of bytes written or negative fhgfs error code
*/
ssize_t FhgfsOpsHelper_writeCached(struct iov_iter *iter, size_t size,
loff_t offset, FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo)
{
App* app = ioInfo->app;
NoAllocBufferStore* cacheStore = App_getCacheBufStore(app);
ssize_t retVal;
bool isAppendWrite = (offset == -1); // true if this is an append write
CacheBuffer* cacheBuffer;
enum FileBufferType cacheType;
bool isAppendCacheBuf; // true if the current cache buffer is an append write buffer
size_t bufLenLeft; // remaining buffer length
loff_t currentCacheEndOffset;
int userBufCopyRes;
// coherent buffers mode: don't cache writes for files that have mappings
if (app->cfg->tuneCoherentBuffers)
{
i_mmap_lock_read(fhgfsInode->vfs_inode.i_mapping);
if (beegfs_hasMappings(&fhgfsInode->vfs_inode))
{
FsFileInfo_decCacheHits(fileInfo);
i_mmap_unlock_read(fhgfsInode->vfs_inode.i_mapping);
return FhgfsOpsHelper_writefileEx(fhgfsInode, iter, size, offset, ioInfo);
}
i_mmap_unlock_read(fhgfsInode->vfs_inode.i_mapping);
}
// everything below runs under the exclusive cache lock and leaves via unlock_and_exit
FhgfsInode_fileCacheExclusiveLock(fhgfsInode); // L O C K
cacheBuffer = Fhgfsinode_getFileCacheBuffer(fhgfsInode);
cacheType = cacheBuffer->bufType;
if(cacheType == FileBufferType_NONE)
{ // file doesn't have any cache buffer currently
// update cache hits by guessing whether this would have been a cache hit if we had a buffer
if(isAppendWrite)
FsFileInfo_incCacheHits(fileInfo); // could have been a cache hit
else
{
// "virtual" cache range: one buffer size, starting at the last write offset
loff_t virtualCacheStart = FsFileInfo_getLastWriteOffset(fileInfo);
loff_t virtualCacheEnd = virtualCacheStart + NoAllocBufferStore_getBufSize(cacheStore) - 1;
if( (offset >= virtualCacheStart) && (offset <= virtualCacheEnd) )
FsFileInfo_incCacheHits(fileInfo); // could have been a cache hit
else
FsFileInfo_decCacheHits(fileInfo); // would probably not have been a cache hit
}
retVal = __FhgfsOpsHelper_writeCacheFlushed(iter, size, offset, fhgfsInode, fileInfo, ioInfo);
goto unlock_and_exit;
}
if(cacheType == FileBufferType_READ)
{ // file has a read cache => just discard the cached data
FsFileInfo_decCacheHits(fileInfo);
__FhgfsOpsHelper_discardCache(app, fhgfsInode);
retVal = __FhgfsOpsHelper_writeCacheFlushed(iter, size, offset, fhgfsInode, fileInfo, ioInfo);
goto unlock_and_exit;
}
// file has a write cache => can we extend it or do we have to flush it?
isAppendCacheBuf = cacheBuffer->fileOffset == -1; // -1 offset means append buf
// (end offset is only compared below for non-append writes)
currentCacheEndOffset = cacheBuffer->fileOffset + cacheBuffer->bufUsageLen;
bufLenLeft = cacheBuffer->bufUsageMaxLen - cacheBuffer->bufUsageLen;
// the cache can only be extended if the write has the same kind (append / non-append) as the
// buffer, continues exactly at the end of the cached range and fits into the remaining space
if( (isAppendWrite != isAppendCacheBuf) ||
(!isAppendWrite && (offset != currentCacheEndOffset) ) ||
(size > bufLenLeft) )
{ // offset doesn't fit or not enough cache room left => flush cache
FhgfsOpsErr flushRes;
FsFileInfo_decCacheHits(fileInfo);
flushRes = __FhgfsOpsHelper_flushCacheUnlocked(app, fhgfsInode, false);
if(unlikely(flushRes != FhgfsOpsErr_SUCCESS) )
{ // flush failed
retVal = -flushRes;
goto unlock_and_exit;
}
retVal = __FhgfsOpsHelper_writeCacheFlushed(iter, size, offset, fhgfsInode, fileInfo, ioInfo);
goto unlock_and_exit;
}
// offset fits and we have enough room left for the write => copy data to cache buf (and update)
FsFileInfo_incCacheHits(fileInfo);
userBufCopyRes = copy_from_iter(&(cacheBuffer->buf)[cacheBuffer->bufUsageLen], size, iter);
if(unlikely(userBufCopyRes != size))
{ // copy failed (bufUsageLen is not updated, so the partial copy is not part of the cache)
Logger* log = App_getLogger(app);
Logger_log(log, Log_DEBUG, __func__, "Buffer copy from userspace failed (invalid buffer)");
retVal = -FhgfsOpsErr_ADDRESSFAULT;
goto unlock_and_exit;
}
cacheBuffer->bufUsageLen += size;
bufLenLeft = cacheBuffer->bufUsageMaxLen - cacheBuffer->bufUsageLen; // re-calc remaining bufLen
// write has been completely cached => check whether the cache is full now
if(!bufLenLeft)
{ // cache buf used up => flush it
FhgfsOpsErr flushRes = __FhgfsOpsHelper_flushCacheUnlocked(app, fhgfsInode, false);
if(unlikely(flushRes != FhgfsOpsErr_SUCCESS) )
{ // flush failed
retVal = -flushRes;
goto unlock_and_exit;
}
}
// write was successful if we got here
retVal = size;
unlock_and_exit:
FhgfsInode_fileCacheExclusiveUnlock(fhgfsInode); // U N L O C K
return retVal;
}
/**
* Buffered reading.
* Tries to read data from an existing cache or flushes the cache and delegates to
* readCacheFlushed().
*
* Note: This method also updates FsFileInfo cache hits counter.
* Note: If tuneCoherentBuffers is set and beegfs_hasMappings() reports mappings for the inode, the
* cache is bypassed and the data is read directly.
* Note: If the file is open by multiple readers or caching is not allowed for it, and no cache
* buffer is attached, the read is done directly under the shared cache lock.
*
* @param iter destination for the read data
* @param size number of bytes to be read
* @param offset offset in file
* @param fhgfsInode inode whose file cache buffer is used
* @param fileInfo provides the cache hits counter and the last read offset
* @param ioInfo remoting info, also provides the app
* @return number of bytes read or negative fhgfs error code
*/
ssize_t FhgfsOpsHelper_readCached(struct iov_iter *iter, size_t size, loff_t offset,
FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo)
{
App* app = ioInfo->app;
NoAllocBufferStore* cacheStore = App_getCacheBufStore(app);
ssize_t retVal;
CacheBuffer* cacheBuffer;
enum FileBufferType cacheType;
loff_t readEndOffset = offset + size - 1; // offset of the last requested byte (inclusive)
loff_t cacheEndOffset; // offset of the last cached byte (inclusive)
ssize_t remoteReadSize; // number of bytes read remotely in front of the cached range
loff_t cacheCopyOffset; // start position inside the cache buffer
size_t cacheCopySize; // number of bytes taken from the cache buffer
// coherent buffers mode: don't use the cache for files that have mappings
if (app->cfg->tuneCoherentBuffers)
{
i_mmap_lock_read(fhgfsInode->vfs_inode.i_mapping);
if (beegfs_hasMappings(&fhgfsInode->vfs_inode))
{
FsFileInfo_decCacheHits(fileInfo);
i_mmap_unlock_read(fhgfsInode->vfs_inode.i_mapping);
return FhgfsOpsRemoting_readfileVec(iter, size, offset, ioInfo, fhgfsInode);
}
i_mmap_unlock_read(fhgfsInode->vfs_inode.i_mapping);
}
// fast path for parallel readers (other path takes exclusive lock)
if(FhgfsInode_getIsFileOpenByMultipleReaders(fhgfsInode) ||
!FsFileInfo_getAllowCaching(fileInfo) )
{ // no caching needed
ssize_t readRes;
FhgfsInode_fileCacheSharedLock(fhgfsInode); // L O C K (shared)
cacheBuffer = Fhgfsinode_getFileCacheBuffer(fhgfsInode);
cacheType = cacheBuffer->bufType;
if(cacheType != FileBufferType_NONE)
{ // file already has buffer attached, so we need exclusive lock
FhgfsInode_fileCacheSharedUnlock(fhgfsInode); // U N L O C K (shared)
goto exclusive_lock_path;
}
readRes = FhgfsOpsRemoting_readfileVec(iter, size, offset, ioInfo, fhgfsInode);
FhgfsInode_fileCacheSharedUnlock(fhgfsInode); // U N L O C K (shared)
return readRes;
}
exclusive_lock_path:
// everything below runs under the exclusive cache lock and leaves via unlock_and_exit
// (the buffer state is read again here, as it was read without or under the shared lock before)
FhgfsInode_fileCacheExclusiveLock(fhgfsInode); // L O C K (exclusive)
cacheBuffer = Fhgfsinode_getFileCacheBuffer(fhgfsInode);
cacheType = cacheBuffer->bufType;
if(cacheType == FileBufferType_NONE)
{ // file has no buffer attached
// update cache hits by guessing whether this would have been a cache hit if we had a buffer
// "virtual" cache range: one buffer size, starting at the last read offset
loff_t virtualCacheStart = FsFileInfo_getLastReadOffset(fileInfo);
loff_t virtualCacheEnd = virtualCacheStart + NoAllocBufferStore_getBufSize(cacheStore) - 1;
if( (virtualCacheEnd >= offset) && (virtualCacheStart <= readEndOffset) )
FsFileInfo_incCacheHits(fileInfo); // range overlap => could have been a cache hit
else
FsFileInfo_decCacheHits(fileInfo); // would probably not have been a cache hit
retVal = __FhgfsOpsHelper_readCacheFlushed(iter, size, offset, fhgfsInode, fileInfo, ioInfo);
goto unlock_and_exit;
}
if(cacheType == FileBufferType_WRITE)
{ // file has a write cache => flush it
FhgfsOpsErr flushRes;
FsFileInfo_decCacheHits(fileInfo);
flushRes = __FhgfsOpsHelper_flushCacheUnlocked(app, fhgfsInode, false);
if(unlikely(flushRes != FhgfsOpsErr_SUCCESS) )
{ // flush failed
retVal = -flushRes;
goto unlock_and_exit;
}
retVal = __FhgfsOpsHelper_readCacheFlushed(iter, size, offset, fhgfsInode, fileInfo, ioInfo);
goto unlock_and_exit;
}
// file has a read cache => does it overlap with the read range?
cacheEndOffset = cacheBuffer->fileOffset + cacheBuffer->bufUsageLen - 1;
if( (cacheEndOffset < offset) ||
(cacheBuffer->fileOffset > readEndOffset) )
{ // cache range and read range do not overlap => flush
FhgfsOpsErr flushRes;
FsFileInfo_decCacheHits(fileInfo);
flushRes = __FhgfsOpsHelper_flushCacheUnlocked(app, fhgfsInode, false);
if(unlikely(flushRes != FhgfsOpsErr_SUCCESS) )
{ // flush failed
retVal = -flushRes;
goto unlock_and_exit;
}
retVal = __FhgfsOpsHelper_readCacheFlushed(iter, size, offset, fhgfsInode, fileInfo, ioInfo);
goto unlock_and_exit;
}
// cache range and read range are overlapping. three possible cases to be handled:
// read range before/inside/behind the cache range
FsFileInfo_incCacheHits(fileInfo);
remoteReadSize = 0;
if(offset < cacheBuffer->fileOffset)
{ // read begins before cache => remote-read and copy the rest
ssize_t readRes;
remoteReadSize = cacheBuffer->fileOffset - offset;
readRes = FhgfsOpsRemoting_readfileVec(iter, remoteReadSize, offset, ioInfo, fhgfsInode);
if(readRes < remoteReadSize)
{ // error or end-of-file => invalidate cache
__FhgfsOpsHelper_discardCache(app, fhgfsInode);
retVal = readRes;
goto unlock_and_exit;
}
}
// remote read (if any) succeeded => copy part from cache
cacheCopyOffset = (offset <= cacheBuffer->fileOffset) ? 0 : (offset - cacheBuffer->fileOffset);
// copy up to the end of the cached range or the end of the read range, whichever comes first
cacheCopySize = MIN(cacheBuffer->fileOffset + cacheBuffer->bufUsageLen, offset + size) -
(cacheBuffer->fileOffset + cacheCopyOffset);
{
size_t numCopied = copy_to_iter(&(cacheBuffer->buf)[cacheCopyOffset], cacheCopySize, iter);
if (unlikely(numCopied != cacheCopySize))
{ // copy failed
Logger* log = App_getLogger(app);
Logger_log(log, Log_DEBUG, __func__, "Buffer copy to userspace failed (invalid buffer)");
retVal = -FhgfsOpsErr_ADDRESSFAULT;
goto unlock_and_exit;
}
}
// has everything been read or is there a remainder behind the cacheBuf?
if(readEndOffset > cacheEndOffset)
{ // there is a remainder behind the cacheBuf => discard cache (to allow new caching)
//loff_t remainderBufOffset = remoteReadSize + cacheCopySize;
loff_t remainderFileOffset = cacheEndOffset + 1;
size_t remainderSize = readEndOffset - cacheEndOffset;
ssize_t remainderRes;
__FhgfsOpsHelper_discardCache(app, fhgfsInode);
remainderRes = __FhgfsOpsHelper_readCacheFlushed(iter,
remainderSize, remainderFileOffset, fhgfsInode, fileInfo, ioInfo);
if(unlikely(remainderRes < 0) )
{ // reading failed
retVal = remainderRes;
goto unlock_and_exit;
}
// total = part in front of the cache + part from the cache + part behind the cache
retVal = remoteReadSize + cacheCopySize + remainderRes;
goto unlock_and_exit;
}
// read was successful if we got here
retVal = size;
unlock_and_exit:
FhgfsInode_fileCacheExclusiveUnlock(fhgfsInode); // U N L O C K (exclusive)
return retVal;
}
/**
 * Put the write data into a fresh cache buffer if possible; otherwise write through via the
 * normal remoting writefile().
 *
 * The data is written through if caching is not allowed for the file, if the cache hits
 * counter is not positive (non-append writes only), if the data would not leave room in a cache
 * buffer (or would go beyond the current chunk) or if the store has no buffer left.
 *
 * Note: Use this only when definitely no file cache entry exists.
 * Note: Unlocked, so caller must hold inode cache lock.
 *
 * @param offset offset in file, -1 for append
 * @return number of bytes written or negative fhgfs error code
 */
ssize_t __FhgfsOpsHelper_writeCacheFlushed(struct iov_iter *iter,
   size_t size, loff_t offset, FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo)
{
   App* app = ioInfo->app;
   NoAllocBufferStore* bufStore = App_getCacheBufStore(app);
   InodeRefStore* inodeRefs = App_getInodeRefStore(app);
   StripePattern* stripePattern = ioInfo->pattern;

   const bool appendMode = (offset == -1);

   CacheBuffer* cache;
   size_t cacheCapacity; // max number of bytes that the new cache may hold
   int numCopied;

   // caching disabled?
   if(!FsFileInfo_getAllowCaching(fileInfo) )
      goto write_through;

   if(!appendMode && (FsFileInfo_getCacheHits(fileInfo) <= 0) )
      goto write_through;

   // a cache never spans multiple chunks (append buffers have no known chunk position)
   if(appendMode)
      cacheCapacity = NoAllocBufferStore_getBufSize(bufStore);
   else
   {
      size_t restOfChunk = StripePattern_getChunkEnd(stripePattern, offset) - offset + 1;

      cacheCapacity = MIN(restOfChunk, NoAllocBufferStore_getBufSize(bufStore) );
   }

   if(size >= cacheCapacity) // (Note: '=' because we don't want a completely filled up buffer)
      goto write_through;

   // data fits completely into a cache buffer => try to get one from the store

   cache = Fhgfsinode_getFileCacheBuffer(fhgfsInode);

#ifdef BEEGFS_DEBUG
   BEEGFS_BUG_ON(cache->buf, "Looks like we're about to leak a cache buffer");
#endif // BEEGFS_DEBUG

   cache->buf = NoAllocBufferStore_instantBuf(bufStore);
   if(!cache->buf)
      goto write_through; // no cache buffer left in the store

   cache->bufType = FileBufferType_WRITE; // (needed here for _discardCache() below)

   numCopied = copy_from_iter(cache->buf, size, iter);
   if(unlikely(numCopied != size))
   { // copy failed => give the buffer back
      Logger* log = App_getLogger(app);

      Logger_log(log, Log_DEBUG, __func__, "Buffer copy from userspace failed (invalid buffer)");

      __FhgfsOpsHelper_discardCache(app, fhgfsInode);

      return -FhgfsOpsErr_ADDRESSFAULT;
   }

   cache->fileOffset = offset; // (note: can be -1 in case of append)
   cache->bufUsageLen = size;
   cache->bufUsageMaxLen = cacheCapacity;

   // add to inode ref store for async flush
   InodeRefStore_addAndReferenceInode(inodeRefs, BEEGFS_VFSINODE(fhgfsInode) );

   return size;

write_through:
   return FhgfsOpsHelper_writefileEx(fhgfsInode, iter, size, offset, ioInfo);
}
/**
 * Try to read-ahead data into a fresh cache buffer or just do a plain normal remoting
 * readfile().
 *
 * A direct read is done if caching is not allowed for the file, if the cache hits counter is
 * not positive, if the file is open by multiple readers, if the requested size would not leave
 * room in a cache buffer (or would go beyond the current chunk) or if the store has no buffer
 * left.
 *
 * Note: Use this only when definitely no file cache entry exists.
 * Note: Unlocked, so caller must hold inode cache lock.
 *
 * @param iter destination for the read data
 * @param size number of bytes requested by the caller
 * @param offset offset in file
 * @return number of bytes read or negative fhgfs error code
 */
ssize_t __FhgfsOpsHelper_readCacheFlushed(struct iov_iter *iter, size_t size, loff_t offset,
   FhgfsInode* fhgfsInode, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo)
{
   App* app = ioInfo->app;
   NoAllocBufferStore* cacheStore = App_getCacheBufStore(app);
   InodeRefStore* refStore = App_getInodeRefStore(app);
   StripePattern* pattern = ioInfo->pattern;

   size_t currentChunkSize;
   size_t maxCacheLen;
   CacheBuffer* cacheBuffer;
   ssize_t readRes;
   ssize_t numIterCopy;

   // caching disabled?
   if(!FsFileInfo_getAllowCaching(fileInfo) || (FsFileInfo_getCacheHits(fileInfo) <= 0) ||
      FhgfsInode_getIsFileOpenByMultipleReaders(fhgfsInode) )
      return FhgfsOpsRemoting_readfileVec(iter, size, offset, ioInfo, fhgfsInode);

   // check whether the read size is larger than a single cacheBuf or would span multiple chunks
   currentChunkSize = StripePattern_getChunkEnd(pattern, offset) - offset + 1;
   maxCacheLen = MIN(currentChunkSize, NoAllocBufferStore_getBufSize(cacheStore) );

   if(size >= maxCacheLen) // (Note: '=' because we don't want a completely used up buffer)
   { // scenario not allowed => read direct
      return FhgfsOpsRemoting_readfileVec(iter, size, offset, ioInfo, fhgfsInode);
   }

   // looks like we got something to cache here (read size is smaller than a single cacheBuf)

   // create new cache (if buffer available)
   cacheBuffer = Fhgfsinode_getFileCacheBuffer(fhgfsInode);

#ifdef BEEGFS_DEBUG
   BEEGFS_BUG_ON(cacheBuffer->buf, "Looks like we're about to leak a cache buffer");
#endif // BEEGFS_DEBUG

   cacheBuffer->buf = NoAllocBufferStore_instantBuf(cacheStore);
   if(!cacheBuffer->buf)
   { // no cache buffer left in the store => read direct
      return FhgfsOpsRemoting_readfileVec(iter, size, offset, ioInfo, fhgfsInode);
   }

   cacheBuffer->bufType = FileBufferType_READ; // (needed here for _discardCache() below)

   // we got a buffer => read as much as possible into the cache buffer

   /* reduce read-ahead at file start for small reads. good if e.g. a process is only looking at
      file starts.
      (MIN ensures that this can only reduce the length and never raise it above the buffer size
      or chunk limit computed above. size stays below the result, because it is smaller than both
      values.) */
   if(!offset && (size < FSFILEINFO_CACHE_SLOWSTART_READLEN) )
      maxCacheLen = MIN(maxCacheLen, FSFILEINFO_CACHE_SLOWSTART_READLEN);

   readRes = FhgfsOpsRemoting_readfile_kernel(cacheBuffer->buf, maxCacheLen, offset, ioInfo,
      fhgfsInode);
   if(readRes <= 0)
   { // error or immediate end of file
      __FhgfsOpsHelper_discardCache(app, fhgfsInode);

      return readRes;
   }

   // the caller only gets what it asked for; the rest stays in the cache as read-ahead
   numIterCopy = MIN((ssize_t) size, readRes);

   // init cache entry fields and copy data from cache buffer to client iter
   {
      size_t numCopied = copy_to_iter(cacheBuffer->buf, numIterCopy, iter);

      if (unlikely(numCopied != numIterCopy))
      { // copy failed
         Logger* log = App_getLogger(app);

         Logger_log(log, Log_DEBUG, __func__, "Buffer copy to userspace failed (invalid buffer)");

         __FhgfsOpsHelper_discardCache(app, fhgfsInode);

         return -FhgfsOpsErr_ADDRESSFAULT;
      }
   }

   cacheBuffer->bufUsageLen = readRes;
   cacheBuffer->bufUsageMaxLen = maxCacheLen;
   cacheBuffer->fileOffset = offset;

   // add to inode ref store for async flush
   InodeRefStore_addAndReferenceInode(refStore, BEEGFS_VFSINODE(fhgfsInode) );

   return numIterCopy;
}
/**
 * Drop the current cache buffer of a file: the buffer goes back to the store, the cache entry
 * is marked as unused and the inode is removed from the async flush store.
 *
 * Note: Unlocked, so caller must hold the inode cache lock.
 */
void __FhgfsOpsHelper_discardCache(App* app, FhgfsInode* fhgfsInode)
{
   CacheBuffer* cache = Fhgfsinode_getFileCacheBuffer(fhgfsInode);

#ifdef BEEGFS_DEBUG
   if(!cache->buf || (cache->bufType == FileBufferType_NONE) )
   {
      BEEGFS_BUG_ON(1, "Attempting to discard an invalid cache buffer");
      return;
   }
#endif // BEEGFS_DEBUG

   NoAllocBufferStore_addBuf(App_getCacheBufStore(app), cache->buf);

   cache->bufType = FileBufferType_NONE;
   cache->buf = NULL; // (NULL'ing required for debug sanity checks)

   // remove inode from async flush store
   InodeRefStore_removeAndReleaseInode(App_getInodeRefStore(app), BEEGFS_VFSINODE(fhgfsInode) );
}
/**
 * Request the exclusive append lock for a file via FhgfsOpsRemoting_flockAppendEx().
 *
 * If the request fails, ioInfo->needsAppendLockCleanup is set (via SAFE_ASSIGN).
 *
 * @return result of the lock request
 */
FhgfsOpsErr FhgfsOpsHelper_getAppendLock(FhgfsInode* inode, RemotingIOInfo* ioInfo)
{
   FhgfsOpsErr requestRes;

   FhgfsInode_entryInfoReadLock(inode); // LOCK EntryInfo

   requestRes = FhgfsOpsRemoting_flockAppendEx(&inode->entryInfo, &inode->entryInfoLock,
      ioInfo->app, ioInfo->fileHandleID, 0, current->pid, ENTRYLOCKTYPE_EXCLUSIVE, true);

   FhgfsInode_entryInfoReadUnlock(inode); // UNLOCK EntryInfo

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // locking failed => remember that a cleanup is needed
      LOG_DEBUG_FORMATTED(App_getLogger(ioInfo->app), Log_DEBUG, __func__, "Append lock error: %s",
         FhgfsOpsErr_toErrString(requestRes) );

      SAFE_ASSIGN(ioInfo->needsAppendLockCleanup, true);
   }

   return requestRes;
}
/**
 * Release the append lock of a file via FhgfsOpsRemoting_flockAppendEx().
 *
 * If the request fails, ioInfo->needsAppendLockCleanup is set (via SAFE_ASSIGN).
 *
 * @return result of the unlock request
 */
FhgfsOpsErr FhgfsOpsHelper_releaseAppendLock(FhgfsInode* inode, RemotingIOInfo* ioInfo)
{
   FhgfsOpsErr requestRes;

   FhgfsInode_entryInfoReadLock(inode); // LOCK EntryInfo

   requestRes = FhgfsOpsRemoting_flockAppendEx(&inode->entryInfo, &inode->entryInfoLock,
      ioInfo->app, ioInfo->fileHandleID, 0, current->pid, ENTRYLOCKTYPE_UNLOCK, true);

   FhgfsInode_entryInfoReadUnlock(inode); // UNLOCK EntryInfo

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // unlocking failed => remember that a cleanup is needed
      SAFE_ASSIGN(ioInfo->needsAppendLockCleanup, true);
   }

   return requestRes;
}
/**
 * Read chunk by chunk from the servers; whenever a chunk read returns less than requested, the
 * missing part of that chunk range is filled with zeros in the iter.
 * So the caller must make sure that the file actually has at least the requested size.
 *
 * Note: Intended for sparse file reading.
 * Note: There is also a similar version for kernel buffers.
 *
 * @param iter destination for the read (or zeroed) data
 * @param size total number of bytes to be delivered
 * @param offset offset in file
 * @return FhgfsOpsErr_SUCCESS, the (positive) error of a failed remote read or
 *    FhgfsOpsErr_ADDRESSFAULT if zero-filling failed
 */
FhgfsOpsErr FhgfsOpsHelper_readOrClearUser(App* app, struct iov_iter *iter, size_t size,
   loff_t offset, FsFileInfo* fileInfo, RemotingIOInfo* ioInfo)
{
   StripePattern* stripePattern = ioInfo->pattern;
   size_t stepLen; // number of bytes handled in the current round

   for( ; size != 0; offset += stepLen, size -= stepLen)
   {
      size_t restOfChunk = StripePattern_getChunkEnd(stripePattern, offset) - offset + 1;
      ssize_t numRead;

      stepLen = MIN(restOfChunk, size); // never cross a chunk boundary with a single read

      numRead = FhgfsOpsRemoting_readfileVec(iter, stepLen, offset, ioInfo, NULL);
      if(unlikely(numRead < 0) )
         return -numRead;

      if( (size_t)numRead < stepLen)
      { // short read => zero-fill the remainder
         ssize_t numMissing = stepLen - numRead;
         long numZeroed = iov_iter_zero(numMissing, iter);

         if(numZeroed != numMissing)
            return FhgfsOpsErr_ADDRESSFAULT;
      }
   }

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Build a relative path that leads from pathFromStr to pathToStr. Both input paths are relative
 * to the mount point.
 *
 * The result is pathTo with one ".." prepended for every parent dir of pathFrom (i.e. for every
 * element of pathFrom except the final one).
 *
 * @param outPathRelativeToStr string copy of the resulting path; needs to be kfree'd by the
 *    caller
 */
void FhgfsOpsHelper_getRelativeLinkStr(const char* pathFromStr, const char* pathToStr,
   char** outPathRelativeToStr)
{
   Path fromPath;
   Path toPath; // will be modified to become the relative result path
   StrCpyList* toElems;
   int numParentDirs; // (must be signed, because it becomes negative for an empty pathFrom)

   Path_initFromString(&fromPath, pathFromStr);
   Path_initFromString(&toPath, pathToStr);

   toElems = Path_getPathElems(&toPath);

   // (note: -1 to exclude the final path element)
   numParentDirs = (int)StrCpyList_length(Path_getPathElems(&fromPath) ) - 1;

   while(numParentDirs-- > 0)
      StrCpyList_addHead(toElems, "..");

   Path_setAbsolute(&toPath, false);

   *outPathRelativeToStr = Path_getPathAsStrCopy(&toPath);

   Path_uninit(&fromPath);
   Path_uninit(&toPath);
}
/**
 * Create a symlink.
 *
 * Note: creates the symlink as a normal file and writes the target to it
 * Note: createInfo->fileEvent is hidden from mkfile and handed over to the close call instead,
 * so that the event is only sent when the symlink is fully created. On the error paths after
 * mkfile, the event is uninitialized here (unless the close call already took care of it).
 *
 * @param parentInfo entry info of the directory in which the link is created
 * @param to link target; written to the new file without the terminating zero
 * @param createInfo creation parameters; entryName, userID, groupID and fileEvent are used here
 * @param outEntryInfo set by mkfile; uninitialized again if a later step fails
 * @return 0 on success, negative linux error code otherwise
 */
int FhgfsOpsHelper_symlink(App* app, const EntryInfo* parentInfo, const char* to,
   struct CreateInfo* createInfo, EntryInfo* outEntryInfo)
{
   int retVal = 0;

   size_t toStrLen = strlen(to);

   FhgfsOpsErr mkRes;
   AtomicInt maxUsedTargetIndex;
   RemotingIOInfo ioInfo;
   FhgfsOpsErr openRes;
   size_t numTargets;
   ssize_t writeRes;
   FhgfsOpsErr closeRes;
   BitStore firstWriteDone;
   PathInfo pathInfo;
   const struct FileEvent* event;

   AtomicInt_init(&maxUsedTargetIndex, -1);

   // create the file

   // stash createInfo->fileEvent. we want to send it only during close (when the symlink is fully
   // created) - mkfile would generate its own event if createInfo->fileEvent was set.
   event = createInfo->fileEvent;
   createInfo->fileEvent = NULL;

   mkRes = FhgfsOpsRemoting_mkfile(app, parentInfo, createInfo, outEntryInfo);

   createInfo->fileEvent = event;

   if(mkRes != FhgfsOpsErr_SUCCESS)
   { // error
      retVal = FhgfsOpsErr_toSysErr(mkRes);
      goto err_exit;
   }

   // open the file

   memset(&pathInfo, 0, sizeof(pathInfo) );

   RemotingIOInfo_initOpen(app, OPENFILE_ACCESS_WRITE, &maxUsedTargetIndex, &pathInfo, &ioInfo);

   openRes = FhgfsOpsRemoting_openfile(outEntryInfo, &ioInfo, NULL, NULL);
   if(openRes != FhgfsOpsErr_SUCCESS)
   { // error => remove the file that was created above
      FhgfsOpsRemoting_unlinkfile(app, parentInfo, createInfo->entryName, NULL);

      retVal = FhgfsOpsErr_toSysErr(openRes);
      goto err_cleanup_open;
   }

   numTargets = UInt16Vec_length(ioInfo.pattern->getStripeTargetIDs(ioInfo.pattern));
   BitStore_initWithSizeAndReset(&firstWriteDone, numTargets);
   ioInfo.firstWriteDone = &firstWriteDone;
   ioInfo.userID = createInfo->userID;
   ioInfo.groupID = createInfo->groupID;
#ifdef BEEGFS_NVFS
   ioInfo.nvfs = false;
#endif

   // write link-destination to the file

   writeRes = FhgfsOpsRemoting_writefile_kernel(to, toStrLen, 0, &ioInfo);
   if(writeRes < (ssize_t)toStrLen)
   { // error or short write => close and remove the incomplete file
      FhgfsOpsHelper_closefileWithAsyncRetry(outEntryInfo, &ioInfo, NULL);
      FhgfsOpsRemoting_unlinkfile(app, parentInfo, createInfo->entryName, NULL);

      /* a short write has no error code of its own, so it is reported as internal error.
         (both cases need to be converted, because callers expect a negative linux error code
         and not a positive fhgfs error code.) */
      retVal = (writeRes < 0) ?
         FhgfsOpsErr_toSysErr(-writeRes) : FhgfsOpsErr_toSysErr(FhgfsOpsErr_INTERNAL);
      goto err_cleanup_open;
   }

   // close the file

   // callee frees fileEvent and wants a non-const pointer to signify this. callers of _symlink
   // must be aware of this and not free the fileEvent themselves
   closeRes = FhgfsOpsHelper_closefileWithAsyncRetry(outEntryInfo, &ioInfo,
      (struct FileEvent*) createInfo->fileEvent);
   if(closeRes != FhgfsOpsErr_SUCCESS)
   { // error
      retVal = FhgfsOpsErr_toSysErr(closeRes);

      // createInfo->fileEvent has been taken care of, don't free it again
      createInfo->fileEvent = NULL;

      goto err_cleanup_open;
   }

   // clean-up

   BitStore_uninit(&firstWriteDone);
   RemotingIOInfo_freeVals(&ioInfo);

   return retVal;

err_cleanup_open:
   // (firstWriteDone has not been assigned by this function yet if the open failed)
   if (ioInfo.firstWriteDone)
      BitStore_uninit(ioInfo.firstWriteDone);

   RemotingIOInfo_freeVals(&ioInfo);
   EntryInfo_uninit(outEntryInfo);

   if (createInfo->fileEvent)
      FileEvent_uninit((struct FileEvent*) createInfo->fileEvent);

err_exit:
   return retVal;
}
/**
 * Opens a file (the file must exist), reads data from it and closes it.
 *
 * Note: This is really slow currently, because of the open/close overhead.
 *
 * @param entryInfo entry to open; also passed to the close call
 * @param iter destination of the read data, passed through to the remoting read
 * @param size number of bytes to read, passed through to the remoting read
 * @param offset file offset to read from
 * @return number of read bytes or negative linux error code; if the read succeeded but the
 * close failed, the close error is returned instead of the byte count
 */
ssize_t FhgfsOpsHelper_readStateless(App* app, const EntryInfo* entryInfo,
   struct iov_iter *iter, size_t size, loff_t offset)
{
   // ssize_t (matching the return type), so that the byte count is not narrowed to int
   ssize_t retVal = -EREMOTEIO;

   AtomicInt maxUsedTargetIndex;
   RemotingIOInfo ioInfo;
   FhgfsOpsErr openRes;
   ssize_t readRes;
   size_t numTargets;
   FhgfsOpsErr closeRes;
   BitStore firstWriteDone;
   PathInfo pathInfo;

   AtomicInt_init(&maxUsedTargetIndex, -1);

   // open file

   memset(&pathInfo, 0, sizeof(pathInfo) );

   RemotingIOInfo_initOpen(app, OPENFILE_ACCESS_READ, &maxUsedTargetIndex, &pathInfo, &ioInfo);

   openRes = FhgfsOpsRemoting_openfile(entryInfo, &ioInfo, NULL, NULL);
   if(openRes != FhgfsOpsErr_SUCCESS)
   { // error
      retVal = FhgfsOpsErr_toSysErr(openRes);
      goto clean_up_open;
   }

   // one bit per stripe target of the opened file
   numTargets = UInt16Vec_length(ioInfo.pattern->getStripeTargetIDs(ioInfo.pattern));
   BitStore_initWithSizeAndReset(&firstWriteDone, numTargets);

   ioInfo.firstWriteDone = &firstWriteDone;

   // read file

   readRes = FhgfsOpsRemoting_readfileVec(iter, size, offset, &ioInfo, NULL);

   // close the file (always, independent of the read result)

   closeRes = FhgfsOpsHelper_closefileWithAsyncRetry(entryInfo, &ioInfo, NULL);

   if(readRes < 0)
   { // read error (negated fhgfs error code) takes precedence, close result is ignored
      retVal = FhgfsOpsErr_toSysErr(-readRes);
   }
   else
   if(closeRes != FhgfsOpsErr_SUCCESS)
   { // read worked, but close failed
      retVal = FhgfsOpsErr_toSysErr(closeRes);
   }
   else
      retVal = readRes;

   // clean-up

clean_up_open:
   // firstWriteDone is only set in ioInfo after a successful open
   if (ioInfo.firstWriteDone)
      BitStore_uninit(ioInfo.firstWriteDone);

   RemotingIOInfo_freeVals(&ioInfo);

   return retVal;
}
/**
 * Write a buffer to the file of the given inode through a write handle that is referenced from
 * the inode only for the duration of this call and released again before returning.
 *
 * Note: This can save the overhead for remote open/close if the file is already open for
 * writing (or reading+writing) by any other process, so it is way better than the
 * _writeStateless() method.
 *
 * @param buf data to write
 * @param size number of bytes in buf
 * @param offset offset in file, -1 for append
 * @return bytes written or negative fhgfs error code (a write error takes precedence over an
 * error of the handle release)
 */
static ssize_t FhgfsOpsHelper_writeStatelessInode(FhgfsInode* fhgfsInode, const char *buf,
   size_t size, loff_t offset)
{
   FileHandleType refHandleType;
   RemotingIOInfo refIOInfo;
   FhgfsOpsErr refRes;
   FhgfsOpsErr unrefRes;
   ssize_t numWritten;

   // get a write handle (referenceHandle needs a dentry only for possible TRUNC operations)

   refRes = FhgfsInode_referenceHandle(fhgfsInode, NULL, OPENFILE_ACCESS_WRITE, true, NULL,
      &refHandleType, NULL);
   if(unlikely(refRes != FhgfsOpsErr_SUCCESS) )
      return -refRes;

   // write through the referenced handle

   FhgfsInode_getRefIOInfo(fhgfsInode, refHandleType,
      FhgfsInode_handleTypeToOpenFlags(refHandleType), &refIOInfo);

   numWritten = FhgfsOpsHelper_writefileEx_kernel(fhgfsInode, buf, size, offset, &refIOInfo);

   // give the handle back, no matter whether the write worked

   unrefRes = FhgfsInode_releaseHandle(fhgfsInode, refHandleType, NULL);

   if(unlikely(numWritten < 0) )
      return numWritten; // write error wins, release result is not evaluated

   if(unlikely(unrefRes != FhgfsOpsErr_SUCCESS) )
      return -unrefRes;

   return numWritten;
}
/**
 * Opens a file (the file must exist), writes the data to it and closes it.
 *
 * Note: This is really slow, because of the open/close overhead - use _writeStatelessInode()
 * method instead, if possible.
 *
 * @param entryInfo entry to open; also passed to the close call
 * @param iter source of the data, passed through to the remoting write
 * @param size not referenced by this function (the remoting write is called with iter only)
 * @param offset file offset to write to
 * @param uid user ID that is stored in the IO info for the write
 * @param gid group ID that is stored in the IO info for the write
 * @return number of written bytes or negative linux error code; if the write succeeded but
 * the close failed, the close error is returned instead of the byte count
 */
ssize_t FhgfsOpsHelper_writeStateless(App* app, const EntryInfo* entryInfo,
   struct iov_iter *iter, size_t size, loff_t offset, unsigned uid, unsigned gid)
{
   // ssize_t (matching the return type), so that the byte count is not narrowed to int
   ssize_t retVal = -EREMOTEIO;

   AtomicInt maxUsedTargetIndex;
   RemotingIOInfo ioInfo;
   FhgfsOpsErr openRes;
   ssize_t writeRes;
   FhgfsOpsErr closeRes;
   BitStore firstWriteDone;
   size_t numTargets;
   PathInfo pathInfo;

   AtomicInt_init(&maxUsedTargetIndex, -1);

   // open file

   memset(&pathInfo, 0, sizeof(pathInfo) );

   RemotingIOInfo_initOpen(app, OPENFILE_ACCESS_WRITE, &maxUsedTargetIndex, &pathInfo, &ioInfo);

   openRes = FhgfsOpsRemoting_openfile(entryInfo, &ioInfo, NULL, NULL);
   if(openRes != FhgfsOpsErr_SUCCESS)
   { // error
      retVal = FhgfsOpsErr_toSysErr(openRes);
      goto clean_up_open;
   }

   // one "first write done" bit per stripe target of the opened file
   numTargets = UInt16Vec_length(ioInfo.pattern->getStripeTargetIDs(ioInfo.pattern));
   BitStore_initWithSizeAndReset(&firstWriteDone, numTargets);

   ioInfo.firstWriteDone = &firstWriteDone;
   ioInfo.userID = uid;
   ioInfo.groupID = gid;
#ifdef BEEGFS_NVFS
   ioInfo.nvfs = false;
#endif

   // write file

   writeRes = FhgfsOpsRemoting_writefileVec(iter, offset, &ioInfo, false);

   // close file (always, independent of the write result)

   closeRes = FhgfsOpsHelper_closefileWithAsyncRetry(entryInfo, &ioInfo, NULL);

   if(writeRes < 0)
   { // write error (negated fhgfs error code) takes precedence, close result is ignored
      retVal = FhgfsOpsErr_toSysErr(-writeRes);
   }
   else
   if(closeRes != FhgfsOpsErr_SUCCESS)
   { // write worked, but close failed
      retVal = FhgfsOpsErr_toSysErr(closeRes);
   }
   else
      retVal = writeRes;

   // clean-up

clean_up_open:
   // firstWriteDone is only set in ioInfo after a successful open
   if (ioInfo.firstWriteDone)
      BitStore_uninit(ioInfo.firstWriteDone);

   RemotingIOInfo_freeVals(&ioInfo);

   return retVal;
}
/**
 * Flush cache buffer and return it to the store.
 *
 * A read cache is just discarded; a write cache is written out via _writeStatelessInode() and
 * discarded afterwards.
 *
 * Note: Unlocked, so caller must hold inode cache lock.
 *
 * @param discardCacheOnError true to discard a write cache if the remote write
 * was not successful; false to leave the cache as it is in this case (this function then
 * does not call the discard method)
 * @return FhgfsOpsErr_SUCCESS if there was nothing to flush or the complete write cache was
 * written; FhgfsOpsErr_NOSPACE if fewer bytes than cached (including zero) were written
 * without an error code; otherwise the error code of the failed write
 */
FhgfsOpsErr __FhgfsOpsHelper_flushCacheUnlocked(App* app, FhgfsInode* fhgfsInode,
   bool discardCacheOnError)
{
   /* note: we don't take a handle to an open file here, because we wouldn't know whether the
      given handle is a read-only handle that just needs to flush the write cache to establish a new
      read cache. that's why we use the _writeStatelessInode() method here. */

   CacheBuffer* cacheBuffer = Fhgfsinode_getFileCacheBuffer(fhgfsInode);
   enum FileBufferType cacheType = cacheBuffer->bufType;
   ssize_t writeRes;

   if(cacheType == FileBufferType_NONE)
      return FhgfsOpsErr_SUCCESS;

   if(cacheType == FileBufferType_READ)
   { // file has a read cache => just discard the cached data
      __FhgfsOpsHelper_discardCache(app, fhgfsInode);

      return FhgfsOpsErr_SUCCESS;
   }

   // file has a write cache => send the cached data to the storage nodes

   writeRes = FhgfsOpsHelper_writeStatelessInode(fhgfsInode, cacheBuffer->buf,
      cacheBuffer->bufUsageLen, cacheBuffer->fileOffset);

   if(unlikely(writeRes < (ssize_t)cacheBuffer->bufUsageLen) )
   { // write did not succeed
      if(discardCacheOnError)
         __FhgfsOpsHelper_discardCache(app, fhgfsInode);

      /* short write without error code. this includes writeRes==0: negating zero would return
         0, which callers could not tell apart from success (assuming FhgfsOpsErr_SUCCESS is
         0 - TODO confirm), although cached data was not written. */
      if(writeRes >= 0)
         return FhgfsOpsErr_NOSPACE;

      return -writeRes; // writeRes is a negated fhgfs error code here
   }

   // write succeeded

   __FhgfsOpsHelper_discardCache(app, fhgfsInode);

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Locked variant of the cache flush: takes the exclusive file cache lock of the inode, runs
 * __FhgfsOpsHelper_flushCacheUnlocked() and drops the lock again.
 *
 * @param discardCacheOnError passed on unchanged to the unlocked flush method; true to discard
 * a write cache if the remote write was not successful
 * @return result of the unlocked flush method
 */
FhgfsOpsErr FhgfsOpsHelper_flushCache(App* app, FhgfsInode* fhgfsInode,
   bool discardCacheOnError)
{
   FhgfsOpsErr flushRes;

   FhgfsInode_fileCacheExclusiveLock(fhgfsInode); // L O C K

   flushRes = __FhgfsOpsHelper_flushCacheUnlocked(app, fhgfsInode, discardCacheOnError);

   FhgfsInode_fileCacheExclusiveUnlock(fhgfsInode); // U N L O C K

   return flushRes;
}