New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,103 @@
/*
* Dentry information stored on disk
*/
#pragma once
#include <common/storage/StorageDefinitions.h>
/* Note: Don't forget to update DiskMetaData::getSupportedDentryFeatureFlags() if you add new
* flags here. */
// Feature flags, 16 bit (single-bit values that are OR-ed together; kept in the uint16_t field
// DentryStoreData::dentryFeatureFlags)
#define DENTRY_FEATURE_INODE_INLINE 1 // inode inlined into a dentry
#define DENTRY_FEATURE_IS_FILEINODE 2 // file-inode
#define DENTRY_FEATURE_MIRRORED 4 // feature flag for mirrored dentries (deprecated)
#define DENTRY_FEATURE_BUDDYMIRRORED 8 // feature flag to indicate buddy mirrored dentries
#define DENTRY_FEATURE_32BITIDS 16 // dentry uses 32bit node IDs on disk
/**
 * Holder for the dentry fields that are kept on disk: entry ID, entry type, owner node ID and
 * the 16 bit dentry feature flags.
 *
 * The mutators are protected; DirEntry, DiskMetaData and FileInode are friends and may use them.
 */
class DentryStoreData
{
   friend class DirEntry;
   friend class DiskMetaData;
   friend class FileInode;

   public:
      /**
       * Creates an entry with invalid type, owner node 0 and no feature flags.
       */
      DentryStoreData()
         : entryType(DirEntryType_INVALID), ownerNodeID(0), dentryFeatureFlags(0)
      {
      }

      /**
       * @param dentryFeatureFlags only the lower 16 bits are kept
       */
      DentryStoreData(const std::string& entryID, DirEntryType entryType, NumNodeID ownerNodeID,
         unsigned dentryFeatureFlags)
         : entryID(entryID), entryType(entryType), ownerNodeID(ownerNodeID),
           dentryFeatureFlags(static_cast<uint16_t>(dentryFeatureFlags) )
      {
      }

      std::string entryID; // a filesystem-wide identifier for this dir
      DirEntryType entryType;
      NumNodeID ownerNodeID; // 0 means undefined
      uint16_t dentryFeatureFlags;

   protected:
      // getters / setters

      void setEntryID(const std::string& entryID)
      {
         this->entryID = entryID;
      }

      const std::string& getEntryID() const
      {
         return entryID;
      }

      void setDirEntryType(DirEntryType entryType)
      {
         this->entryType = entryType;
      }

      DirEntryType getDirEntryType() const
      {
         return entryType;
      }

      void setOwnerNodeID(NumNodeID ownerNodeID)
      {
         this->ownerNodeID = ownerNodeID;
      }

      NumNodeID getOwnerNodeID() const
      {
         return ownerNodeID;
      }

      // replaces all flags (value is narrowed to 16 bit)
      void setDentryFeatureFlags(unsigned dentryFeatureFlags)
      {
         this->dentryFeatureFlags = static_cast<uint16_t>(dentryFeatureFlags);
      }

      // sets the given flag bit(s) in addition to the current ones
      void addDentryFeatureFlag(unsigned dentryFeatureFlag)
      {
         dentryFeatureFlags = static_cast<uint16_t>(dentryFeatureFlags | dentryFeatureFlag);
      }

      // clears the given flag bit(s)
      void removeDentryFeatureFlag(unsigned flag)
      {
         dentryFeatureFlags = static_cast<uint16_t>(dentryFeatureFlags & ~flag);
      }

   public:
      unsigned getDentryFeatureFlags() const
      {
         return dentryFeatureFlags;
      }
};

View File

@@ -0,0 +1,869 @@
#include <common/toolkit/serialization/Serialization.h>
#include <components/buddyresyncer/BuddyResyncer.h>
#include <program/Program.h>
#include "MetadataEx.h"
#include "DirEntry.h"
#include <sys/xattr.h>
/*
 * Store the dirEntryID file. This is a normal dirEntry (with inlined inode),
 * but the file name is the entryID.
 *
 * The file is created exclusively (O_EXCL). If serializing or writing the dentry fails after
 * the file was created, the file is closed and unlinked again.
 *
 * @param logContext context string used for log messages
 * @param idPath - path to the idFile, including the file name
 * @return FhgfsOpsErr_SUCCESS on success, FhgfsOpsErr_EXISTS if creating the file failed with
 *    EEXIST, FhgfsOpsErr_INTERNAL for every other failure
 */
FhgfsOpsErr DirEntry::storeInitialDirEntryID(const char* logContext, const std::string& idPath)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;

   char buf[DIRENTRY_SERBUF_SIZE];
   Serializer ser(buf, sizeof(buf));

   bool useXAttrs = Program::getApp()->getConfig()->getStoreUseExtendedAttribs();

   // create file

   /* note: if we ever think about switching to a rename-based version here, we must keep very
      long user file names in mind, which might lead to problems if we add an extension to the
      temporary file name. */

   int openFlags = O_CREAT|O_EXCL|O_WRONLY;

   int fd = open(idPath.c_str(), openFlags, 0644);
   if (unlikely (fd == -1) ) // this is our ID file, failing to create it is very unlikely
   { // error

      /* Remember the open() error right away: logging and the unlink() below may overwrite
       * errno, but all decisions below must be based on why open() failed. */
      const int openErrno = errno;

      LogContext(logContext).logErr("Unable to create dentry file: " + idPath + ". " +
         "SysErr: " + System::getErrString(openErrno) );

      if (openErrno == EMFILE)
      { /* Creating the file succeeded, but there are already too many open file descriptors to
         * open the file. We don't want to leak an entry-by-id file, so delete it.
         * We only want to delete the file for specific errors, as for example EEXIST would mean
         * we would delete an existing (probably) working entry. */
         int unlinkRes = unlink(idPath.c_str() );
         if (unlinkRes && errno != ENOENT)
            LogContext(logContext).logErr("Failed to unlink failed dentry: " + idPath + ". " +
               "SysErr: " + System::getErrString() );
      }

      if (openErrno == EEXIST)
      {
         /* EEXIST never should happen, as our ID is supposed to be unique, but there rare cases
          * as for the upgrade tool */
         retVal = FhgfsOpsErr_EXISTS;
#ifdef BEEGFS_DEBUG
         LogContext(logContext).logBacktrace();
#endif
      }
      else
      {
         retVal = FhgfsOpsErr_INTERNAL;
      }

      return retVal;
   }

   // serialize (to new buf)

   serializeDentry(ser);
   if (!ser.good())
   {
      /* The dentry does not fit into buf, so there is nothing valid to write. Do not touch buf
       * with ser.size() bytes and do not leave the just created file behind. */
      LogContext(logContext).logErr("Dentry too large: " + idPath + ".");
      retVal = FhgfsOpsErr_INTERNAL;

      goto error_closefile;
   }

   // write buf to file

   if(useXAttrs)
   { // extended attribute
      int setRes = fsetxattr(fd, META_XATTR_NAME, buf, ser.size(), 0);

      if(unlikely(setRes == -1) )
      { // error
         LogContext(logContext).logErr("Unable to store dentry xattr metadata: " + idPath + ". " +
            "SysErr: " + System::getErrString() );
         retVal = FhgfsOpsErr_INTERNAL;

         goto error_closefile;
      }
   }
   else
   { // normal file content
      ssize_t writeRes = write(fd, buf, ser.size());

      if(unlikely(writeRes != (ssize_t)ser.size()))
      { // error
         LogContext(logContext).logErr("Unable to store dentry metadata: " + idPath + ". " +
            "SysErr: " + System::getErrString() );
         retVal = FhgfsOpsErr_INTERNAL;

         goto error_closefile;
      }
   }

   close(fd);

   return retVal;

   // error compensation
error_closefile:
   close(fd);

   int unlinkRes = unlink(idPath.c_str() );
   if (unlikely(unlinkRes && errno != ENOENT) )
   {
      LogContext(logContext).logErr("Storing the dentry-by-id file failed and "
         "now also deleting the dentry-by-id file fails: " + idPath + ". " +
         "SysErr: " + System::getErrString() );
   }

   return retVal;
}
/**
 * Store the dirEntry as file name.
 *
 * Creates namePath as a hard link to the already existing dentry-by-id file. If linking fails,
 * the dentry-by-id file is unlinked again.
 *
 * @param logContext context string used for log messages
 * @param idPath path to the existing dentry-by-id file
 * @param namePath path of the dentry-by-name file to create
 * @param isNonInlinedInode if true, the dentry-by-id file is unlinked after the link was created
 * @return FhgfsOpsErr_SUCCESS on success, FhgfsOpsErr_EXISTS if link() failed with EEXIST,
 *    FhgfsOpsErr_INTERNAL if link() failed for another reason
 */
FhgfsOpsErr DirEntry::storeInitialDirEntryName(const char* logContext, const std::string& idPath,
   const std::string& namePath, bool isNonInlinedInode)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;

   int linkRes = link(idPath.c_str(), namePath.c_str() );
   if (linkRes)
   { /* Creating the dirEntry-by-name failed, most likely this is EEXIST.
      * In principle it also might be possible there is an invalid dentry-by-name file,
      * however, we already want to delete those during lookup calls now. So invalid
      * entries are supposed to be very very unlikely and so no self-healing code is
      * implemented here. */

      // keep the link() error, later calls may overwrite errno
      const int linkErrno = errno;

      if (likely(linkErrno == EEXIST) )
         retVal = FhgfsOpsErr_EXISTS;
      else
      {
         LogContext(logContext).logErr("Creating the dentry-by-name file failed: Path: " +
            namePath + " SysErr: " + System::getErrString(linkErrno) );

         retVal = FhgfsOpsErr_INTERNAL;
      }

      // the dentry-by-id file is useless without its name => remove it again
      int unlinkRes = unlink(idPath.c_str() );
      if (unlikely(unlinkRes) )
      {
         LogContext(logContext).logErr("Creating the dentry-by-name file failed and "
            "now also deleting the dentry-by-id file fails: " + idPath +
            " SysErr: " + System::getErrString() );
      }

      return retVal;
   }

   if (isNonInlinedInode)
   {
      // unlink the dentry-by-id file - we don't need it for dirs (or non-inlined inodes in general)
      int unlinkRes = unlink(idPath.c_str() );
      if (unlikely(unlinkRes) )
      {
         LogContext(logContext).logErr("Failed to unlink the (dir) dentry-by-id file "+ idPath +
            " SysErr: " + System::getErrString() );
      }
   }

   LOG_DEBUG(logContext, 4, "Initial dirEntry stored: " + namePath);

   return retVal;
}
/**
 * Dispatches to storeUpdatedDirEntryBufAsXAttr() or storeUpdatedDirEntryBufAsContents(),
 * depending on the "store use extended attribs" config setting.
 *
 * @param idStorePath path of the file to update
 * @param buf the serialized object state that is to be stored
 * @param bufLen number of bytes in buf
 * @return result of the selected store method
 */
bool DirEntry::storeUpdatedDirEntryBuf(const std::string& idStorePath, char* buf, unsigned bufLen)
{
   if (Program::getApp()->getConfig()->getStoreUseExtendedAttribs() )
      return storeUpdatedDirEntryBufAsXAttr(idStorePath, buf, bufLen);

   return storeUpdatedDirEntryBufAsContents(idStorePath, buf, bufLen);
}
/**
* Note: Don't call this directly, use the wrapper storeUpdatedDirEntryBuf().
*
* @param buf the serialized object state that is to be stored
*/
bool DirEntry::storeUpdatedDirEntryBufAsXAttr(const std::string& idStorePath,
char* buf, unsigned bufLen)
{
const char* logContext = DIRENTRY_LOG_CONTEXT "(store updated xattr metadata)";
// write data to file
int setRes = setxattr(idStorePath.c_str(), META_XATTR_NAME, buf, bufLen, 0);
if(unlikely(setRes == -1) )
{ // error
LogContext(logContext).logErr("Unable to write dentry update: " +
idStorePath + ". " + "SysErr: " + System::getErrString() );
return false;
}
LOG_DEBUG(logContext, 4, "Dentry update stored: " + idStorePath);
return true;
}
/**
* Stores the update directly to the current metadata file (instead of creating a separate file
* first and renaming it).
*
* Steps: open the file (creating it if needed), posix_fallocate() bufLen bytes, write() the
* buffer and finally ftruncate() the file to bufLen. A failing ftruncate() is only logged as a
* warning; every other failure closes the file and returns false.
*
* Note: Don't call this directly, use the wrapper storeUpdatedDirEntryBuf().
*
* @param idStorePath path of the metadata file that is updated in-place
* @param buf the serialized object state that is to be stored
* @param bufLen number of bytes in buf
* @return true if all bufLen bytes were written, false on error
*/
bool DirEntry::storeUpdatedDirEntryBufAsContents(const std::string& idStorePath, char* buf,
unsigned bufLen)
{
const char* logContext = DIRENTRY_LOG_CONTEXT "(store updated metadata in-place)";
int fallocRes;
ssize_t writeRes;
int truncRes;
// open file (create it, but not O_EXCL because a former update could have failed)
int openFlags = O_CREAT|O_WRONLY;
int fd = open(idStorePath.c_str(), openFlags, 0644);
if(fd == -1)
{ // error
LogContext(logContext).logErr("Unable to create dentry metadata update file: " +
idStorePath + ". " + "SysErr: " + System::getErrString() );
return false;
}
// make sure we have enough room to write our update
fallocRes = posix_fallocate(fd, 0, bufLen); // (note: posix_fallocate does not set errno)
if(fallocRes == EBADF)
{ // special case for XFS bug
// EBADF is only treated as a real error if the file is smaller than what we want to write
struct stat statBuf;
int statRes = fstat(fd, &statBuf);
if (statRes == -1)
{
LogContext(logContext).log(Log_WARNING, "Unexpected error: fstat() failed with SysErr: "
+ System::getErrString(errno) );
goto error_closefile;
}
if (statBuf.st_size < bufLen)
{
LogContext(logContext).log(Log_WARNING, "File space allocation ("
+ StringTk::intToStr(bufLen) + ") for metadata update failed: " + idStorePath + ". " +
"SysErr: " + System::getErrString(fallocRes) + " "
"statRes: " + StringTk::intToStr(statRes) + " "
"oldSize: " + StringTk::intToStr(statBuf.st_size) );
goto error_closefile;
}
else
{ // XFS bug! We only return an error if statBuf.st_size < bufLen. Ignore fallocRes then
LOG_DEBUG(logContext, Log_SPAM, "Ignoring kernel file system bug: "
"posix_fallocate() failed for len < filesize");
}
}
else
if (fallocRes != 0)
{ // default error handling if posix_fallocate() failed
LogContext(logContext).log(Log_WARNING, "File space allocation ("
+ StringTk::intToStr(bufLen) + ") for metadata update failed: " + idStorePath + ". " +
"SysErr: " + System::getErrString(fallocRes) );
goto error_closefile;
}
// write data to file
writeRes = write(fd, buf, bufLen);
// (a short write is treated as an error as well)
if(unlikely(writeRes != (ssize_t)bufLen) )
{ // error
LogContext(logContext).logErr("Unable to write dentry metadata update: " +
idStorePath + ". " + "SysErr: " + System::getErrString() );
goto error_closefile;
}
// truncate in case the update led to a smaller file size
truncRes = ftruncate(fd, bufLen);
if(truncRes == -1)
{ // ignore trunc errors
LogContext(logContext).log(Log_WARNING, "Unable to truncate metadata file (strange, but "
"proceeding anyways): " + idStorePath + ". " + "SysErr: " + System::getErrString() );
}
close(fd);
LOG_DEBUG(logContext, 4, "Dentry metadata update stored: " + idStorePath);
return true;
// error compensation
error_closefile:
close(fd);
return false;
}
/**
* Store an update DirEntry.
*
* Note: Never write to a dentry using the entryNamePath. We might simply write to the wrong
* path. E.g. after a rename() and overwriting an opened file. Closing the overwritten
* file will result in an inode update. If then some data structures are not correct
* yet, writing to dentry-by-name instead of dentry-by-id might update the wrong file,
* instead of simply returning an error message.
* However, an exception is fsck, which needs to modify the dentry-by-name directly to update
* a dentry-owner.
*/
bool DirEntry::storeUpdatedDirEntry(const std::string& dirEntryPath)
{
char buf[DIRENTRY_SERBUF_SIZE];
Serializer ser(buf, sizeof(buf));
serializeDentry(ser);
std::string idStorePath = dirEntryPath + "/" + name;
bool result = storeUpdatedDirEntryBuf(idStorePath, buf, ser.size());
if (getIsBuddyMirrored())
if (auto* resync = BuddyResyncer::getSyncChangeset())
resync->addModification(idStorePath, MetaSyncFileType::Dentry);
return result;
}
/**
 * Store the inlined inode from a dir-entry.
 *
 * The whole dentry (including the inlined inode) is serialized and written to the
 * dentry-by-id file below dirEntryPath.
 *
 * @param dirEntryPath directory of the dentry; the ID sub-path and entryID are appended
 * @return FhgfsOpsErr_INODENOTINLINED if this dentry has no inlined inode,
 *    FhgfsOpsErr_INTERNAL if the dentry does not fit into the serialization buffer,
 *    FhgfsOpsErr_SAVEERROR if storing failed, FhgfsOpsErr_SUCCESS otherwise
 */
FhgfsOpsErr DirEntry::storeUpdatedInode(const std::string& dirEntryPath)
{
   if (!this->getIsInodeInlined() )
      return FhgfsOpsErr_INODENOTINLINED;

   char buf[DIRENTRY_SERBUF_SIZE];
   Serializer ser(buf, sizeof(buf));

   serializeDentry(ser);

   std::string idStorePath = MetaStorageTk::getMetaDirEntryIDPath(dirEntryPath) + getEntryID();

   if (unlikely(!ser.good() ) )
   { // dentry does not fit into buf => nothing valid to store, ser.size() must not be used
      LogContext(DIRENTRY_LOG_CONTEXT "(store updated inode)").logErr(
         "Dentry too large: " + idStorePath + ".");

      return FhgfsOpsErr_INTERNAL;
   }

   bool storeRes = storeUpdatedDirEntryBuf(idStorePath, buf, ser.size());
   if (!storeRes)
      return FhgfsOpsErr_SAVEERROR;

   if (getIsBuddyMirrored())
      if (auto* resync = BuddyResyncer::getSyncChangeset())
         resync->addModification(idStorePath, MetaSyncFileType::Inode);

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Unlink the given dentry file.
 *
 * @return FhgfsOpsErr_SUCCESS if the file was unlinked, FhgfsOpsErr_PATHNOTEXISTS if unlink()
 *    failed with ENOENT, FhgfsOpsErr_INTERNAL (plus an error log) for any other failure
 */
FhgfsOpsErr DirEntry::removeDirEntryFile(const std::string& filePath)
{
   if (unlink(filePath.c_str() ) != 0)
   {
      if (errno == ENOENT)
         return FhgfsOpsErr_PATHNOTEXISTS;

      LOG(GENERAL, ERR, "Unable to delete dentry file", filePath, sysErr);

      return FhgfsOpsErr_INTERNAL;
   }

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Remove the given filePath. This method is used for dirEntries-by-entryID and dirEntries-by-name.
 *
 * For buddy mirrored entries the deletion is added to the current sync changeset (if there is
 * one), independent of the unlink result.
 *
 * @param logContext not used by this method
 * @return result of removeDirEntryFile()
 */
FhgfsOpsErr DirEntry::removeDirEntryName(const char* logContext, const std::string& filePath,
   bool isBuddyMirrored)
{
   const FhgfsOpsErr unlinkResult = removeDirEntryFile(filePath);

   if (!isBuddyMirrored)
      return unlinkResult;

   auto* changeset = BuddyResyncer::getSyncChangeset();
   if (changeset)
      changeset->addDeletion(filePath, MetaSyncFileType::Dentry);

   return unlinkResult;
}
/**
 * Remove the dirEntryID file.
 *
 * For buddy mirrored entries the deletion is added to the current sync changeset (if there is
 * one), independent of the unlink result.
 *
 * @param dirEntryPath directory of the dentry; the ID sub-path and entryID are appended
 * @return result of removeDirEntryFile() for the ID file
 */
FhgfsOpsErr DirEntry::removeDirEntryID(const std::string& dirEntryPath,
   const std::string& entryID, bool isBuddyMirrored)
{
   const std::string idFilePath = MetaStorageTk::getMetaDirEntryIDPath(dirEntryPath) + entryID;

   const FhgfsOpsErr unlinkResult = removeDirEntryFile(idFilePath);

   if (likely(unlinkResult == FhgfsOpsErr_SUCCESS))
   {
      LOG_DBG(GENERAL, DEBUG, "Dir-Entry ID metadata deleted", idFilePath);
   }

   if (isBuddyMirrored)
   {
      auto* changeset = BuddyResyncer::getSyncChangeset();
      if (changeset)
         changeset->addDeletion(idFilePath, MetaSyncFileType::Inode);
   }

   return unlinkResult;
}
/**
* Remove a dir-entry with an inlined inode. We cannot remove the inode, though and so
* will rename the dir-entry into the inode-hash directories to create a non-inlined
* inode (with dentry format, though).
*
* @param dirEntryBasePath directory that contains the dentry-by-name and the dentry-by-id files
* @param entryID ID of the entry; also used as file name when linking into the disposal dir
* @param entryName name of the dentry-by-name file; only used for unlinking if
* DirEntry_UNLINK_FILENAME is set, so it might be empty otherwise
* @param unlinkTypeFlags combination of DirEntry_UNLINK_FILENAME (unlink the dentry-by-name) and
* DirEntry_UNLINK_ID (move the dentry-by-id file to the inode path and link it to the disposal dir)
* @return FhgfsOpsErr_SUCCESS, FhgfsOpsErr_PATHNOTEXISTS or FhgfsOpsErr_INTERNAL
*/
FhgfsOpsErr DirEntry::removeBusyFile(const std::string& dirEntryBasePath,
const std::string& entryID, const std::string& entryName, unsigned unlinkTypeFlags)
{
const char* logContext = "Unlinking dirEnty with busy inlined inode";
App* app = Program::getApp();
std::string dentryPath = dirEntryBasePath + '/' + entryName;
std::string idPath = MetaStorageTk::getMetaDirEntryIDPath(dirEntryBasePath ) + entryID;
// target path of the inode: buddy mirrored entries use the buddy mirror inodes path
std::string inodePath = MetaStorageTk::getMetaInodePath(
getIsBuddyMirrored()
? app->getBuddyMirrorInodesPath()->str()
: app->getInodesPath()->str(),
entryID);
FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
if (unlinkTypeFlags & DirEntry_UNLINK_FILENAME)
{
// Delete the dentry-by-name
int unlinkNameRes = unlink(dentryPath.c_str() );
if (unlinkNameRes)
{
if (errno != ENOENT)
{
LogContext(logContext).logErr("Failed to unlink the dirEntry file: " + dentryPath +
" SysErr: " + System::getErrString() );
retVal = FhgfsOpsErr_INTERNAL;
}
else
retVal = FhgfsOpsErr_PATHNOTEXISTS;
// if the name could not be unlinked, the ID file is left untouched
goto out;
}
if (getIsBuddyMirrored())
if (auto* resync = BuddyResyncer::getSyncChangeset())
resync->addDeletion(dentryPath, MetaSyncFileType::Dentry);
retVal = FhgfsOpsErr_SUCCESS;
}
if (unlinkTypeFlags & DirEntry_UNLINK_ID)
{
// Rename the ID to the inode directory
int renameRes = rename(idPath.c_str(), inodePath.c_str() );
if (!renameRes)
{
/* Posix rename() has a very weird feature - it does nothing if fromPath and toPath
* are a hard-link pointing to each other. And it even does not give an error message.
* Seems they wanted to have rename(samePath, samePath).
* Once we do support hard links, we are going to hard-link dentryPath and inodePath
* to each other. An unlink() then should be done using the separate dentry/inode path,
* but if there should be race between link() and unlink(), we might leave a dentry in
* place, which was supposed to be unlinked. So without an error message from rename()
* if it going to do nothing, we always need to try to unlink the file now. We can only
* hope the kernel still has a negative dentry in place, which will immediately tell
* that the file already does not exist anymore. */
unlink(idPath.c_str() );
// Now link to the disposal dir
DirInode* disposalDir = getIsBuddyMirrored()
? app->getBuddyMirrorDisposalDir()
: app->getDisposalDir();
disposalDir->linkFileInodeToDir(inodePath, entryID); // entryID will be the new fileName
// Note: we ignore a possible error here, as don't know what to do with it.
if (getIsBuddyMirrored())
if (auto* resync = BuddyResyncer::getSyncChangeset())
{
resync->addDeletion(idPath, MetaSyncFileType::Inode);
resync->addModification(inodePath, MetaSyncFileType::Inode);
}
retVal = FhgfsOpsErr_SUCCESS;
}
else
{
// save errno before logging, it is evaluated below
int errCode = errno;
LogContext(logContext).logErr("Failed to move dirEntry:"
" From: " + idPath +
" To: " + inodePath +
" SysErr: " + System::getErrString() );
if (unlinkTypeFlags & DirEntry_UNLINK_FILENAME)
{
// take the error code (SUCCESS) from file name unlink
}
else
{
if (errCode == ENOENT)
retVal = FhgfsOpsErr_PATHNOTEXISTS;
else
retVal = FhgfsOpsErr_INTERNAL;
}
}
}
out:
return retVal;
}
/**
 * Load this dentry from the dentry-by-name file "<dirEntryPath>/<entryName>".
 * To be used for all Dentry (dir-entry) operations.
 *
 * @return result of loadFromFile()
 */
bool DirEntry::loadFromFileName(const std::string& dirEntryPath, const std::string& entryName)
{
   return loadFromFile(dirEntryPath + "/" + entryName);
}
/**
 * Load this dentry from the dentry-by-id file of the given entryID.
 * To be used for all operations regarding the inlined inode.
 *
 * @return result of loadFromFile()
 */
bool DirEntry::loadFromID(const std::string& dirEntryPath, const std::string& entryID)
{
   return loadFromFile(MetaStorageTk::getMetaDirEntryIDPath(dirEntryPath) + entryID);
}
/**
 * Note: Wrapper/chooser for loadFromFileXAttr/Contents.
 * Note: Do not call directly, but use loadFromFileName() or loadFromID()
 *
 * Reads the dir-entry either from the xattr or from the file contents, depending on the
 * "store use extended attribs" config setting.
 */
bool DirEntry::loadFromFile(const std::string& path)
{
   const bool useXAttrs = Program::getApp()->getConfig()->getStoreUseExtendedAttribs();

   return useXAttrs ? loadFromFileXAttr(path) : loadFromFileContents(path);
}
/**
 * Load and deserialize this dentry from the META_XATTR_NAME extended attribute of path.
 *
 * An empty/missing attribute is treated as a leftover of a server crash: if the self-heal config
 * option is set, the file is removed (unless this entry is named META_DIRENTRYID_SUB_STR).
 *
 * Note: Don't call this directly, use the wrapper loadFromFileName().
 *
 * @return true only if the attribute was read and deserialized successfully
 */
bool DirEntry::loadFromFileXAttr(const std::string& path)
{
   const char* logContext = DIRENTRY_LOG_CONTEXT "(load from xattr file)";
   Config* cfg = Program::getApp()->getConfig();

   char buf[DIRENTRY_SERBUF_SIZE];

   ssize_t getRes = getxattr(path.c_str(), META_XATTR_NAME, buf, DIRENTRY_SERBUF_SIZE);

   if(getRes > 0)
   { // we got something => deserialize it
      Deserializer des(buf, getRes);
      deserializeDentry(des);

      if(likely(des.good() ) )
         return true;

      // deserialization failed
      LogContext(logContext).logErr("Unable to deserialize dir-entry file: " + path);

      return false;
   }

   const bool getFailed = (getRes == -1);

   if(getFailed && (errno == ENOENT) )
   { // file not exists
      LOG_DEBUG_CONTEXT(LogContext(logContext), Log_DEBUG, "dir-entry file not exists: " +
         path + ". " + "SysErr: " + System::getErrString() );

      return false;
   }

   const bool attrIsEmpty = (getRes == 0) || (getFailed && (errno == ENODATA) );

   if(attrIsEmpty && cfg->getStoreSelfHealEmptyFiles() )
   { // empty link file probably due to server crash => self-heal through removal
      if (likely(this->name != META_DIRENTRYID_SUB_STR) )
      {
         LogContext(logContext).logErr("Found an empty dir-entry file. "
            "(Self-healing through file removal): " + path);

         if(unlink(path.c_str() ) == -1)
         {
            LogContext(logContext).logErr("File removal for self-healing failed: " + path + ". "
               "SysErr: " + System::getErrString() );
         }
      }

      return false;
   }

   // unhandled error
   LogContext(logContext).logErr("Unable to open/read dir-entry file: " + path + ". " +
      "SysErr: " + System::getErrString() );

   return false;
}
/**
 * Load and deserialize this dentry from the contents of the file at path.
 *
 * An empty file is treated as a leftover of a server crash: if the self-heal config option is
 * set, the file is removed.
 *
 * Note: Don't call this directly, use the wrapper loadFromFile().
 *
 * @return true only if the file was read and deserialized successfully
 */
bool DirEntry::loadFromFileContents(const std::string& path)
{
   const char* logContext = DIRENTRY_LOG_CONTEXT "(load from file)";
   Config* cfg = Program::getApp()->getConfig();

   int fd = open(path.c_str(), O_NOATIME | O_RDONLY);
   if(fd == -1)
   { // open failed => nothing to close
      if(likely(errno == ENOENT) )
      { // file not exists
         LOG_DEBUG_CONTEXT(LogContext(logContext), Log_DEBUG, "Unable to open dentry file: " +
            path + ". " + "SysErr: " + System::getErrString() );
      }
      else
      {
         LogContext(logContext).logErr("Unable to open link file: " + path + ". " +
            "SysErr: " + System::getErrString() );
      }

      return false;
   }

   bool loaded = false;

   char buf[DIRENTRY_SERBUF_SIZE];
   int readRes = read(fd, buf, DIRENTRY_SERBUF_SIZE);

   if(likely(readRes > 0) )
   { // we got something => deserialize it
      Deserializer des(buf, readRes);
      deserializeDentry(des);

      if(unlikely(!des.good()))
         LogContext(logContext).logErr("Unable to deserialize dentry file: " + path);
      else
         loaded = true;
   }
   else
   if( (readRes == 0) && cfg->getStoreSelfHealEmptyFiles() )
   { // empty link file probably due to server crash => self-heal through removal
      LogContext(logContext).logErr("Found an empty link file. "
         "(Self-healing through file removal): " + path);

      if(unlink(path.c_str() ) == -1)
      {
         LogContext(logContext).logErr("File removal for self-healing failed: " + path + ". "
            "SysErr: " + System::getErrString() );
      }
   }
   else
   { // read error
      LogContext(logContext).logErr("Unable to read denty file: " + path + ". " +
         "SysErr: " + System::getErrString() );
   }

   close(fd);

   return loaded;
}
/**
 * Wrapper/chooser for loadEntryTypeFromFileXAttr/Contents, depending on the
 * "store use extended attribs" config setting.
 */
DirEntryType DirEntry::loadEntryTypeFromFile(const std::string& path, const std::string& entryName)
{
   if (Program::getApp()->getConfig()->getStoreUseExtendedAttribs() )
      return loadEntryTypeFromFileXAttr(path, entryName);

   return loadEntryTypeFromFileContents(path, entryName);
}
/**
 * Read only the entry type byte from the META_XATTR_NAME attribute of "<path>/<entryName>".
 *
 * Note: We don't do any serious error checking here, we just want to find out the type or will
 * return DirEntryType_INVALID otherwise (eg if file not found)
 *
 * @return the byte at DIRENTRY_TYPE_BUF_POS as DirEntryType; DirEntryType_INVALID if the
 *    attribute cannot be read or is too short to contain that byte
 */
DirEntryType DirEntry::loadEntryTypeFromFileXAttr(const std::string& path,
   const std::string& entryName)
{
   std::string storePath(path + "/" + entryName);

   char buf[DIRENTRY_SERBUF_SIZE];

   ssize_t getRes = getxattr(storePath.c_str(), META_XATTR_NAME, buf, DIRENTRY_SERBUF_SIZE);
   if(getRes <= 0)
   { // getting failed
      return DirEntryType_INVALID;
   }

   /* Only the first getRes bytes of buf are valid. Without this check a short attribute would
    * make us return an uninitialized stack byte as entry type. */
   if(getRes <= static_cast<ssize_t>(DIRENTRY_TYPE_BUF_POS) )
      return DirEntryType_INVALID;

   return (DirEntryType)buf[DIRENTRY_TYPE_BUF_POS];
}
/**
 * Read only the entry type byte (at offset DIRENTRY_TYPE_BUF_POS) from the file
 * "<path>/<entryName>".
 *
 * Note: We don't do any serious error checking here, we just want to find out the type or will
 * return DirEntryType_INVALID otherwise (eg if file not found)
 */
DirEntryType DirEntry::loadEntryTypeFromFileContents(const std::string& path,
   const std::string& entryName)
{
   const std::string storePath(path + "/" + entryName);

   int fd = open(storePath.c_str(), O_NOATIME | O_RDONLY);
   if(fd == -1)
      return DirEntryType_INVALID; // open failed

   DirEntryType entryType = DirEntryType_INVALID;

   char typeByte;
   int readRes = pread(fd, &typeByte, 1, DIRENTRY_TYPE_BUF_POS);
   if(likely(readRes > 0) )
      entryType = (DirEntryType)typeByte; // we got something

   close(fd);

   return entryType;
}
/**
 * Allocate a new DirEntry with the given name and load it from its dentry-by-name file.
 *
 * @param path directory that contains the dentry file
 * @param entryName name of the dentry (file name below path)
 * @return the new DirEntry (allocated with new) or NULL if loading failed
 */
DirEntry* DirEntry::createFromFile(const std::string& path, const std::string& entryName)
{
   // (the full file path is built by loadFromFileName(), so it is not needed here)
   DirEntry* newDir = new DirEntry(entryName);

   bool loadRes = newDir->loadFromFileName(path, entryName);
   if(!loadRes)
   {
      delete(newDir);
      return NULL;
   }

   return newDir;
}
/**
 * Return the inlined inode from a dir-entry.
 *
 * Loads this dentry from its dentry-by-id file and builds a new FileInode from the inlined
 * inode data.
 *
 * @param dirEntryPath directory of the dentry
 * @param entryInfo provides the entryID to load and the parent entryID
 * @return newly allocated FileInode; NULL if loading failed, the inode is not inlined or the
 *    allocation failed
 */
FileInode* DirEntry::createInodeByID(const std::string& dirEntryPath, EntryInfo* entryInfo)
{
   if (!loadFromID(dirEntryPath, entryInfo->getEntryID() ) )
      return NULL;

   if (!getIsInodeInlined() )
      return NULL;

   const unsigned dentryFeatureFlags = getDentryFeatureFlags();
   const unsigned inodeFeatureFlags = inodeData.getInodeFeatureFlags();

   /* If the flag is set, the file was renamed between directories and the disk data already
    * have the origParentEntryID. Otherwise take it from the given entryInfo. */
   const bool hasOrigParentID = (inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_PARENTID);
   if (!hasOrigParentID)
      inodeData.setDynamicOrigParentEntryID(entryInfo->getParentEntryID() );

   /* NOTE: origParentUID also might not be on disk, but the deserializer then set it from
    *       statData->uid */

   FileInode* inode = new (std::nothrow) FileInode(getID(), &inodeData, getEntryType(),
      dentryFeatureFlags);
   if (likely(inode) )
      return inode;

   LogContext(__func__).logErr("Out of memory, failed to allocate inode.");

   return NULL; // out of memory
}
/**
 * Create the on-disk dentry: first the dentry-by-id file, then the dentry-by-name file.
 * For buddy mirrored entries the created files are added to the current sync changeset.
 *
 * Note: Must be called before any of the disk modifying methods
 * (otherwise they will fail)
 *
 * @param dirEntryPath does not include the filename
 * @return error of the first failing step or FhgfsOpsErr_SUCCESS
 */
FhgfsOpsErr DirEntry::storeInitialDirEntry(const std::string& dirEntryPath)
{
   const char* logContext = DIRENTRY_LOG_CONTEXT "(store initial dirEntry)";

   LOG_DEBUG(logContext, 4, "Storing initial dentry metadata for ID: '" + getEntryID() + "'");

   const std::string idPath = MetaStorageTk::getMetaDirEntryIDPath(dirEntryPath) + getEntryID();

   // first create the dirEntry-by-ID
   const FhgfsOpsErr idStoreRes = storeInitialDirEntryID(logContext, idPath);
   if (idStoreRes != FhgfsOpsErr_SUCCESS)
      return idStoreRes;

   const bool nonInlined = DirEntryType_ISDIR(getEntryType()) || !getIsInodeInlined();

   // eventually the dirEntry-by-name
   const std::string namePath = dirEntryPath + '/' + name;

   const FhgfsOpsErr nameStoreRes =
      storeInitialDirEntryName(logContext, idPath, namePath, nonInlined);

   if (nameStoreRes != FhgfsOpsErr_SUCCESS || !getIsBuddyMirrored())
      return nameStoreRes;

   auto* changeset = BuddyResyncer::getSyncChangeset();
   if (!changeset)
      return nameStoreRes;

   // the ID file only continues to exist for inlined inodes
   if (!nonInlined)
      changeset->addModification(idPath, MetaSyncFileType::Inode);

   changeset->addModification(namePath, MetaSyncFileType::Dentry);

   return nameStoreRes;
}

View File

@@ -0,0 +1,332 @@
#pragma once
#include <common/storage/Metadata.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/Common.h>
#include <toolkit/StorageTkEx.h>
#include "DentryStoreData.h"
#include "DiskMetaData.h"
#include "MetadataEx.h"
#include "FileInodeStoreData.h"
#define DIRENTRY_LOG_CONTEXT "DirEntry "
#define DirEntry_UNLINK_ID 1
#define DirEntry_UNLINK_FILENAME 2
#define DirEntry_UNLINK_ID_AND_FILENAME (DirEntry_UNLINK_ID | DirEntry_UNLINK_FILENAME)
/*
* Class for directory entries (aka "dentries", formerly also referred to as "links"), which
* contains the filename and information about where to find the inode (e.g. for remote dir
* inodes).
*
* Note on locking: In contrast to files/dirs, dentries are not referenced. Every caller/thread
* gets its own copy to work with, so dentry instances are not even shared. That's why we don't
* have a mutex here.
*/
class DirEntry
{
friend class MetaStore;
friend class DirEntryStore;
friend class DirInode;
friend class FileInode;
friend class GenericDebugMsgEx;
friend class RecreateDentriesMsgEx;
public:
/**
 * Creates a fully initialized dentry without any dentry feature flags set.
 */
DirEntry(DirEntryType entryType, const std::string& name, const std::string& entryID,
   NumNodeID ownerNodeID)
   : dentryDiskData(entryID, entryType, ownerNodeID, 0),
     name(name)
{
}
/**
 * Sets only the name; the on-disk data members are default-constructed.
 *
 * Note: This constructor does not perform initialization, so use it for
 * metadata loading only.
 */
DirEntry(const std::string& entryName)
   : name(entryName)
{
}
static DirEntry* createFromFile(const std::string& path, const std::string& entryName);
static DirEntryType loadEntryTypeFromFile(const std::string& path,
const std::string& entryName);
protected:
bool loadFromID(const std::string& dirEntryPath, const std::string& entryID);
private:
DentryStoreData dentryDiskData; // data stored on disk
FileInodeStoreData inodeData;
std::string name; // the user-friendly name, note: not set on reading entries anymore
FhgfsOpsErr storeInitialDirEntryID(const char* logContext, const std::string& idPath);
static FhgfsOpsErr storeInitialDirEntryName(const char* logContext, const std::string& idPath,
const std::string& namePath, bool isNonInlinedInode);
bool storeUpdatedDirEntryBuf(const std::string& idStorePath, char* buf, unsigned bufLen);
bool storeUpdatedDirEntryBufAsXAttr(const std::string& idStorePath, char* buf,
unsigned bufLen);
bool storeUpdatedDirEntryBufAsContents(const std::string& idStorePath, char* buf,
unsigned bufLen);
bool storeUpdatedDirEntry(const std::string& dirEntryPath);
FhgfsOpsErr storeUpdatedInode(const std::string& dirEntryPath);
static FhgfsOpsErr removeDirEntryName(const char* logContext, const std::string& filePath,
bool isBuddyMirrored);
FhgfsOpsErr removeBusyFile(const std::string& dirEntryBasePath, const std::string& entryID,
const std::string& entryName, unsigned unlinkTypeFlags);
FileInode* createInodeByID(const std::string& dirEntryPath, EntryInfo* entryInfo);
bool loadFromFileName(const std::string& dirEntryPath, const std::string& entryName);
bool loadFromFile(const std::string& path);
bool loadFromFileXAttr(const std::string& path);
bool loadFromFileContents(const std::string& path);
static DirEntryType loadEntryTypeFromFileXAttr(const std::string& path,
const std::string& entryName);
static DirEntryType loadEntryTypeFromFileContents(const std::string& path,
const std::string& entryName);
FhgfsOpsErr storeInitialDirEntry(const std::string& dirEntryPath);
static FhgfsOpsErr removeDirEntryFile(const std::string& filePath);
static FhgfsOpsErr removeDirEntryID(const std::string& dirEntryPath,
const std::string& entryID, bool isBuddyMirrored);
public:
// inliners
/**
 * Remove file dentries.
 *
 * If the argument is a directory the caller already must have checked if the directory
 * is empty.
 *
 * @param unlinkTypeFlags combination of DirEntry_UNLINK_FILENAME and DirEntry_UNLINK_ID
 * @return if the name is to be unlinked: the result of unlinking the dentry-by-name; otherwise
 *    the result of unlinking the dentry-by-id, or FhgfsOpsErr_SUCCESS if no flag is set
 */
static FhgfsOpsErr removeFileDentry(const std::string& dirEntryPath,
   const std::string& entryID, const std::string& entryName, unsigned unlinkTypeFlags,
   bool isBuddyMirrored)
{
   const char* logContext = DIRENTRY_LOG_CONTEXT "(remove stored file dentry)";

   const bool unlinkName = (unlinkTypeFlags & DirEntry_UNLINK_FILENAME) != 0;
   const bool unlinkID = (unlinkTypeFlags & DirEntry_UNLINK_ID) != 0;

   if (unlinkName)
   {
      // first we delete entry-by-name and use this result as return code
      const std::string namePath = dirEntryPath + '/' + entryName;
      const FhgfsOpsErr nameRes = removeDirEntryName(logContext, namePath, isBuddyMirrored);

      /* Only unlink the dirEntry-by-ID once the dirEntry-by-name was successfully unlinked.
       * We must not try to delete the ID file if the name did not exist, as during a race
       * (possible locking issue) the file may have been renamed and so the ID might be
       * still valid. */
      if (nameRes == FhgfsOpsErr_SUCCESS && unlinkID)
         removeDirEntryID(dirEntryPath, entryID, isBuddyMirrored); // error code is ignored

      return nameRes;
   }

   if (unlinkID)
      return removeDirEntryID(dirEntryPath, entryID, isBuddyMirrored);

   /* It might happen that the code was supposed to unlink an ID only, but if the inode
    * has a link count > 1, even the ID is not supposed to be unlinked. So unlink
    * is a no-op then. */
   return FhgfsOpsErr_SUCCESS;
}
/**
 * Remove directory dentries (only the dentry-by-name file "<dirEntryPath>/<entryName>").
 *
 * If the argument is a directory the caller already must have checked if the directory
 * is empty.
 *
 * @return result of removeDirEntryName()
 */
static FhgfsOpsErr removeDirDentry(const std::string& dirEntryPath,
   const std::string& entryName, bool isBuddyMirrored)
{
   const char* logContext = DIRENTRY_LOG_CONTEXT "(remove stored directory dentry)";

   return removeDirEntryName(logContext, dirEntryPath + '/' + entryName, isBuddyMirrored);
}
// getters & setters
/**
* Set a new ownerNodeID, used by fsck or generic debug message.
*/
bool setOwnerNodeID(const std::string& dirEntryPath, NumNodeID newOwner)
{
bool success = true;
// only dentries without inlined inodes have an ownerNode field, trying to set the owner
// node on a dentry with an inlined inode is thus impossible.
if (getIsInodeInlined())
return false;
NumNodeID oldOwner = this->getOwnerNodeID();
this->dentryDiskData.setOwnerNodeID(newOwner);
if (!storeUpdatedDirEntry(dirEntryPath))
{ // failed to update metadata => restore old value
this->dentryDiskData.setOwnerNodeID(oldOwner);
success = false;
}
return success;
}
/**
 * Copy the given inode data into this dentry and mark the dentry as carrying an inlined file
 * inode (DENTRY_FEATURE_INODE_INLINE and DENTRY_FEATURE_IS_FILEINODE are added to the flags
 * that are already set).
 *
 * @param inodeData inode data to copy-assign into this dentry
 */
void setFileInodeData(FileInodeStoreData& inodeData)
{
   this->inodeData = inodeData;

   unsigned inlinedFileInodeFlags = DENTRY_FEATURE_INODE_INLINE | DENTRY_FEATURE_IS_FILEINODE;

   setDentryFeatureFlags(getDentryFeatureFlags() | inlinedFileInodeFlags);
}
/**
 * Add DENTRY_FEATURE_BUDDYMIRRORED to the dentry feature flags.
 */
void setBuddyMirrorFeatureFlag()
{
   unsigned buddyMirrorFlag = DENTRY_FEATURE_BUDDYMIRRORED;

   addDentryFeatureFlag(buddyMirrorFlag);
}
/**
 * @return true if DENTRY_FEATURE_BUDDYMIRRORED is set in the dentry feature flags
 */
bool getIsBuddyMirrored()
{
   unsigned buddyMirrorBit = getDentryFeatureFlags() & DENTRY_FEATURE_BUDDYMIRRORED;

   return buddyMirrorBit != 0;
}
/**
 * @return the feature flags (DENTRY_FEATURE_...) kept in the on-disk dentry data
 */
unsigned getDentryFeatureFlags()
{
   unsigned currentFlags = this->dentryDiskData.getDentryFeatureFlags();

   return currentFlags;
}
/**
 * Replace the feature flags kept in the on-disk dentry data.
 *
 * @param featureFlags new set of DENTRY_FEATURE_... flags; forwarded as given
 */
void setDentryFeatureFlags(unsigned featureFlags)
{
   // plain pass-through to the dentry disk data
   this->dentryDiskData.setDentryFeatureFlags(featureFlags);
}
/**
 * Forward to DentryStoreData::addDentryFeatureFlag() of the on-disk dentry data.
 *
 * @param featureFlag DENTRY_FEATURE_... flag to add
 */
void addDentryFeatureFlag(unsigned featureFlag)
{
   // plain pass-through to the dentry disk data
   this->dentryDiskData.addDentryFeatureFlag(featureFlag);
}
/**
 * Forward to DentryStoreData::removeDentryFeatureFlag() of the on-disk dentry data.
 *
 * @param featureFlag DENTRY_FEATURE_... flag to remove
 */
void removeDentryFeatureFlag(unsigned featureFlag)
{
   // plain pass-through to the dentry disk data
   this->dentryDiskData.removeDentryFeatureFlag(featureFlag);
}
// getters
/**
 * @return reference to the entryID kept in the on-disk dentry data
 */
const std::string& getEntryID()
{
   const std::string& storedEntryID = this->dentryDiskData.getEntryID();

   return storedEntryID;
}
/**
 * Same result as getEntryID().
 *
 * @return reference to the entryID kept in the on-disk dentry data
 */
const std::string& getID()
{
   const std::string& storedEntryID = this->dentryDiskData.getEntryID();

   return storedEntryID;
}
/**
 * Note: The entry type should not be changed after object init => this getter is not
 * synchronized!
 *
 * @return the DirEntryType kept in the on-disk dentry data
 */
DirEntryType getEntryType()
{
   DirEntryType storedType = this->dentryDiskData.getDirEntryType();

   return storedType;
}
/**
 * @return reference to the name member of this dentry
 */
const std::string& getName()
{
   const std::string& entryName = this->name;

   return entryName;
}
/**
 * @return the owner node ID kept in the on-disk dentry data
 */
NumNodeID getOwnerNodeID()
{
   NumNodeID storedOwner = this->dentryDiskData.getOwnerNodeID();

   return storedOwner;
}
/**
 * Fill an EntryInfo from this dentry.
 *
 * @param parentEntryID entryID of the parent directory; stored in outEntryInfo as given
 * @param flags initial ENTRYINFO_FEATURE_... flags; ENTRYINFO_FEATURE_INLINED and
 *    ENTRYINFO_FEATURE_BUDDYMIRRORED are added according to the dentry feature flags
 * @param outEntryInfo receives owner node ID, parent ID, entryID, name, entry type and flags;
 *    dereferenced unconditionally
 */
void getEntryInfo(const std::string& parentEntryID, int flags, EntryInfo* outEntryInfo)
{
   const int inlinedFlag = getIsInodeInlined() ? ENTRYINFO_FEATURE_INLINED : 0;
   const int buddyMirroredFlag = getIsBuddyMirrored() ? ENTRYINFO_FEATURE_BUDDYMIRRORED : 0;

   flags |= inlinedFlag;
   flags |= buddyMirroredFlag;

   outEntryInfo->set(getOwnerNodeID(), parentEntryID, getID(), name, getEntryType(), flags);
}
/**
 * Clear DENTRY_FEATURE_INODE_INLINE in the dentry feature flags; all other flags are kept.
 */
void unsetInodeInlined()
{
   const uint16_t currentFlags = this->dentryDiskData.getDentryFeatureFlags();
   const uint16_t flagsWithoutInline =
      static_cast<uint16_t>(currentFlags & ~(DENTRY_FEATURE_INODE_INLINE) );

   this->dentryDiskData.setDentryFeatureFlags(flagsWithoutInline);
}
/**
 * Check if the inode is inlined into this dentry.
 *
 * Note: Only the DENTRY_FEATURE_INODE_INLINE flag is evaluated here.
 *
 * @return true if DENTRY_FEATURE_INODE_INLINE is set in the dentry feature flags
 */
bool getIsInodeInlined()
{
   const unsigned inlineBit =
      this->dentryDiskData.getDentryFeatureFlags() & DENTRY_FEATURE_INODE_INLINE;

   return inlineBit != 0;
}
/**
 * Serialize this dentry through a DiskMetaData helper that is set up with pointers to the
 * dentry disk data and the inode data of this object.
 *
 * @param ser serializer handed to DiskMetaData::serializeDentry()
 */
void serializeDentry(Serializer& ser)
{
   DiskMetaData(&this->dentryDiskData, &this->inodeData).serializeDentry(ser);
}
/**
 * Deserialize into this dentry through a DiskMetaData helper that is set up with pointers to
 * the dentry disk data and the inode data of this object.
 *
 * @param des deserializer handed to DiskMetaData::deserializeDentry()
 */
void deserializeDentry(Deserializer& des)
{
   DiskMetaData(&this->dentryDiskData, &this->inodeData).deserializeDentry(des);
}
protected:
/**
 * @return pointer to the inode data member of this dentry (the object stays owned by this
 *    dentry)
 */
FileInodeStoreData* getInodeStoreData(void)
{
   FileInodeStoreData* ownInodeData = &this->inodeData;

   return ownInodeData;
}
};

View File

@@ -0,0 +1,777 @@
#include <program/Program.h>
#include <toolkit/StorageTkEx.h>
#include "DirEntryStore.h"
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
/**
 * Get the path of the dentries directory that belongs to the given parent directory.
 *
 * Note: Not supposed to be used outside of this file and shall be inlined
 *
 * @param parentID ID of the directory whose dentries path is requested
 * @param isBuddyMirrored true to build the path below the app's buddy mirror dentries path,
 *    false to build it below the normal dentries path
 */
static inline std::string getDirEntryStoreDynamicEntryPath(const std::string& parentID,
   bool isBuddyMirrored)
{
   App* app = Program::getApp();

   const auto* dentriesBasePath =
      isBuddyMirrored ? app->getBuddyMirrorDentriesPath() : app->getDentriesPath();

   return MetaStorageTk::getMetaDirEntryPath(dentriesBasePath->str(), parentID);
}
/**
 * Create a store that is not yet bound to a directory.
 *
 * Note: Sets the parentID to the invalid value "<undef>", so do not forget to set the parentID
 * (see setParentID() ) before adding any elements.
 */
DirEntryStore::DirEntryStore()
   : parentID("<undef>"),
     isBuddyMirrored(false)
{
   // see initializer list
}
/**
 * Create a store for the dentries of the given directory.
 *
 * @param parentID ID of the directory to which this store belongs
 * @param isBuddyMirrored true if the directory to which this store belongs is buddy mirrored;
 *    also selects which base path is used for dirEntryPath
 */
DirEntryStore::DirEntryStore(const std::string& parentID, bool isBuddyMirrored)
   : parentID(parentID),
     dirEntryPath(getDirEntryStoreDynamicEntryPath(parentID, isBuddyMirrored) ),
     isBuddyMirrored(isBuddyMirrored)
{
   // see initializer list
}
/**
 * Create the new directory for dentries (dir-entries). This directory will contain directory
 * entries of files and sub-directories of the directory given by dirID. The dirEntryID
 * sub-directory is created as well.
 *
 * If the dirEntryID sub-directory cannot be created, the already created contents directory is
 * removed again (best effort; a failure of that removal is only logged).
 *
 * @param dirID ID of the directory for which the dentries directory is created
 * @param isBuddyMirrored true to create the directories below the buddy mirror dentries path;
 *    in that case both new directories are also added to the current resync changeset (if
 *    there is one)
 * @return FhgfsOpsErr_SUCCESS on success, FhgfsOpsErr_EXISTS if the contents directory already
 *    exists, FhgfsOpsErr_INTERNAL on any other error
 */
FhgfsOpsErr DirEntryStore::mkDentryStoreDir(const std::string& dirID, bool isBuddyMirrored)
{
   const char* logContext = "Directory (store initial metadata dir)";

   App* app = Program::getApp();
   const Path* dentriesPath =
      isBuddyMirrored ? app->getBuddyMirrorDentriesPath() : app->getDentriesPath();

   std::string contentsDirStr = MetaStorageTk::getMetaDirEntryPath(
      dentriesPath->str(), dirID);

   // create (contents) directory, which will hold directory entries of subdirs and subfiles
   int mkDirRes = mkdir(contentsDirStr.c_str(), 0755);
   if(mkDirRes)
   { // error
      if(errno == EEXIST)
         return FhgfsOpsErr_EXISTS;

      LogContext(logContext).logErr("Unable to create contents directory: " + contentsDirStr +
         ". " + "SysErr: " + System::getErrString() );

      return FhgfsOpsErr_INTERNAL;
   }

   LOG_DEBUG(logContext, 4, "Metadata dir created: " + contentsDirStr);

   // create the dirEntryID directory, which allows access to inlined inodes via dirID access
   std::string contentsDirIDStr = MetaStorageTk::getMetaDirEntryIDPath(contentsDirStr);

   int mkDirIDRes = mkdir(contentsDirIDStr.c_str(), 0755);
   if(mkDirIDRes)
   { // So contentsDirStr worked, but creating the sub directory contentsDirIDStr failed
      LogContext(logContext).logErr("Unable to create dirEntryID directory: " +
         contentsDirIDStr + ". " + "SysErr: " + System::getErrString() );

      /* roll back the first mkdir. contentsDirStr is a directory, so it has to be removed with
       * rmdir(); unlink() does not remove directories. */
      int rmdirRes = rmdir(contentsDirStr.c_str() );
      if (rmdirRes)
      {
         // we can only write a log message here, but can't do anything about it
         LogContext(logContext).logErr("Failed to remove: " + contentsDirStr +
            ". " + "SysErr: " + System::getErrString() );
      }

      return FhgfsOpsErr_INTERNAL;
   }

   if (isBuddyMirrored)
      if (auto* resync = BuddyResyncer::getSyncChangeset())
      {
         resync->addModification(contentsDirStr, MetaSyncFileType::Directory);
         resync->addModification(contentsDirIDStr, MetaSyncFileType::Directory);
      }

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Remove directory for dir-entries (dentries), i.e. the dirEntryID sub-directory first and then
 * the contents directory itself.
 *
 * Note: Assumes that the caller already verified that the directory is empty
 *
 * A directory that does not exist (ENOENT) is logged as an error, but does not make this method
 * fail.
 *
 * @param id ID of the directory whose dentries directory shall be removed
 * @param isBuddyMirrored true to work below the buddy mirror dentries path; in that case the
 *    deletions are also added to the current resync changeset (if there is one)
 * @return false if one of the two rmdir() calls failed with an error other than ENOENT
 */
bool DirEntryStore::rmDirEntryStoreDir(const std::string& id, bool isBuddyMirrored)
{
   const char* logContext = "Directory (remove contents dir)";

   App* app = Program::getApp();
   const Path* dentriesPath =
      isBuddyMirrored ? app->getBuddyMirrorDentriesPath() : app->getDentriesPath();

   std::string contentsDirStr = MetaStorageTk::getMetaDirEntryPath(
      dentriesPath->str(), id);
   std::string contentsDirIDStr = MetaStorageTk::getMetaDirEntryIDPath(contentsDirStr);

   LOG_DEBUG(logContext, Log_DEBUG,
      "Removing content directory: " + contentsDirStr + "; " "id: " + id + "; isBuddyMirrored: "
      + StringTk::intToStr(isBuddyMirrored));

   // remove the dirEntryID directory
   int rmdirIdRes = rmdir(contentsDirIDStr.c_str() );
   if(rmdirIdRes)
   { // error
      const int rmdirIdErrno = errno; // save it: the logging below may overwrite errno

      LogContext(logContext).logErr("Unable to delete dirEntryID directory: " + contentsDirIDStr +
         ". " + "SysErr: " + System::getErrString() );

      if (rmdirIdErrno != ENOENT)
         return false;
   }

   if (isBuddyMirrored)
      if (auto* resync = BuddyResyncer::getSyncChangeset())
         resync->addDeletion(contentsDirIDStr, MetaSyncFileType::Directory);

   // remove contents directory
   int rmdirRes = rmdir(contentsDirStr.c_str() );
   if(rmdirRes)
   { // error
      const int rmdirErrno = errno; // save it: the logging below may overwrite errno

      LogContext(logContext).logErr("Unable to delete contents directory: " + contentsDirStr +
         ". " + "SysErr: " + System::getErrString() );

      if (rmdirErrno != ENOENT)
         return false;
   }

   LOG_DEBUG(logContext, 4, "Contents directory deleted: " + contentsDirStr);

   if (isBuddyMirrored)
      if (auto* resync = BuddyResyncer::getSyncChangeset())
         resync->addDeletion(contentsDirStr, MetaSyncFileType::Directory);

   return true;
}
/**
 * Store the initial on-disk dentry for the given entry while holding the write lock of this
 * store (locked wrapper around makeEntryUnlocked() ).
 *
 * @param entry the dentry to store; dereferenced by makeEntryUnlocked(), so it must not be NULL.
 *    NOTE(review): nothing in this method or in makeEntryUnlocked() keeps or frees the pointer
 *    - confirm the intended ownership with the callers.
 * @return result of makeEntryUnlocked()
 */
FhgfsOpsErr DirEntryStore::makeEntry(DirEntry* entry)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   const FhgfsOpsErr makeRes = makeEntryUnlocked(entry);

   safeLock.unlock(); // U N L O C K

   return makeRes;
}
/**
 * Store the initial on-disk dentry for the given entry in the dentries directory of this store.
 *
 * Note: Unlocked, so the caller must hold the lock of this store.
 *
 * Every result other than FhgfsOpsErr_SUCCESS and FhgfsOpsErr_EXISTS is logged as an error.
 *
 * @param entry the dentry to store; must not be NULL
 * @return result of DirEntry::storeInitialDirEntry()
 */
FhgfsOpsErr DirEntryStore::makeEntryUnlocked(DirEntry* entry)
{
   const char* logContext = "make meta dir-entry";
   const std::string& storePath = getDirEntryPathUnlocked();

   const FhgfsOpsErr storeRes = entry->storeInitialDirEntry(storePath);

   const bool isAlreadyExisting = (storeRes == FhgfsOpsErr_EXISTS);

   if (unlikely(storeRes != FhgfsOpsErr_SUCCESS) && !isAlreadyExisting)
   {
      LogContext(logContext).logErr(std::string("Failed to create: name: ") + entry->getName() +
         std::string(" entryID: ") + entry->getID() + " in path: " + storePath);
   }

   return storeRes;
}
/**
 * Create a new dir-entry based on an inode, which is stored in dir-entry format
 * (with inlined inode). Locked wrapper around linkInodeToDirUnlocked().
 *
 * @param inodePath path of the existing inode file to link from
 * @param fileName name of the new dentry inside the dentries directory of this store
 * @return result of linkInodeToDirUnlocked()
 */
FhgfsOpsErr DirEntryStore::linkInodeToDir(const std::string& inodePath, const std::string &fileName)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   const FhgfsOpsErr linkRes = linkInodeToDirUnlocked(inodePath, fileName);

   safeLock.unlock(); // U N L O C K

   return linkRes;
}
/**
 * Hard-link an inode file into the dentries directory of this store.
 *
 * Note: Unlocked, so the caller must hold the lock of this store.
 * Note: For buddy mirrored stores the new path is added to the current resync changeset (if
 * there is one) independent of the link() result.
 *
 * @param inodePath path of the existing inode file to link from
 * @param fileName name of the new dentry inside the dentries directory of this store
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if link() failed
 */
FhgfsOpsErr DirEntryStore::linkInodeToDirUnlocked(const std::string& inodePath,
   const std::string& fileName)
{
   const char* logContext = "make meta dir-entry";

   std::string newDentryPath = getDirEntryPathUnlocked() + '/' + fileName;

   FhgfsOpsErr linkErr = FhgfsOpsErr_SUCCESS;

   if (link(inodePath.c_str(), newDentryPath.c_str() ) != 0)
   {
      LogContext(logContext).logErr(std::string("Failed to create link from: ") + inodePath +
         " To: " + newDentryPath + " SysErr: " + System::getErrString() );

      linkErr = FhgfsOpsErr_INTERNAL;
   }

   if (getIsBuddyMirrored())
   {
      if (auto* resync = BuddyResyncer::getSyncChangeset())
         resync->addModification(newDentryPath, MetaSyncFileType::Inode);
   }

   return linkErr;
}
/**
 * Remove the dentry of a sub-directory. Locked wrapper around removeDirUnlocked().
 *
 * note: only for dir dentries
 *
 * @param entryName name of the dentry to remove
 * @param outDirEntry is object of the removed dirEntry, may be NULL if the caller does not
 *    need it
 * @return result of removeDirUnlocked()
 */
FhgfsOpsErr DirEntryStore::removeDir(const std::string& entryName, DirEntry** outDirEntry)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   const FhgfsOpsErr removeRes = removeDirUnlocked(entryName, outDirEntry);

   safeLock.unlock(); // U N L O C K

   return removeRes;
}
/**
 * Unlink a file dentry. Locked wrapper around unlinkDirEntryUnlocked().
 *
 * note: only for file dentries
 *
 * @param entryName name of the dentry; handed to DirEntry::removeFileDentry() together with
 *    unlinkTypeFlags
 * @param entry the already loaded dentry; dereferenced by unlinkDirEntryUnlocked(), so it must
 *    not be NULL
 * @param unlinkTypeFlags DirEntry_UNLINK_... flags, passed through unchanged
 * @return result of unlinkDirEntryUnlocked()
 */
FhgfsOpsErr DirEntryStore::unlinkDirEntry(const std::string& entryName, DirEntry* entry,
   unsigned unlinkTypeFlags)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   const FhgfsOpsErr unlinkRes = unlinkDirEntryUnlocked(entryName, entry, unlinkTypeFlags);

   safeLock.unlock(); // U N L O C K

   return unlinkRes;
}
/**
 * Load the named dentry from disk, verify that it is a directory dentry and remove it.
 *
 * Note: only for dir dentries
 * Note: This does not lock the mutex, so it must already be locked when calling this.
 *
 * @param entryName name of the dentry to remove
 * @param outDirEntry may be NULL if the caller does not need it; otherwise it is set to NULL
 *    first and, if a directory dentry was loaded, to the loaded DirEntry object (also when the
 *    removal itself failed). The caller then has to delete that object.
 * @return FhgfsOpsErr_PATHNOTEXISTS if the dentry cannot be loaded or is not a directory,
 *    otherwise the result of DirEntry::removeDirDentry()
 */
FhgfsOpsErr DirEntryStore::removeDirUnlocked(const std::string& entryName, DirEntry** outDirEntry)
{
   SAFE_ASSIGN(outDirEntry, NULL);

   DirEntry* loadedEntry = DirEntry::createFromFile(getDirEntryPathUnlocked(), entryName);

   const bool isDirDentry = loadedEntry && DirEntryType_ISDIR(loadedEntry->getEntryType() );
   if(!isDirDentry)
   {
      delete(loadedEntry); // (no-op if loading failed)
      return FhgfsOpsErr_PATHNOTEXISTS;
   }

   const FhgfsOpsErr removeRes = DirEntry::removeDirDentry(getDirEntryPathUnlocked(), entryName,
      loadedEntry->getIsBuddyMirrored());

   if (!outDirEntry)
      delete(loadedEntry);
   else
      *outDirEntry = loadedEntry; // caller takes over the object

   return removeRes;
}
/**
 * Unlink a file dentry via DirEntry::removeFileDentry().
 *
 * Note: only for file dentries
 * Note: This does not lock the mutex, so it must already be locked when calling this.
 *
 * @param entryName name of the dentry; passed through to DirEntry::removeFileDentry()
 * @param entry the already loaded dentry; provides the entryID and the buddy mirror flag and is
 *    dereferenced unconditionally, so it must not be NULL
 * @param unlinkTypeFlags DirEntry_UNLINK_... flags, passed through unchanged
 * @return result of DirEntry::removeFileDentry()
 */
FhgfsOpsErr DirEntryStore::unlinkDirEntryUnlocked(const std::string& entryName, DirEntry* entry,
   unsigned unlinkTypeFlags)
{
   return DirEntry::removeFileDentry(getDirEntryPathUnlocked(), entry->getID(), entryName,
      unlinkTypeFlags, entry->getIsBuddyMirrored());
}
/**
 * Create a hardlink from 'fromEntryName' to 'toEntryName'.
 *
 * NOTE: Only do this for the entries in the same directory with inlined inodes. In order to avoid
 *       a wrong link count on a possible crash/reboot, the link count already must have been
 *       increased by 1 (If the link count is too high after a crash we might end up with leaked
 *       chunk files on unlink, but not with missing chunk files if done the other way around).
 * NOTE: For buddy mirrored stores the target path is added to the current resync changeset (if
 *       there is one) independent of the link() result.
 *
 * @param fromEntryName name of the existing dentry in this directory
 * @param toEntryName name of the new dentry in this directory
 * @return FhgfsOpsErr_SUCCESS, FhgfsOpsErr_EXISTS if toEntryName already exists, or
 *    FhgfsOpsErr_INTERNAL on any other link() error
 */
FhgfsOpsErr DirEntryStore::linkEntryInDir(const std::string& fromEntryName,
   const std::string& toEntryName)
{
   // (used to be a copy of the renameEntry context, which misattributed link errors in the log)
   const char *logContext = "DirEntryStore linkEntryInDir";

   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;

   std::string fromPath = getDirEntryPathUnlocked() + '/' + fromEntryName;
   std::string toPath = getDirEntryPathUnlocked() + '/' + toEntryName;

   int linkRes = link(fromPath.c_str(), toPath.c_str() );
   if (linkRes)
   {
      if (errno == EEXIST)
         retVal = FhgfsOpsErr_EXISTS;
      else
      {
         LogContext(logContext).logErr(std::string("Failed to link file in dir: ") +
            getDirEntryPathUnlocked() + " from: " + fromEntryName + " to: " + toEntryName +
            ". SysErr: " + System::getErrString() );

         retVal = FhgfsOpsErr_INTERNAL;
      }
   }

   safeLock.unlock(); // U N L O C K

   if (getIsBuddyMirrored())
      if (auto* resync = BuddyResyncer::getSyncChangeset())
         resync->addModification(toPath, MetaSyncFileType::Dentry);

   return retVal;
}
/**
 * In contrast to the moving...()-methods, this method performs a simple rename of an entry
 * inside the dentries directory of this store, where no moving is involved.
 *
 * NOTE: The caller already must have done rename-sanity checks.
 * NOTE: For buddy mirrored stores the old path is added as deletion and the new path as
 *       modification to the current resync changeset (if there is one), independent of the
 *       rename() result.
 *
 * @param fromEntryName current name of the dentry
 * @param toEntryName new name of the dentry
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if rename() failed
 */
FhgfsOpsErr DirEntryStore::renameEntry(const std::string& fromEntryName,
   const std::string& toEntryName)
{
   const char *logContext = "DirEntryStore renameEntry";

   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   std::string oldDentryPath = getDirEntryPathUnlocked() + '/' + fromEntryName;
   std::string newDentryPath = getDirEntryPathUnlocked() + '/' + toEntryName;

   FhgfsOpsErr renameErr = FhgfsOpsErr_SUCCESS;

   if (rename(oldDentryPath.c_str(), newDentryPath.c_str() ) != 0)
   {
      LogContext(logContext).logErr(std::string("Failed to rename file in dir: ") +
         getDirEntryPathUnlocked() + " from: " + fromEntryName + " to: " + toEntryName +
         ". SysErr: " + System::getErrString() );

      renameErr = FhgfsOpsErr_INTERNAL;
   }

   safeLock.unlock(); // U N L O C K

   if (isBuddyMirrored)
   {
      if (auto* resync = BuddyResyncer::getSyncChangeset())
      {
         resync->addDeletion(oldDentryPath, MetaSyncFileType::Dentry);
         resync->addModification(newDentryPath, MetaSyncFileType::Dentry);
      }
   }

   return renameErr;
}
/**
 * List the dentries of this directory, starting at the given offset.
 *
 * Note: serverOffset is an internal value and should not be assumed to be just 0, 1, 2, 3, ...;
 * so make sure you use either 0 (at the beginning) or something that has been returned by this
 * method as offset (similar to posix telldir/seekdir).
 *
 * Note: You have reached the end of the directory when success is returned and
 * "outNames.size() != maxOutNames".
 *
 * Note: A dentry whose metadata cannot be loaded is still listed; its type is reported as
 * DirEntryType_INVALID and its ID as "<invalid>".
 *
 * @param serverOffset zero-based offset; represents the native local fs offset (as in telldir() ).
 * @param maxOutNames maximum number of entries to return
 * @param filterDots true if "." and ".." should not be returned.
 * @param outArgs outNewOffset is only valid if return value indicates success,
 * outEntryTypes and outEntryIDs may be NULL, the rest is required.
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if the directory could not be opened or
 * reading an entry failed
 */
FhgfsOpsErr DirEntryStore::listIncrementalEx(int64_t serverOffset,
   unsigned maxOutNames, bool filterDots, ListIncExOutArgs& outArgs)
{
   // note: we need offsets here that are stable after unlink, because apps like bonnie++ use
      // readdir(), then unlink() the returned files and expect readdir() to continue after that.
      // this won't work if we use our own offset number and skip the given number of initial
      // entries each time. that's why we use the native local file system offset and seek here.

   const char* logContext = "DirEntryStore (list inc)";

   FhgfsOpsErr retVal = FhgfsOpsErr_INTERNAL;
   uint64_t numEntries = 0;
   struct dirent* dirEntry = NULL;

   SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K

   DIR* dirHandle = opendir(getDirEntryPathUnlocked().c_str() );
   if(!dirHandle)
   {
      LogContext(logContext).logErr(std::string("Unable to open dentry directory: ") +
         getDirEntryPathUnlocked() + ". SysErr: " + System::getErrString() );

      goto err_unlock;
   }

   // seek to offset (if provided)
   if(serverOffset)
   {
      seekdir(dirHandle, serverOffset); // (seekdir has no return value)
   }

   /* recommended by posix (readdir(3p) ): the "!dirEntry && errno" check below must not see a
    * stale errno of an earlier call, e.g. if the loop does not read anything (maxOutNames==0) */
   errno = 0;

   // loop over the actual directory entries
   for( ;
       (numEntries < maxOutNames) &&
          (dirEntry = StorageTk::readdirFilteredEx(dirHandle, filterDots, true) );
       numEntries++)
   {
      outArgs.outNames->push_back(dirEntry->d_name);

      if(outArgs.outServerOffsets)
         outArgs.outServerOffsets->push_back(dirEntry->d_off);

      SAFE_ASSIGN(outArgs.outNewServerOffset, dirEntry->d_off);

      if(outArgs.outEntryTypes || outArgs.outEntryIDs)
      {
         DirEntryType entryType;
         std::string entryID;

         if(!filterDots && !strcmp(dirEntry->d_name, ".") )
         {
            entryType = DirEntryType_DIRECTORY;
            entryID = "<.>";
         }
         else
         if(!filterDots && !strcmp(dirEntry->d_name, "..") )
         {
            entryType = DirEntryType_DIRECTORY;
            entryID = "<..>";
         }
         else
         { // load dentry metadata
            DirEntry entry(dirEntry->d_name);

            bool loadSuccess = entry.loadFromFileName(getDirEntryPathUnlocked(), dirEntry->d_name);
            if (likely(loadSuccess) )
            {
               entryType = entry.getEntryType();
               entryID = entry.getEntryID();
            }
            else
            { // loading failed
               entryType = DirEntryType_INVALID;
               entryID = "<invalid>";
               errno = 0;
            }
         }

         if(outArgs.outEntryTypes)
            outArgs.outEntryTypes->push_back( (int)entryType);

         if (outArgs.outEntryIDs)
            outArgs.outEntryIDs->push_back(entryID);
      }
   }

   if(!dirEntry && errno)
   {
      LogContext(logContext).logErr(std::string("Unable to fetch links directory entry from: ") +
         getDirEntryPathUnlocked() + ". SysErr: " + System::getErrString() );
   }
   else
   { // all entries read
      retVal = FhgfsOpsErr_SUCCESS;
   }

   closedir(dirHandle);

err_unlock:
   safeLock.unlock(); // U N L O C K

   return retVal;
}
/**
 * List the entries of the dentry-by-ID sub-directory of this directory.
 *
 * Note: serverOffset is an internal value and should not be assumed to be just 0, 1, 2, 3, ...;
 * so make sure you use either 0 (at the beginning) or something that has been returned by this
 * method as outNewOffset.
 * Note: You have reached the end of the directory when "outNames.size() != maxOutNames".
 * Note: This function was written for fsck
 *
 * @param serverOffset zero-based offset; represents the native local fs offset; preferred over
 * incrementalOffset; use -1 here if you want to seek to e.g. the n-th element and use the slow
 * incrementalOffset for that case.
 * @param incrementalOffset zero-based offset; only used if serverOffset is -1; skips the given
 * number of entries.
 * @param maxOutNames maximum number of entries to return
 * @param outArgs outNewOffset is only valid if return value indicates success and may be NULL if
 * the caller is not interested, outEntryTypes is not used (NULL), outNames is required.
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if the directory could not be opened or
 * reading an entry failed
 */
FhgfsOpsErr DirEntryStore::listIDFilesIncremental(int64_t serverOffset,
   uint64_t incrementalOffset, unsigned maxOutNames, ListIncExOutArgs& outArgs)
{
   // note: we need offsets here that are stable after unlink, because apps like bonnie++ use
      // readdir(), then unlink() the returned files and expect readdir() to continue after that.
      // this won't work if we use our own offset number and skip the given number of initial
      // entries each time. that's why we use the native local file system offset and seek here.
      // (incrementalOffset is only provided as fallback for client directory seeks and should
      // be avoided when possible.)

   const char* logContext = "DirEntryStore (list ID files inc)";

   FhgfsOpsErr retVal = FhgfsOpsErr_INTERNAL;
   uint64_t numEntries = 0;
   struct dirent* dirEntry = NULL;

   SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K

   std::string path = MetaStorageTk::getMetaDirEntryIDPath(getDirEntryPathUnlocked());

   DIR* dirHandle = opendir(path.c_str() );
   if(!dirHandle)
   {
      LogContext(logContext).logErr(std::string("Unable to open dentry-by-ID directory: ") +
         path + ". SysErr: " + System::getErrString() );

      goto err_unlock;
   }

   errno = 0; // recommended by posix (readdir(3p) )

   // seek to offset
   if(serverOffset != -1)
   { // caller provided direct offset
      seekdir(dirHandle, serverOffset); // (seekdir has no return value)
   }
   else
   { // slow path: incremental seek to current offset
      for(uint64_t currentOffset = 0;
          (currentOffset < incrementalOffset) && (dirEntry=StorageTk::readdirFiltered(dirHandle) );
          currentOffset++)
      {
         // (actual seek work done in loop header)

         // outNewServerOffset is optional (may be NULL), so don't dereference it blindly
         SAFE_ASSIGN(outArgs.outNewServerOffset, dirEntry->d_off);
      }
   }

   // the actual entry reading
   for( ; (numEntries < maxOutNames) && (dirEntry = StorageTk::readdirFiltered(dirHandle) );
       numEntries++)
   {
      outArgs.outNames->push_back(dirEntry->d_name);

      // outNewServerOffset is optional (may be NULL), so don't dereference it blindly
      SAFE_ASSIGN(outArgs.outNewServerOffset, dirEntry->d_off);
   }

   if(!dirEntry && errno)
   {
      LogContext(logContext).logErr(std::string("Unable to fetch dentry-by-ID entry from: ") +
         path + ". SysErr: " + System::getErrString() );
   }
   else
   { // all entries read
      retVal = FhgfsOpsErr_SUCCESS;
   }

   closedir(dirHandle);

err_unlock:
   safeLock.unlock(); // U N L O C K

   return retVal;
}
/**
 * Check whether a dentry with the given name exists (locked wrapper around existsUnlocked() ).
 *
 * @param entryName name of the dentry to look for
 * @return result of existsUnlocked()
 */
bool DirEntryStore::exists(const std::string& entryName)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K

   const bool entryExists = existsUnlocked(entryName);

   safeLock.unlock(); // U N L O C K

   return entryExists;
}
/**
 * Check via stat() whether a file with the given name exists in the dentries directory.
 *
 * Note: Unlocked, so the caller must hold the lock of this store.
 *
 * @param entryName name of the dentry to look for
 * @return true if stat() on the dentry path succeeded; any stat() error yields false
 */
bool DirEntryStore::existsUnlocked(const std::string& entryName)
{
   const std::string dentryFilePath = getDirEntryPathUnlocked() + "/" + entryName;

   struct stat statBuf;

   return stat(dentryFilePath.c_str(), &statBuf) == 0;
}
/**
 * Load the named dentry from disk and hand out its EntryInfo and (optionally) its inode data.
 *
 * @param entryName name of the dentry to load
 * @param outInfo is filled on success; dereferenced unconditionally in that case
 * @param outInodeMetaData might be NULL; otherwise receives a copy of the inode data of the
 *    loaded dentry
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_PATHNOTEXISTS if the dentry could not be loaded
 */
FhgfsOpsErr DirEntryStore::getEntryData(const std::string& entryName, EntryInfo* outInfo,
   FileInodeStoreData* outInodeMetaData)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K

   DirEntry loadedEntry(entryName);

   if(!loadedEntry.loadFromFileName(getDirEntryPathUnlocked(), entryName) )
   { // no such dentry
      safeLock.unlock(); // U N L O C K

      return FhgfsOpsErr_PATHNOTEXISTS;
   }

   if (outInodeMetaData)
   {
      /* NOTE(review): per the original author's note, the assignment does not allocate a new
       * stripe pattern but shares the pointer, so the pattern is detached from the temporary
       * dentry afterwards to keep it from being deleted on destruction of loadedEntry - confirm
       * against FileInodeStoreData. */
      FileInodeStoreData* loadedInodeData = loadedEntry.getInodeStoreData();

      *outInodeMetaData = *loadedInodeData;
      loadedInodeData->setPattern(NULL);
   }

   int featureFlags = 0;

   if (loadedEntry.getIsInodeInlined() )
      featureFlags |= ENTRYINFO_FEATURE_INLINED;

   if (loadedEntry.getIsBuddyMirrored())
      featureFlags |= ENTRYINFO_FEATURE_BUDDYMIRRORED;

   outInfo->set(loadedEntry.getOwnerNodeID(), this->parentID, loadedEntry.getID(), entryName,
      loadedEntry.getEntryType(), featureFlags);

   safeLock.unlock(); // U N L O C K

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Load and return the dir-entry of the given entry-/fileName while holding the read lock.
 *
 * @param entryName name of the dentry to load
 * @return result of DirEntry::createFromFile(); removeDirUnlocked() treats a NULL result of
 *    that call as "no such entry"
 */
DirEntry* DirEntryStore::dirEntryCreateFromFile(const std::string& entryName)
{
   SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K

   DirEntry* loadedEntry = DirEntry::createFromFile(this->getDirEntryPathUnlocked(), entryName);

   safeLock.unlock(); // U N L O C K

   return loadedEntry;
}
/**
 * Note: Unlocked, so make sure you hold the lock when calling this.
 *
 * @return reference to the dirEntryPath member (path of the dentries directory of this store)
 */
const std::string& DirEntryStore::getDirEntryPathUnlocked() const
{
   const std::string& storePath = dirEntryPath;

   return storePath;
}
/**
 * Load the named dentry, set a new owner node on it and store it again (all under the write
 * lock of this store).
 *
 * @param entryName name of the dentry to modify
 * @param ownerNode the new owner node ID
 *
 * @return FhgfsOpsErr_PATHNOTEXISTS if the dentry could not be loaded, FhgfsOpsErr_INTERNAL if
 *    DirEntry::setOwnerNodeID() failed, FhgfsOpsErr_SUCCESS otherwise
 */
FhgfsOpsErr DirEntryStore::setOwnerNodeID(const std::string& entryName, NumNodeID ownerNode)
{
   DirEntry entry(entryName);
   FhgfsOpsErr setRes;

   SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K

   if(!entry.loadFromFileName(getDirEntryPathUnlocked(), entryName) )
      setRes = FhgfsOpsErr_PATHNOTEXISTS; // no such entry
   else
   if(entry.setOwnerNodeID(getDirEntryPathUnlocked(), ownerNode) )
      setRes = FhgfsOpsErr_SUCCESS;
   else
      setRes = FhgfsOpsErr_INTERNAL;

   safeLock.unlock(); // U N L O C K

   return setRes;
}
/**
 * Bind this store to a directory: sets parentID, the buddy mirror flag and the dirEntryPath
 * that is derived from both.
 *
 * Note: Unlocked. Use this only during init.
 *
 * @param parentID ID of the directory to which this store belongs
 * @param parentIsBuddyMirrored true if that directory is buddy mirrored
 */
void DirEntryStore::setParentID(const std::string& parentID, bool parentIsBuddyMirrored)
{
   this->isBuddyMirrored = parentIsBuddyMirrored;
   this->parentID = parentID;

   // (the path depends on both values above, but is computed from the arguments)
   this->dirEntryPath = getDirEntryStoreDynamicEntryPath(parentID, parentIsBuddyMirrored);
}

View File

@@ -0,0 +1,201 @@
#pragma once
#include <common/Common.h>
#include <common/threading/Mutex.h>
#include <common/toolkit/MetadataTk.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include "DirEntry.h"
/**
 * Bundle of output pointers for the incremental directory listing methods of DirEntryStore.
 * The struct only stores the pointers; it neither allocates nor frees the lists.
 */
struct ListIncExOutArgs
{
   ListIncExOutArgs(StringList* outNames, UInt8List* outEntryTypes, StringList* outEntryIDs,
      Int64List* outServerOffsets, int64_t* outNewServerOffset)
      : outNames(outNames),
        outEntryTypes(outEntryTypes),
        outEntryIDs(outEntryIDs),
        outServerOffsets(outServerOffsets),
        outNewServerOffset(outNewServerOffset)
   {
      // see initializer list
   }

   // required
   StringList* outNames;

   // optional (can be NULL if caller is not interested; contains the DirEntryType values, stored
   // as plain numbers to ease message serialization)
   UInt8List* outEntryTypes;

   // optional (may be NULL if caller is not interested)
   StringList* outEntryIDs;

   // optional (may be NULL if caller is not interested)
   Int64List* outServerOffsets;

   // optional (may be NULL), equals last value from outServerOffsets
   int64_t* outNewServerOffset;
};
class DirEntryStore
{
friend class DirInode;
friend class MetaStore;
public:
DirEntryStore();
DirEntryStore(const std::string& parentID, bool isBuddyMirrored);
FhgfsOpsErr makeEntry(DirEntry* entry);
FhgfsOpsErr linkEntryInDir(const std::string& fromEntryName, const std::string& toEntryName);
FhgfsOpsErr linkInodeToDir(const std::string& inodePath, const std::string &fileName);
FhgfsOpsErr removeDir(const std::string& entryName, DirEntry** outDirEntry);
FhgfsOpsErr unlinkDirEntry(const std::string& entryName, DirEntry* entry,
unsigned unlinkTypeFlags);
FhgfsOpsErr renameEntry(const std::string& fromEntryName, const std::string& toEntryName);
FhgfsOpsErr listIncrementalEx(int64_t serverOffset, unsigned maxOutNames, bool filterDots,
ListIncExOutArgs& outArgs);
FhgfsOpsErr listIDFilesIncremental(int64_t serverOffset, uint64_t incrementalOffset,
unsigned maxOutNames, ListIncExOutArgs& outArgs);
bool exists(const std::string& entryName);
FhgfsOpsErr getEntryData(const std::string& entryName, EntryInfo* outInfo,
FileInodeStoreData* outInodeMetaData);
FhgfsOpsErr setOwnerNodeID(const std::string& entryName, NumNodeID ownerNode);
DirEntry* dirEntryCreateFromFile(const std::string& entryName);
static FhgfsOpsErr mkDentryStoreDir(const std::string& dirID, bool isBuddyMirrored);
static bool rmDirEntryStoreDir(const std::string& id, bool isBuddyMirrored);
private:
std::string parentID; // ID of the directory to which this store belongs
std::string dirEntryPath; /* path to dirEntry, without the last element (fileName)
* depends on parentID, so changes when parentID is set */
RWLock rwlock;
bool isBuddyMirrored;
FhgfsOpsErr makeEntryUnlocked(DirEntry* entry);
FhgfsOpsErr linkInodeToDirUnlocked(const std::string& inodePath, const std::string &fileName);
FhgfsOpsErr removeDirUnlocked(const std::string& entryName, DirEntry** outDirEntry);
FhgfsOpsErr unlinkDirEntryUnlocked(const std::string& entryName, DirEntry* entry,
unsigned unlinkTypeFlags);
bool existsUnlocked(const std::string& entryName);
const std::string& getDirEntryPathUnlocked() const;
public:
// inliners
/**
* @return false if no link with this name exists
*/
bool getDentry(const std::string& entryName, DirEntry& outEntry)
{
// note: the difference to getDirDentry/getFileDentry is that this works independent
// of the link type
bool exists = false;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
exists = outEntry.loadFromFileName(getDirEntryPathUnlocked(), entryName);
safeLock.unlock();
return exists;
}
const std::string& getParentEntryID() const
{
return this->parentID;
}
/**
* @return false if no dir dentry (dir-entry) with this name exists or its type is not dir
*/
bool getDirDentry(const std::string& entryName, DirEntry& outEntry)
{
bool getRes = getDentry(entryName, outEntry);
return getRes && DirEntryType_ISDIR(outEntry.getEntryType() );
}
/**
*
* @return false if no dentry with this name exists or its type is not file
*/
bool getFileDentry(const std::string& entryName, DirEntry& outEntry)
{
bool getRes = getDentry(entryName, outEntry);
return getRes && DirEntryType_ISFILE(outEntry.getEntryType() );
}
bool getEntryInfo(const std::string& entryName, EntryInfo& outEntryInfo)
{
DirEntry entry(entryName);
std::string parentEntryID = this->getParentEntryID();
int additionalFlags = 0; // unknown
bool getRes = getDentry(entryName, entry);
if (getRes == true)
entry.getEntryInfo(parentEntryID, additionalFlags, &outEntryInfo);
return getRes;
}
bool getFileEntryInfo(const std::string& entryName, EntryInfo& outEntryInfo)
{
bool getRes = getEntryInfo(entryName, outEntryInfo);
return getRes && DirEntryType_ISFILE(outEntryInfo.getEntryType() );
}
bool getDirEntryInfo(const std::string& entryName, EntryInfo& outEntryInfo)
{
bool getRes = getEntryInfo(entryName, outEntryInfo);
return getRes && DirEntryType_ISDIR(outEntryInfo.getEntryType() );
}
std::string getDirEntryPath()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K
std::string dirEntryPath = this->dirEntryPath;
safeLock.unlock();
return dirEntryPath;
}
/*
* Note: No locking here, isBuddyMirrored should only be set on initialization
*/
bool getIsBuddyMirrored() const
{
return this->isBuddyMirrored;
}
// getters & setters
void setParentID(const std::string& parentID, bool parentIsBuddyMirrored);
private:
/**
* Handle the unlink of a file, for which we need to keep the inode.
*/
FhgfsOpsErr removeBusyFile(const std::string& entryName, DirEntry* dentry,
unsigned unlinkTypeFlags)
{
SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // Lock
FhgfsOpsErr retVal = dentry->removeBusyFile(getDirEntryPathUnlocked(), dentry->getID(),
entryName, unlinkTypeFlags);
safeLock.unlock();
return retVal;
}
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,653 @@
#pragma once
#include <common/storage/striping/StripePattern.h>
#include <common/storage/RemoteStorageTarget.h>
#include <common/storage/Metadata.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/threading/UniqueRWLock.h>
#include <common/storage/StatData.h>
#include <common/Common.h>
#include "DirEntryStore.h"
#include "MetadataEx.h"
#include "InodeFileStore.h"
/* Note: Don't forget to update DiskMetaData::getSupportedDirInodeFeatureFlags() if you add new
* flags here. */
// DirInode feature flags; each value is a distinct single bit
#define DIRINODE_FEATURE_EARLY_SUBDIRS 2 // indicate proper alignment for statData
#define DIRINODE_FEATURE_MIRRORED 4 // indicate old mirrored directory (for compatibility)
#define DIRINODE_FEATURE_STATFLAGS 8 // StatData have a flags field
#define DIRINODE_FEATURE_BUDDYMIRRORED 16 // indicate buddy mirrored directory
#define DIRINODE_FEATURE_HAS_RST 32 // indicates remote target availability
// limit number of stripes per file to a high but safe number. too many stripe targets will cause
// the serialized stripe pattern to be too large to store reliably, so choose a value well below
// that limit (but still high enough to not bother anyone).
// the number of stripe targets is limited by:
// * stripe patterns in inodes: for each target, 32 bits of target ID are stored
// * chunk size infos: for each target, 64 bits of #used_blocks may be stored
// at 256 targets, those two structures may use up to 3072 bytes, leaving ample room for other
// inode data.
// (256 targets * 4 bytes of target ID + 256 targets * 8 bytes of #used_blocks = 3072 bytes)
#define DIRINODE_MAX_STRIPE_TARGETS 256
/**
* Our inode object, but for directories only. Files are in class FileInode.
*/
class DirInode
{
friend class MetaStore;
friend class InodeDirStore;
friend class DiskMetaData;
public:
// Constructor taking all initial attributes explicitly; only declared here, defined out of line.
// NOTE(review): presumably the constructor for newly created directories, as opposed to the
// load-from-disk constructor of this class - confirm against the definition.
DirInode(const std::string& id, int mode, unsigned userID, unsigned groupID,
NumNodeID ownerNodeID, const StripePattern& stripePattern, bool isBuddyMirrored);
/**
 * Constructor used to load inodes from disk.
 *
 * Note: Not all values are set here, as we load those from disk. The sub-entry counters are
 * zero-initialized nevertheless, so that accessors such as getNumSubEntries() never read
 * indeterminate values from an object that has not been (or could not be) loaded.
 *
 * @param id entry ID of the directory; also handed to the embedded DirEntryStore.
 * @param isBuddyMirrored if true, DIRINODE_FEATURE_BUDDYMIRRORED is the only initial feature
 *    flag; otherwise the feature flags start out empty.
 */
DirInode(const std::string& id, bool isBuddyMirrored)
   : id(id),
     stripePattern(NULL),
     featureFlags(isBuddyMirrored ? DIRINODE_FEATURE_BUDDYMIRRORED : 0),
     exclusive(false),
     numSubdirs(0), // initializers are kept in member declaration order
     numFiles(0),
     entries(id, isBuddyMirrored),
     isLoaded(false)
{ }
// Logs the deletion at spam level (debug logging only) and releases the stripe pattern via
// SAFE_DELETE_NOSET.
~DirInode()
{
   LOG_DEBUG("Delete DirInode", Log_SPAM, std::string("Deleting inode: ") + id);

   SAFE_DELETE_NOSET(this->stripePattern);
}
// Public methods that are only declared in this header; their definitions live out of line.
static DirInode* createFromFile(const std::string& id, bool isBuddyMirrored);
StripePattern* createFileStripePattern(const UInt16List* preferredTargets,
unsigned numtargets, unsigned chunksize, StoragePoolId storagePoolId);
// directory listing
FhgfsOpsErr listIncremental(int64_t serverOffset,
unsigned maxOutNames, StringList* outNames, int64_t* outNewServerOffset);
FhgfsOpsErr listIncrementalEx(int64_t serverOffset,
unsigned maxOutNames, bool filterDots, ListIncExOutArgs& outArgs);
FhgfsOpsErr listIDFilesIncremental(int64_t serverOffset, uint64_t incrementalOffset,
unsigned maxOutNames, ListIncExOutArgs& outArgs);
// entry lookup / creation / removal
bool exists(const std::string& entryName);
FhgfsOpsErr makeDirEntry(DirEntry& entry);
FhgfsOpsErr linkFileInodeToDir(const std::string& inodePath, const std::string& fileName);
FhgfsOpsErr removeDir(const std::string& entryName, DirEntry** outDirEntry);
FhgfsOpsErr unlinkDirEntry(const std::string& entryName, DirEntry* entry,
unsigned unlinkTypeFlags);
// load state handling
bool loadIfNotLoaded(void);
void invalidate();
FhgfsOpsErr refreshMetaInfo();
// non-inlined getters & setters
FhgfsOpsErr setOwnerNodeID(const std::string& entryName, NumNodeID ownerNode);
StripePattern* getStripePatternClone();
FhgfsOpsErr setStripePattern(const StripePattern& newPattern, uint32_t actorUID = 0);
FhgfsOpsErr setRemoteStorageTarget(const RemoteStorageTarget& rst);
FhgfsOpsErr clearRemoteStorageTarget();
FhgfsOpsErr getStatData(StatData& outStatData,
NumNodeID* outParentNodeID = NULL, std::string* outParentEntryID = NULL);
void setStatData(StatData& statData);
bool setAttrData(int validAttribs, SettableFileAttribs* attribs);
FhgfsOpsErr setDirParentAndChangeTime(EntryInfo* entryInfo, NumNodeID parentNodeID);
// extended attributes
std::pair<FhgfsOpsErr, StringVector> listXAttr(const EntryInfo* file);
std::tuple<FhgfsOpsErr, std::vector<char>, ssize_t> getXAttr(const EntryInfo* file,
const std::string& xAttrName, size_t maxSize);
FhgfsOpsErr removeXAttr(EntryInfo* file, const std::string& xAttrName);
FhgfsOpsErr setXAttr(EntryInfo* file, const std::string& xAttrName,
const CharVector& xAttrValue, int flags, bool updateTimestamps = true);
private:
// Note: constructors initialize members in the order in which they are declared here.
std::string id; // filesystem-wide unique string
NumNodeID ownerNodeID; // 0 means undefined
StripePattern* stripePattern; // is the default for new files and subdirs
RemoteStorageTarget rstInfo; // remote storage target information
std::string parentDirID; // must be reliable for NFS
NumNodeID parentNodeID; // must be reliable for NFS
uint16_t featureFlags; // bitwise OR of DIRINODE_FEATURE_* values
bool exclusive; // if set, we do not allow other references
// StatData
StatData statData;
uint32_t numSubdirs; // indirectly updated by subdir creation/removal
uint32_t numFiles; // indirectly updated by subfile creation/removal
RWLock rwlock; // taken by the locking inline accessors of this class
DirEntryStore entries;
/* if not set we have an object that has not read data from disk yet, the dir might not
even exist on disk */
bool isLoaded;
Mutex loadLock; // protects the disk load
InodeFileStore fileStore; /* We must not delete the DirInode as long as this
* InodeFileStore still has entries. Therefore a dir reference
* has to be taken for entry in this InodeFileStore */
// Internal helpers that are only declared in this header; their definitions live out of line.
// NOTE(review): methods with an "Unlocked" suffix presumably expect the caller to already hold
// this->rwlock (dirEntryCreateFromFileUnlocked() documents exactly that) - confirm per method.
StripePattern* createFileStripePatternUnlocked(const UInt16List* preferredTargets,
unsigned numtargets, unsigned chunksize, StoragePoolId storagePoolId);
// storing metadata
FhgfsOpsErr storeInitialMetaData();
FhgfsOpsErr storeInitialMetaData(const CharVector& defaultACLXAttr,
const CharVector& accessACLXAttr);
FhgfsOpsErr storeInitialMetaDataInode();
bool storeUpdatedMetaDataBuf(char* buf, unsigned bufLen);
bool storeUpdatedMetaDataBufAsXAttr(char* buf, unsigned bufLen);
bool storeUpdatedMetaDataBufAsContents(char* buf, unsigned bufLen);
bool storeUpdatedMetaDataBufAsContentsInPlace(char* buf, unsigned bufLen);
bool storeUpdatedMetaDataUnlocked();
bool storeRemoteStorageTargetInfoUnlocked();
bool storeRemoteStorageTargetDataBufAsXAttr(char* buf, unsigned bufLen);
// renaming entries
FhgfsOpsErr renameDirEntry(const std::string& fromName, const std::string& toName,
DirEntry* overWriteEntry);
FhgfsOpsErr renameDirEntryUnlocked(const std::string& fromName, const std::string& toName,
DirEntry* overWriteEntry);
// removing stored metadata
static bool removeStoredMetaData(const std::string& id);
static bool removeStoredMetaDataDir(const std::string& id);
static bool removeStoredMetaDataFile(const std::string& id, bool isBuddyMirrored);
// unlocked counterparts of public methods
FhgfsOpsErr refreshMetaInfoUnlocked();
FhgfsOpsErr makeDirEntryUnlocked(DirEntry* entry);
FhgfsOpsErr linkFileInodeToDirUnlocked(const std::string& inodePath,
const std::string &fileName);
FhgfsOpsErr removeDirUnlocked(const std::string& entryName, DirEntry** outDirEntry);
FhgfsOpsErr unlinkDirEntryUnlocked(const std::string& entryName, DirEntry* inEntry,
unsigned unlinkTypeFlags);
FhgfsOpsErr refreshSubentryCountUnlocked();
// loading metadata
bool loadFromFile();
bool loadFromFileXAttr();
bool loadFromFileContents();
bool loadIfNotLoadedUnlocked();
bool loadRstFromFileXAttr();
FhgfsOpsErr getEntryData(const std::string& entryName, EntryInfo* outInfo,
FileInodeStoreData* outInodeMetaData);
bool setAttrDataUnlocked(int validAttribs, SettableFileAttribs* attribs);
FhgfsOpsErr setDirParentAndChangeTimeUnlocked(EntryInfo* entryInfo, NumNodeID parentNodeID);
bool unlinkBusyFileUnlocked(const std::string& fileName, DirEntry* dentry,
unsigned unlinkTypeFlags);
protected:
FhgfsOpsErr linkFilesInDirUnlocked(const std::string& fromName, FileInode& fromInode,
const std::string& toName);
// called by setAndStoreIsBuddyMirrored() while the write lock is held
FhgfsOpsErr setIsBuddyMirrored(const bool isBuddyMirrored);
public:
// inliners
/**
 * Stores the initial metadata of this inode; thin wrapper around storeInitialMetaData().
 *
 * Note: Must be called before any of the mutator methods (otherwise they will fail)
 */
FhgfsOpsErr storePersistentMetaData()
{
   return this->storeInitialMetaData();
}
/**
 * Same as the parameterless overload, but forwards the two given ACL xattr buffers to
 * storeInitialMetaData().
 */
FhgfsOpsErr storePersistentMetaData(const CharVector& defaultACLXAttr,
   const CharVector& accessACLXAttr)
{
   return this->storeInitialMetaData(defaultACLXAttr, accessACLXAttr);
}
/**
 * Unlink the dir-inode file on disk.
 *
 * The dir-entry store directory is removed first; only if that succeeded, the stored metadata
 * file is removed as well.
 *
 * Note: Assumes that the caller already verified that the directory is empty
 *
 * @return false if either of the two removal steps reported failure.
 */
static bool unlinkStoredInode(const std::string& id, bool isBuddyMirrored)
{
   if (!DirEntryStore::rmDirEntryStoreDir(id, isBuddyMirrored) )
      return false; // leave the metadata file alone if the entries dir could not be removed

   return removeStoredMetaDataFile(id, isBuddyMirrored);
}
/**
 * Removes the stored metadata file for the given id (the result of the removal is ignored) and
 * then stores this inode via storeInitialMetaDataInode().
 *
 * Note: Intended to be used by fsck only.
 * NOTE(review): the parameter shadows the member of the same name; the removal uses the
 * parameter, whereas storeInitialMetaDataInode() gets no argument - confirm that callers
 * always pass the ID of this inode.
 */
FhgfsOpsErr storeAsReplacementFile(const std::string& id)
{
   // note: creates new dir metadata file for non-existing or invalid one => no locking needed
   const bool buddyMirrored = this->getIsBuddyMirrored();

   removeStoredMetaDataFile(id, buddyMirrored);

   return storeInitialMetaDataInode();
}
/**
 * Create a DirEntry for the given file name and return it; forwards to
 * DirEntryStore::dirEntryCreateFromFile().
 *
 * note: this->rwlock already needs to be locked
 */
DirEntry* dirEntryCreateFromFileUnlocked(const std::string& entryName)
{
   return entries.dirEntryCreateFromFile(entryName);
}
/**
* Get a dentry
* note: the difference to getDirEntryInfo/getFileEntryInfo is that this works independent
* of the entry-type
*/
bool getDentry(const std::string& entryName, DirEntry& outEntry)
{
bool retVal;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
retVal = getDentryUnlocked(entryName, outEntry);
safeLock.unlock();
return retVal;
}
/**
* Get a dentry
* note: the difference to getDirEntryInfo/getFileEntryInfo is that this works independent
* of the entry-type
*/
bool getDentryUnlocked(const std::string& entryName, DirEntry& outEntry)
{
return this->entries.getDentry(entryName, outEntry);
}
/**
* Get the dentry (dir-entry) of a directory
*/
bool getDirDentry(const std::string& dirName, DirEntry& outEntry)
{
bool retVal;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
retVal = entries.getDirDentry(dirName, outEntry);
safeLock.unlock();
return retVal;
}
/*
* Get the dentry (dir-entry) of a file
*/
bool getFileDentry(const std::string& fileName, DirEntry& outEntry)
{
bool retVal;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
retVal = entries.getFileDentry(fileName, outEntry);
safeLock.unlock();
return retVal;
}
/**
* Get the EntryInfo
* note: the difference to getDirEntryInfo/getFileEntryInfo is that this works independent
* of the entry-type
*/
bool getEntryInfo(const std::string& entryName, EntryInfo& outEntryInfo)
{
bool retVal;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
retVal = this->entries.getEntryInfo(entryName, outEntryInfo);
safeLock.unlock();
return retVal;
}
/**
* Get the EntryInfo of a directory
*/
bool getDirEntryInfo(const std::string& dirName, EntryInfo& outEntryInfo)
{
bool retVal;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
retVal = entries.getDirEntryInfo(dirName, outEntryInfo);
safeLock.unlock();
return retVal;
}
/*
* Get the dir-entry of a file
*/
bool getFileEntryInfo(const std::string& fileName, EntryInfo& outEntryInfo)
{
bool retVal;
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
retVal = entries.getFileEntryInfo(fileName, outEntryInfo);
safeLock.unlock();
return retVal;
}
const std::string& getID() const
{
return id;
}
// Returns a copy of ownerNodeID, taken while holding the read lock.
NumNodeID getOwnerNodeID()
{
   SafeRWLock readLock(&this->rwlock, SafeRWLock_READ);

   NumNodeID currentOwner = this->ownerNodeID;

   readLock.unlock();

   return currentOwner;
}
/**
 * Sets a new owner node and stores the updated metadata, all under the write lock.
 *
 * The inode is loaded first if necessary. If storing the updated metadata fails, the previous
 * owner is restored in memory.
 *
 * @return false if the inode could not be loaded or the updated metadata could not be stored.
 */
bool setOwnerNodeID(NumNodeID newOwner)
{
   SafeRWLock writeLock(&rwlock, SafeRWLock_WRITE);

   bool stored = false;

   if (loadIfNotLoadedUnlocked() )
   {
      const NumNodeID previousOwner = this->ownerNodeID;

      this->ownerNodeID = newOwner;

      stored = storeUpdatedMetaDataUnlocked();
      if (!stored)
         this->ownerNodeID = previousOwner; // failed to update metadata => restore old value
   }

   writeLock.unlock();

   return stored;
}
void getParentInfo(std::string* outParentDirID, NumNodeID* outParentNodeID)
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
*outParentDirID = this->parentDirID;
*outParentNodeID = this->parentNodeID;
safeLock.unlock();
}
/**
 * Assigns parentDirID and parentNodeID in memory only; takes no lock and stores nothing.
 *
 * Note: Initial means for newly created objects (=> unlocked, unpersistent)
 */
void setParentInfoInitial(const std::string& parentDirID, NumNodeID parentNodeID)
{
   this->parentNodeID = parentNodeID;
   this->parentDirID = parentDirID;
}
void setFeatureFlags(unsigned flags)
{
this->featureFlags = flags;
}
void addFeatureFlag(unsigned flag)
{
this->featureFlags |= flag;
}
void removeFeatureFlag(unsigned flag)
{
this->featureFlags &= ~flag;
}
unsigned getFeatureFlags() const
{
return this->featureFlags;
}
/**
 * Sets or clears DIRINODE_FEATURE_BUDDYMIRRORED in memory and passes the new mirror state on
 * to the dir-entry store (keeping its parent entry ID), all under the write lock.
 *
 * Note: Nothing is stored to disk here.
 */
void setIsBuddyMirroredFlag(const bool isBuddyMirrored)
{
   UniqueRWLock writeLock(rwlock, SafeRWLock_WRITE);

   if (!isBuddyMirrored)
      featureFlags &= ~DIRINODE_FEATURE_BUDDYMIRRORED;
   else
      featureFlags |= DIRINODE_FEATURE_BUDDYMIRRORED;

   entries.setParentID(entries.getParentEntryID(), isBuddyMirrored);
}
/**
 * Calls setIsBuddyMirrored() under the write lock and, if that succeeded, stores the updated
 * metadata.
 *
 * NOTE(review): the result of storeUpdatedMetaDataUnlocked() is ignored, so a failed store is
 * still reported as success to the caller - confirm whether that is intended.
 *
 * @return the result of setIsBuddyMirrored().
 */
FhgfsOpsErr setAndStoreIsBuddyMirrored(bool isBuddyMirrored)
{
   SafeRWLock writeLock(&this->rwlock, SafeRWLock_WRITE);

   const FhgfsOpsErr setRes = setIsBuddyMirrored(isBuddyMirrored);

   if (setRes == FhgfsOpsErr_SUCCESS)
      storeUpdatedMetaDataUnlocked();

   writeLock.unlock();

   return setRes;
}
// True if DIRINODE_FEATURE_BUDDYMIRRORED is set in the feature flags; no lock is taken.
bool getIsBuddyMirrored() const
{
   return (getFeatureFlags() & DIRINODE_FEATURE_BUDDYMIRRORED) != 0;
}
// True if DIRINODE_FEATURE_HAS_RST is set in the feature flags; no lock is taken.
bool getIsRstAvailable() const
{
   return (getFeatureFlags() & DIRINODE_FEATURE_HAS_RST) != 0;
}
bool getExclusive()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
bool retVal = this->exclusive;
safeLock.unlock();
return retVal;
}
void setExclusive(bool exclusive)
{
SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE);
this->exclusive = exclusive;
safeLock.unlock();
}
unsigned getUserID()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
unsigned retVal = this->statData.getUserID();
safeLock.unlock();
return retVal;
}
unsigned getUserIDUnlocked()
{
return this->statData.getUserID();
}
unsigned getGroupID()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
unsigned retVal = this->statData.getGroupID();
safeLock.unlock();
return retVal;
}
int getMode()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
int retVal = this->statData.getMode();
safeLock.unlock();
return retVal;
}
unsigned getNumHardlinks()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
unsigned retVal = this->statData.getNumHardlinks();
safeLock.unlock();
return retVal;
}
size_t getNumSubEntries()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_READ);
size_t retVal = numSubdirs + numFiles;
safeLock.unlock();
return retVal;
}
bool getIsLoaded()
{
UniqueRWLock lock(rwlock, SafeRWLock_READ);
return isLoaded;
}
/**
 * Loads the directory with the given ID into a temporary DirInode and returns its stat data.
 *
 * @return FhgfsOpsErr_PATHNOTEXISTS if loadFromFile() fails; otherwise the result of the
 *    non-static getStatData().
 */
static FhgfsOpsErr getStatData(const std::string& dirID, bool isBuddyMirrored,
   StatData& outStatData, NumNodeID* outParentNodeID, std::string* outParentEntryID)
{
   DirInode tmpInode(dirID, isBuddyMirrored);

   const bool loaded = tmpInode.loadFromFile();
   if (!loaded)
      return FhgfsOpsErr_PATHNOTEXISTS;

   return tmpInode.getStatData(outStatData, outParentNodeID, outParentEntryID);
}
// Returns the stored stripePattern pointer as it is (no lock taken, no clone created).
StripePattern* getStripePattern() const
{
   return this->stripePattern;
}
// Returns a pointer to the rstInfo member of this inode; no lock is taken.
RemoteStorageTarget* getRemoteStorageTargetInfo()
{
   return &rstInfo;
}
private:
bool updateTimeStampsAndStoreToDisk(const char* logContext)
{
int64_t nowSecs = TimeAbs().getTimeval()->tv_sec;;
this->statData.setAttribChangeTimeSecs(nowSecs);
this->statData.setModificationTimeSecs(nowSecs);
if(unlikely(!storeUpdatedMetaDataUnlocked() ) )
{
LogContext(logContext).logErr(std::string("Failed to update dir-info on disk: "
"Dir-ID: ") + this->getID() + std::string(". SysErr: ") + System::getErrString() );
return false;
}
return true;
}
// Increments numSubdirs, then updates the timestamps and stores the metadata.
bool increaseNumSubDirsAndStoreOnDisk(void)
{
   ++numSubdirs;

   return updateTimeStampsAndStoreToDisk("DirInfo update: increase number of SubDirs");
}
// Increments numFiles, then updates the timestamps and stores the metadata.
bool increaseNumFilesAndStoreOnDisk(void)
{
   ++numFiles;

   return updateTimeStampsAndStoreToDisk("DirInfo update: increase number of Files");
}
// Decrements numFiles unless it is already zero, then updates the timestamps and stores the
// metadata (the store happens in both cases).
bool decreaseNumFilesAndStoreOnDisk(void)
{
   if (numFiles > 0) // make sure it does not get sub-zero
      --numFiles;

   return updateTimeStampsAndStoreToDisk("DirInfo update: decrease number of Files");
}
// Decrements numSubdirs unless it is already zero, then updates the timestamps and stores the
// metadata (the store happens in both cases).
bool decreaseNumSubDirsAndStoreOnDisk(void)
{
   if (numSubdirs > 0) // make sure it does not get sub-zero
      --numSubdirs;

   return updateTimeStampsAndStoreToDisk("DirInfo update: decrease number of SubDirs");
}
};

View File

@@ -0,0 +1,851 @@
/*
* Dentry and inode serialization/deserialization.
*
* Note: Currently inodes and dentries are stored in exactly the same format, even if
* inodes are not inlined into a dentry.
* If we should add another inode-only format, all code linking inodes to dentries
* (e.g. MetaStore::unlinkInodeLaterUnlocked() calling dirInode->linkInodeToDir() ) must
* be updated.
*/
#include <program/Program.h>
#include <common/storage/StorageDefinitions.h>
#include "DiskMetaData.h"
#include "DirInode.h"
#include "FileInode.h"
#include "DirEntry.h"
#define DISKMETADATA_LOG_CONTEXT "DiskMetadata"
// Dentry storage format versions; stored in the second byte of the serialized dentry header.
// serializeInDentryFormat() in this file writes VER3 (directories and file dentries without
// an inlined inode), VER4 or VER6; deserializeDentry() accepts VER3 to VER6.
// 8-bit
#define DIRENTRY_STORAGE_FORMAT_VER2 2 // version beginning with release 2011.04
#define DIRENTRY_STORAGE_FORMAT_VER3 3 // version, same as V2, but removes the file name
// from the dentry (for dir.dentries)
#define DIRENTRY_STORAGE_FORMAT_VER4 4 // version, which includes inlined inodes, deprecated
#define DIRENTRY_STORAGE_FORMAT_VER5 5 /* version, which includes inlined inodes
* and chunk-path V3, StatData have a flags field */
#define DIRENTRY_STORAGE_FORMAT_VER6 6 /* VER5 + additional storage pool in pattern */
// Directory inode storage format versions; serializeDirInode() in this file writes VER3.
// 8-bit
#define DIRECTORY_STORAGE_FORMAT_VER1 1 // 16 bit node IDs
#define DIRECTORY_STORAGE_FORMAT_VER2 2 // 32 bit node IDs
#define DIRECTORY_STORAGE_FORMAT_VER3 3 // 32 bit node IDs + storage pool in pattern
/*
 * Serialize a file inode in the dentry format.
 *
 * If the entry type of the dentry data is not valid, it is derived from the mode in the stat
 * data first (and set in the dentry data); debug builds additionally log that as a bug.
 */
void DiskMetaData::serializeFileInode(Serializer& ser)
{
   // inodeData set in constructor

   const bool entryTypeValid = DirEntryType_ISVALID(this->dentryDiskData->getDirEntryType() );
   if (!entryTypeValid)
   {
      unsigned posixMode = this->inodeData->getInodeStatData()->getMode();

      this->dentryDiskData->setDirEntryType(
         MetadataTk::posixFileTypeToDirEntryType(posixMode) );

#ifdef BEEGFS_DEBUG
      const char* logContext = "Serialize FileInode";
      LogContext(logContext).logErr("Bug: entryType not set!");
      LogContext(logContext).logBacktrace();
#endif
   }

   /* We use this method to clone inodes which might be inlined into a dentry, so the real
    * meta type depends on if the inode is inlined or not. */
   const bool inodeInlined =
      (this->dentryDiskData->dentryFeatureFlags & DENTRY_FEATURE_INODE_INLINE) != 0;

   const DiskMetaDataType metaDataType =
      inodeInlined ? DiskMetaDataType_FILEDENTRY : DiskMetaDataType_FILEINODE;

   serializeInDentryFormat(ser, metaDataType);
}
/*
 * Serialize a dentry; the meta data type (dir dentry or file dentry) is chosen by the entry
 * type of the dentry data.
 */
void DiskMetaData::serializeDentry(Serializer& ser)
{
   const bool isDir = DirEntryType_ISDIR(this->dentryDiskData->getDirEntryType() );

   const DiskMetaDataType metaDataType =
      isDir ? DiskMetaDataType_DIRDENTRY : DiskMetaDataType_FILEDENTRY;

   serializeInDentryFormat(ser, metaDataType);
}
/*
 * Writes the common 8 byte header (type, format version, dentry feature flags, entry type,
 * padding) followed by the version specific body.
 *
 * Note: Current object state is used for the serialization
 * Note: DENTRY_FEATURE_32BITIDS is added to the dentry feature flags as a side effect.
 */
void DiskMetaData::serializeInDentryFormat(Serializer& ser, DiskMetaDataType metaDataType)
{
   // note: the total amount of serialized data may not be larger than DIRENTRY_SERBUF_SIZE

   // set the type into the entry (1 byte)
   ser % uint8_t(metaDataType);

   // pick the storage format version
   int dentryFormatVersion;

   if (DirEntryType_ISDIR(this->dentryDiskData->getDirEntryType() ) )
      dentryFormatVersion = DIRENTRY_STORAGE_FORMAT_VER3; // directories always use V3
   else
   if (metaDataType == DiskMetaDataType_FILEINODE)
      dentryFormatVersion = DIRENTRY_STORAGE_FORMAT_VER6;
   else
   if (!(this->dentryDiskData->getDentryFeatureFlags() & DENTRY_FEATURE_INODE_INLINE) )
      dentryFormatVersion = DIRENTRY_STORAGE_FORMAT_VER3; // file dentry, inode not inlined
   else
   if (this->inodeData->getOrigFeature() == FileInodeOrigFeature_TRUE)
      dentryFormatVersion = DIRENTRY_STORAGE_FORMAT_VER6;
   else
      dentryFormatVersion = DIRENTRY_STORAGE_FORMAT_VER4;

   // storage-format-version (1 byte)
   ser % uint8_t(dentryFormatVersion);

   // dentry feature flags (2 bytes)
   // note: for newly written/serialized dentries we always use the long nodeIDs
   this->dentryDiskData->addDentryFeatureFlag(DENTRY_FEATURE_32BITIDS);
   ser % uint16_t(this->dentryDiskData->getDentryFeatureFlags() );

   // entryType (1 byte)
   // (note: we have a fixed position for the entryType byte: DIRENTRY_TYPE_BUF_POS)
   ser % uint8_t(this->dentryDiskData->getDirEntryType() );

   // 3 bytes padding for 8 byte alignment => end of 8 byte header
   ser.skip(3);

   switch (dentryFormatVersion)
   {
      case DIRENTRY_STORAGE_FORMAT_VER3:
         serializeDentryV3(ser); // dirs and dentries without inlined inode
         break;

      case DIRENTRY_STORAGE_FORMAT_VER4:
         serializeDentryV4(ser); // V4 for files with inlined inodes
         break;

      case DIRENTRY_STORAGE_FORMAT_VER5: // gets automatically upgraded to v6
         // note: the version selection above never yields VER5
         dentryFormatVersion = DIRENTRY_STORAGE_FORMAT_VER6; // inlined inodes + chunk-path-V3
         inodeData->getStripePattern()->setStoragePoolId(StoragePoolStore::DEFAULT_POOL_ID);
         BEEGFS_FALLTHROUGH;

      case DIRENTRY_STORAGE_FORMAT_VER6:
         serializeDentryV6(ser); // inlined inodes + chunk-path-V3 + storage pools in pattern
         break;
   }
}
/*
 * Deserialize a dentry buffer. Here we only deserialize basic values (the 8 byte header) and
 * will continue with version specific dentry sub functions.
 *
 * On failure the deserializer is left in (or set to) the bad state; callers have to check
 * des.good() afterwards.
 *
 * Note: Applies deserialized data directly to the current object
 */
void DiskMetaData::deserializeDentry(Deserializer& des)
{
   const char* logContext = DISKMETADATA_LOG_CONTEXT " (Dentry Deserialization)";

   // note: assumes that the total amount of serialized data may not be larger than
   // DIRENTRY_SERBUF_SIZE

   /* note: all header locals are zero-initialized, so that a failed read (truncated buffer) can
      never hand an indeterminate value to the setters or to the version switch below */

   uint8_t formatVersion = 0; // which dentry format version

   {
      uint8_t metaDataType = 0; // only read to advance the buffer
      des % metaDataType;
   }

   des % formatVersion;

   { // dentry feature flags
      uint16_t dentryFeatureFlags = 0;

      des % dentryFeatureFlags;
      if (!des.good())
      {
         std::string serialType = "Feature flags";
         LogContext(logContext).logErr("Deserialization failed: " + serialType);
         return;
      }

      bool compatCheckRes = checkFeatureFlagsCompat(
         dentryFeatureFlags, getSupportedDentryFeatureFlags() );
      if(unlikely(!compatCheckRes) )
      {
         des.setBad();
         LOG(GENERAL, ERR, "Incompatible DirEntry feature flags found.", hex(dentryFeatureFlags),
               hex(getSupportedDentryFeatureFlags()));
         return;
      }

      this->dentryDiskData->setDentryFeatureFlags(dentryFeatureFlags);
   }

   {
      // (note: we have a fixed position for the entryType byte: DIRENTRY_TYPE_BUF_POS)
      uint8_t type = 0;

      des % type;
      this->dentryDiskData->setDirEntryType((DirEntryType)type );
   }

   // mirrorNodeID (depends on feature flag) + padding
   if(this->dentryDiskData->getDentryFeatureFlags() & DENTRY_FEATURE_MIRRORED)
   { // mirrorNodeID + padding
      // Note: we have an old-style mirrored file here; what we do is just throw away the mirror
      // information here, because we don't need it; when the file gets written back to disk it
      // will be written as unmirrored file!

      // first of all strip the feature flag, so we do not write it to disk again
      this->dentryDiskData->removeDentryFeatureFlag(DENTRY_FEATURE_MIRRORED);

      uint16_t mirrorNodeID = 0; // will be thrown away
      des % mirrorNodeID;

      // 1 byte padding for 8 byte aligned header
      des.skip(1);
   }
   else
   { // 3 bytes padding for 8 byte aligned header
      des.skip(3);
   }

   // end of 8-byte header

   switch (formatVersion)
   {
      case DIRENTRY_STORAGE_FORMAT_VER3:
         deserializeDentryV3(des); // owner node ID is part of the V3 data => nothing else to do
         return;

      case DIRENTRY_STORAGE_FORMAT_VER4:
         deserializeDentryV4(des);
         break;

      case DIRENTRY_STORAGE_FORMAT_VER5:
         deserializeDentryV5(des);
         break;

      case DIRENTRY_STORAGE_FORMAT_VER6:
         deserializeDentryV6(des);
         break;

      default:
         LogContext(logContext).logErr("Invalid Storage Format: " +
            StringTk::uintToStr((unsigned) formatVersion) );
         des.setBad();
         return;
   }

   // common post-processing for the formats with inlined inodes (V4, V5, V6)

   if (!des.good())
      return;

   // data inlined, so this node must be the owner
   App* app = Program::getApp();
   MirrorBuddyGroupMapper* metaBuddyGroupMapper = app->getMetaBuddyGroupMapper();

   // setting the owner node ID is a manual action, as it is not saved on disk
   // depending on whether the file is mirrored or not, we set nodeID or buddyGroupID here
   NumNodeID ownerNodeID = this->inodeData->getIsBuddyMirrored() ?
      NumNodeID(metaBuddyGroupMapper->getLocalGroupID() ) : app->getLocalNode().getNumID();
   this->dentryDiskData->setOwnerNodeID(ownerNodeID);

   // V4 does not have the origFeature; V5 and V6 have it
   this->inodeData->setOrigFeature( (formatVersion == DIRENTRY_STORAGE_FORMAT_VER4) ?
      FileInodeOrigFeature_FALSE : FileInodeOrigFeature_TRUE);

   // for upgrade of V5 to V6 format, immediately add pool ID
   if (formatVersion == DIRENTRY_STORAGE_FORMAT_VER5)
      inodeData->getStripePattern()->setStoragePoolId(StoragePoolStore::DEFAULT_POOL_ID);
}
/*
 * Version 3 format, now only used for directories and for example for disposal files.
 *
 * Body: the entry ID (4 byte aligned string), followed by the owner node ID.
 */
void DiskMetaData::serializeDentryV3(Serializer& ser)
{
   ser % serdes::stringAlign4(this->dentryDiskData->getEntryID() );
   ser % this->dentryDiskData->getOwnerNodeID();
}
/**
 * Deserialize dentries, which have the V3 format.
 *
 * Reads the entry ID (applied to both the dentry data and the inode data) and the owner node
 * ID. The on-disk width of the owner node ID depends on DENTRY_FEATURE_32BITIDS in the already
 * deserialized dentry feature flags.
 *
 * Note: Nothing is checked here; callers have to check des.good() afterwards.
 */
void DiskMetaData::deserializeDentryV3(Deserializer& des)
{
   {
      std::string entryID;

      des % serdes::stringAlign4(entryID);
      this->dentryDiskData->setEntryID(entryID);
      this->inodeData->setEntryID(entryID);
   }

   if (this->dentryDiskData->getDentryFeatureFlags() & DENTRY_FEATURE_32BITIDS)
   {
      // dentry uses 32 bit nodeIDs, so we can just use our regular NumNodeIDs
      NumNodeID ownerNodeID;

      des % ownerNodeID;
      this->dentryDiskData->setOwnerNodeID(ownerNodeID);
   }
   else
   {
      // dentry uses old-style 16 bit nodeIDs
      // (zero-initialized, so that a failed read cannot turn an indeterminate value into the
      // owner node ID)
      uint16_t ownerNodeID = 0;

      des % ownerNodeID;
      this->dentryDiskData->setOwnerNodeID(NumNodeID(ownerNodeID));
   }
}
/*
 * Version 4 format, for files with inlined inodes.
 *
 * Body: inode feature flags, 4 unused bytes, stat data (dentry V4 format), entry ID, stripe
 * pattern and - only with FILEINODE_FEATURE_HAS_VERSIONS - file version and meta version.
 */
void DiskMetaData::serializeDentryV4(Serializer& ser)
{
   StatData* statData = this->inodeData->getInodeStatData();
   StripePattern* stripePattern = this->inodeData->getPattern();

   ser % inodeData->getInodeFeatureFlags();

   ser.skip(4); // unused, was the chunkHash

   ser % statData->serializeAs(StatDataFormat_DENTRYV4);
   ser % serdes::stringAlign4(this->dentryDiskData->getEntryID() );
   ser % stripePattern;

   const bool hasVersions =
      (inodeData->getInodeFeatureFlags() & FILEINODE_FEATURE_HAS_VERSIONS) != 0;
   if (hasVersions)
   {
      ser % inodeData->getFileVersion();
      ser % inodeData->getMetaVersion();
   }
}
/**
 * Deserialize dentries, which have the V4 format, which includes inlined inodes and have the old
 * chunk path (V1, directly in hash dirs).
 *
 * Sets the deserializer bad if unsupported inode feature flags are found. After the body has
 * been read, FILEINODE_FEATURE_HAS_VERSIONS is always added to the inode feature flags.
 *
 * Note: Applies deserialized data directly to the current object
 */
void DiskMetaData::deserializeDentryV4(Deserializer& des)
{
   uint32_t inodeFeatureFlags;

   des % inodeFeatureFlags;
   if (!des.good())
      return;

   if (unlikely(!checkFeatureFlagsCompat(
      inodeFeatureFlags, getSupportedDentryV4FileInodeFeatureFlags() ) ) )
   {
      des.setBad();
      LOG(GENERAL, ERR, "Incompatible FileInode feature flags found.", hex(inodeFeatureFlags),
            hex(getSupportedDentryV4FileInodeFeatureFlags()));
      return;
   }

   this->inodeData->setInodeFeatureFlags(inodeFeatureFlags);

   des.skip(4); // unused, was the chunkHash

   des % this->inodeData->getInodeStatData()->serializeAs(StatDataFormat_DENTRYV4);

   // note: up to here only fixed integers length, below follow variable string lengths

   std::string entryID;

   des % serdes::stringAlign4(entryID);
   this->dentryDiskData->setEntryID(entryID);
   this->inodeData->setEntryID(entryID);

   // mirrorNodeID (depends on feature flag)
   if (inodeFeatureFlags & FILEINODE_FEATURE_MIRRORED)
   {
      // Note: we have an old-style mirrored file here; what we do is just throw away the mirror
      // information here, because we don't need it; when the file gets written back to disk it
      // will be written as unmirrored file!

      // first of all strip the feature flag, so we do not write it to disk again
      this->inodeData->removeInodeFeatureFlag(FILEINODE_FEATURE_MIRRORED);

      uint16_t mirrorNodeID; // will be thrown away
      des % mirrorNodeID;
   }

   // V4 patterns are read without a storage pool
   this->inodeData->setPattern(StripePattern::deserialize(des, false) );

   if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_VERSIONS)
   {
      des % inodeData->fileVersion;
      des % inodeData->metaVersion;
   }

   inodeData->addInodeFeatureFlag(FILEINODE_FEATURE_HAS_VERSIONS);

#ifdef BEEGFS_DEBUG
   // sanity checks
   const char* logContext = DISKMETADATA_LOG_CONTEXT " (Dentry Deserialization V4)";

   if (unlikely(!(this->dentryDiskData->getDentryFeatureFlags() & DENTRY_FEATURE_IS_FILEINODE)))
      LogContext(logContext).logErr("Bug: inode data successfully deserialized, but "
         "the file-inode flag is not set. ");
#endif
}
/*
 * Version 6 format, for files with inlined inodes and orig-parentID + orig-UID + storage pool
 *
 * Body: inode feature flags; then either the file state plus 3 unused bytes (with
 * FILEINODE_FEATURE_HAS_STATE_FLAGS) or 4 unused bytes; stat data (file inode format);
 * optional orig-UID; optional orig-parentID; entry ID; stripe pattern; optional file version
 * and meta version.
 *
 * Note: Serialization in V5 is not supported any longer => auto upgrade to v6
 */
void DiskMetaData::serializeDentryV6(Serializer& ser)
{
   StatData* statData = this->inodeData->getInodeStatData();
   StripePattern* stripePattern = this->inodeData->getPattern();

   uint32_t inodeFeatureFlags = inodeData->getInodeFeatureFlags();
   ser % inodeFeatureFlags;

   if (!(inodeFeatureFlags & FILEINODE_FEATURE_HAS_STATE_FLAGS) )
   {
      ser.skip(4); // unused, for alignment
   }
   else
   {
      ser % inodeData->getFileState();
      ser.skip(3); // unused (3 bytes) for 8 byte alignment
   }

   ser % statData->serializeAs(StatDataFormat_FILEINODE);

   if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_UID)
      ser % this->inodeData->getOrigUID();

   if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_PARENTID)
      ser % serdes::stringAlign4(this->inodeData->getOrigParentEntryID() );

   ser % serdes::stringAlign4(this->dentryDiskData->getEntryID() );
   ser % stripePattern;

   if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_VERSIONS)
   {
      ser % inodeData->getFileVersion();
      ser % inodeData->getMetaVersion();
   }
}
/**
 * Deserialize dentries, which have the V5 or V6 format. Both include inlined inodes and have the
 * new chunk path (V2, which has UID and parentID); Additionally, V6 has a storage pool in stripe
 * pattern
 *
 * Sets the deserializer bad if unsupported inode feature flags are found. After the body has
 * been read, FILEINODE_FEATURE_HAS_VERSIONS is always added to the inode feature flags.
 *
 * @param hasStoragePool passed on to StripePattern::deserialize(); false for V5, true for V6.
 */
void DiskMetaData::deserializeDentryV5V6(Deserializer& des, bool hasStoragePool)
{
   unsigned inodeFeatureFlags;

   des % inodeFeatureFlags;
   if (!des.good())
      return;

   if (unlikely(!checkFeatureFlagsCompat(
      inodeFeatureFlags, getSupportedDentryV5FileInodeFeatureFlags() ) ) )
   {
      des.setBad();
      LOG(GENERAL, ERR, "Incompatible FileInode feature flags found.", hex(inodeFeatureFlags),
            hex(getSupportedDentryV5FileInodeFeatureFlags()));
      return;
   }

   this->inodeData->setInodeFeatureFlags(inodeFeatureFlags);

   if (!(inodeFeatureFlags & FILEINODE_FEATURE_HAS_STATE_FLAGS) )
   {
      des.skip(4); // unused, for alignment
   }
   else
   {
      uint8_t state;

      des % state;
      this->inodeData->setFileState(state);

      des.skip(3); // unused, for alignment
   }

   StatData* statData = this->inodeData->getInodeStatData();
   des % statData->serializeAs(StatDataFormat_FILEINODE);

   {
      unsigned origParentUID;

      if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_UID)
         des % origParentUID;
      else
         origParentUID = statData->getUserID(); // no separate field, orig-UID == UID

      this->inodeData->setOrigUID(origParentUID);
   }

   if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_PARENTID)
   {
      std::string origParentEntryID;

      des % serdes::stringAlign4(origParentEntryID);
      this->inodeData->setDiskOrigParentEntryID(origParentEntryID);
   }

   // note: up to here only fixed integers length, below follow variable string lengths

   std::string entryID;

   des % serdes::stringAlign4(entryID);
   this->dentryDiskData->setEntryID(entryID);
   this->inodeData->setEntryID(entryID);

   if (inodeFeatureFlags & FILEINODE_FEATURE_MIRRORED)
   {
      // Note: we have an old-style mirrored file here; what we do is just throw away the mirror
      // information here, because we don't need it; when the file gets written back to disk it
      // will be written as unmirrored file!

      // first of all strip the feature flag, so we do not write it to disk again
      this->inodeData->removeInodeFeatureFlag(FILEINODE_FEATURE_MIRRORED);

      uint16_t mirrorNodeID; // will be thrown away
      des % mirrorNodeID;
   }

   this->inodeData->setPattern(StripePattern::deserialize(des, hasStoragePool) );

   if (inodeFeatureFlags & FILEINODE_FEATURE_HAS_VERSIONS)
   {
      des % inodeData->fileVersion;
      des % inodeData->metaVersion;
   }

   inodeData->addInodeFeatureFlag(FILEINODE_FEATURE_HAS_VERSIONS);
}
/**
 * Deserialize a V5 dentry: the shared V5/V6 code, reading the stripe pattern without a storage
 * pool. Debug builds additionally log an error if the file-inode dentry flag is not set.
 */
void DiskMetaData::deserializeDentryV5(Deserializer& des)
{
   deserializeDentryV5V6(des, false);

#ifdef BEEGFS_DEBUG
   // sanity checks
   const char* logContext = DISKMETADATA_LOG_CONTEXT " (Dentry Deserialization V5)";

   const bool isFileInode =
      (this->dentryDiskData->getDentryFeatureFlags() & DENTRY_FEATURE_IS_FILEINODE) != 0;
   if (unlikely(!isFileInode) )
      LogContext(logContext).logErr("Bug: inode data successfully deserialized, but "
         "the file-inode flag is not set. ");
#endif
}
/**
 * Deserialize a dentry stored in the V6 format.
 *
 * Delegates to the shared V5/V6 reader with hasStoragePool == true.
 */
void DiskMetaData::deserializeDentryV6(Deserializer& des)
{
   deserializeDentryV5V6(des, true);

#ifdef BEEGFS_DEBUG
   // sanity check (debug builds only): the dentry read above is expected to carry the
   // file-inode feature flag
   const bool hasFileInodeFlag =
      this->dentryDiskData->getDentryFeatureFlags() & DENTRY_FEATURE_IS_FILEINODE;

   if (unlikely(!hasFileInodeFlag))
   {
      const char* logContext = DISKMETADATA_LOG_CONTEXT " (Dentry Deserialization V6)";

      LogContext(logContext).logErr("Bug: inode data successfully deserialized, but "
         "the file-inode flag is not set. ");
   }
#endif
}
/**
 * Deserialize a file inode that is located in the inode-hash directories.
 *
 * File inodes are currently stored in the dentry format only (this will probably change later
 * on), so the work is forwarded to deserializeDentry().
 */
void DiskMetaData::deserializeFileInode(Deserializer& des)
{
   deserializeDentry(des);
}
template<typename Inode, typename Ctx>
void DiskMetaData::serializeDirInodeCommonData(Inode& inode, Ctx& ctx)
{
if (likely(inode.featureFlags & DIRINODE_FEATURE_EARLY_SUBDIRS))
ctx % inode.numSubdirs;
ctx % inode.statData.serializeAs(
inode.featureFlags & DIRINODE_FEATURE_STATFLAGS
? StatDataFormat_DIRINODE
: StatDataFormat_DIRINODE_NOFLAGS);
if (unlikely(!(inode.featureFlags & DIRINODE_FEATURE_EARLY_SUBDIRS)))
ctx % inode.numSubdirs;
ctx
% inode.numFiles
% serdes::stringAlign4(inode.id)
% serdes::stringAlign4(inode.parentDirID);
}
/*
 * Serialize a DirInode in DIRECTORY_STORAGE_FORMAT_VER3.
 *
 * Note: Current object state is used for the serialization. The EARLY_SUBDIRS and STATFLAGS
 * feature flags are set on the given inode before it is written.
 * Note: the total amount of serialized data may not be larger than META_SERBUF_SIZE
 */
void DiskMetaData::serializeDirInode(Serializer& ser, DirInode& inode)
{
   inode.featureFlags |= (DIRINODE_FEATURE_EARLY_SUBDIRS | DIRINODE_FEATURE_STATFLAGS);

   // header: type, format version, feature flags
   ser % uint8_t(DiskMetaDataType_DIRINODE);
   ser % uint8_t(DIRECTORY_STORAGE_FORMAT_VER3);
   ser % inode.featureFlags;

   // fields shared with the deserializer
   serializeDirInodeCommonData<const DirInode>(inode, ser);

   // trailer: node IDs and stripe pattern
   ser % inode.ownerNodeID;
   ser % inode.parentNodeID;
   ser % inode.stripePattern;
}
/*
 * Deserialize a DirInode
 *
 * Reads, in this order: metadata type, format version, feature flags, the common DirInode
 * fields, owner/parent node IDs (layout depends on the format version), an optional legacy
 * mirror node ID and finally the stripe pattern.
 *
 * On an unexpected metadata type, unsupported feature flags or an unknown format version an
 * error is logged, the deserializer is marked bad and the function returns early.
 *
 * Note: Applies deserialized data directly to the current object
 *
 * @param des deserializer to read from
 * @param outInode inode that receives the deserialized values
 */
void DiskMetaData::deserializeDirInode(Deserializer& des, DirInode& outInode)
{
const char* logContext = DISKMETADATA_LOG_CONTEXT " (DirInode Deserialization)";
// note: assumes that the total amount of serialized data may not be larger than META_SERBUF_SIZE
uint8_t formatVersion;
{
uint8_t metaDataType;
des % metaDataType;
// only complain about the type if the read itself succeeded
if (unlikely(des.good() && metaDataType != DiskMetaDataType_DIRINODE))
{
LogContext(logContext).logErr(
std::string("Deserialization failed: expected DirInode, but got (numeric type): ")
+ StringTk::uintToStr((unsigned) metaDataType) );
des.setBad();
return;
}
}
des % formatVersion;
{
des % outInode.featureFlags;
// this check also covers a failed read of metaDataType or formatVersion above
if (!des.good())
return;
bool compatCheckRes = checkFeatureFlagsCompat(
outInode.featureFlags, getSupportedDirInodeFeatureFlags() );
if(unlikely(!compatCheckRes) )
{
LogContext(logContext).logErr("Incompatible DirInode feature flags found. "
"Used flags (hex): " + StringTk::uintToHexStr(outInode.featureFlags) + "; "
"Supported (hex): " + StringTk::uintToHexStr(getSupportedDirInodeFeatureFlags() ) );
des.setBad();
return;
}
}
// the "serialize" helper is shared by both directions; with a Deserializer it fills outInode
serializeDirInodeCommonData(outInode, des);
bool hasStoragePool;
switch (formatVersion)
{
case DIRECTORY_STORAGE_FORMAT_VER1:
{
// VER1 stores the node IDs as 16 bit values
uint16_t ownerNode;
uint16_t parentNode;
des
% ownerNode
% parentNode;
outInode.ownerNodeID = NumNodeID(ownerNode);
outInode.parentNodeID = NumNodeID(parentNode);
hasStoragePool = false;
break;
}
case DIRECTORY_STORAGE_FORMAT_VER2:
{
des
% outInode.ownerNodeID
% outInode.parentNodeID;
hasStoragePool = false;
break;
}
case DIRECTORY_STORAGE_FORMAT_VER3:
{
// same node ID layout as VER2, but the stripe pattern carries a storage pool
des
% outInode.ownerNodeID
% outInode.parentNodeID;
hasStoragePool = true;
break;
}
default:
{
LogContext(logContext).logErr("Incompatible DirInode version found. "
"Version:" + StringTk::uintToStr(formatVersion));
des.setBad();
return;
}
}
// mirrorNodeID (depends on feature flag)
if(outInode.featureFlags & DIRINODE_FEATURE_MIRRORED)
{
// Note: we have an old-style mirrored file here; what we do is just throw away the mirror
// information here, because we don't need it; when the file gets written back to disk it
// will be written as unmirrored file!
// first of all strip the feature flag, so we do not write it to disk again
outInode.removeFeatureFlag(DIRINODE_FEATURE_MIRRORED);
uint16_t mirrorNodeID; // will be thrown away
des % mirrorNodeID;
}
// NOTE(review): a previously set outInode.stripePattern is overwritten without being freed
// here - confirm that callers only pass inodes without a pattern.
outInode.stripePattern = StripePattern::deserialize(des, hasStoragePool);
// formats without a storage pool get the default pool assigned
// NOTE(review): if StripePattern::deserialize() can return NULL on failure, the call below
// dereferences it - confirm against StripePattern::deserialize().
if (!hasStoragePool)
outInode.getStripePattern()->setStoragePoolId(StoragePoolStore::DEFAULT_POOL_ID);
}
/**
 * @return bit mask of all dentry feature flags that this code is able to handle
 */
unsigned DiskMetaData::getSupportedDentryFeatureFlags()
{
   const unsigned supportedFlags =
      DENTRY_FEATURE_INODE_INLINE
      | DENTRY_FEATURE_IS_FILEINODE
      | DENTRY_FEATURE_MIRRORED
      | DENTRY_FEATURE_BUDDYMIRRORED
      | DENTRY_FEATURE_32BITIDS;

   return supportedFlags;
}
/**
 * @return bit mask of the file inode feature flags supported for inodes inlined into V4 dentries
 */
unsigned DiskMetaData::getSupportedDentryV4FileInodeFeatureFlags()
{
   const unsigned supportedFlags =
      FILEINODE_FEATURE_MIRRORED
      | FILEINODE_FEATURE_BUDDYMIRRORED
      | FILEINODE_FEATURE_HAS_VERSIONS;

   return supportedFlags;
}
/**
 * @return bit mask of the file inode feature flags supported for inodes inlined into V5 dentries
 */
unsigned DiskMetaData::getSupportedDentryV5FileInodeFeatureFlags()
{
   const unsigned supportedFlags =
      FILEINODE_FEATURE_MIRRORED
      | FILEINODE_FEATURE_BUDDYMIRRORED
      | FILEINODE_FEATURE_HAS_ORIG_PARENTID
      | FILEINODE_FEATURE_HAS_ORIG_UID
      | FILEINODE_FEATURE_HAS_VERSIONS
      | FILEINODE_FEATURE_HAS_RST
      | FILEINODE_FEATURE_HAS_STATE_FLAGS;

   return supportedFlags;
}
/**
 * @return bit mask of all dir inode feature flags that this code is able to handle
 */
unsigned DiskMetaData::getSupportedDirInodeFeatureFlags()
{
   const unsigned supportedFlags =
      DIRINODE_FEATURE_EARLY_SUBDIRS
      | DIRINODE_FEATURE_MIRRORED
      | DIRINODE_FEATURE_STATFLAGS
      | DIRINODE_FEATURE_BUDDYMIRRORED
      | DIRINODE_FEATURE_HAS_RST;

   return supportedFlags;
}
/**
 * Check that usedFeatureFlags contains no bit outside of supportedFeatureFlags.
 *
 * @param usedFeatureFlags flags found on the object being checked
 * @param supportedFeatureFlags mask of flags that are understood
 * @return true if every used flag is supported, false if at least one unsupported flag is set
 */
bool DiskMetaData::checkFeatureFlagsCompat(unsigned usedFeatureFlags,
   unsigned supportedFeatureFlags)
{
   return (usedFeatureFlags & ~supportedFeatureFlags) == 0;
}

View File

@@ -0,0 +1,78 @@
#pragma once
#include "FileInodeStoreData.h"
// 4 KiB; presumably the size of the buffer used to (de)serialize a dir entry - TODO confirm
#define DIRENTRY_SERBUF_SIZE (1024 * 4) /* make sure that this is always smaller or equal to
* META_SERBUF_SIZE */
// NOTE(review): the two *_BUF_POS values look like byte offsets of type fields inside a
// serialized buffer - confirm against the code that uses them
#define DISKMETADATA_TYPE_BUF_POS 0
#define DIRENTRY_TYPE_BUF_POS 4
// Type tag of an on-disk metadata object. Written and read as a single byte (8-bit), so the
// values must fit into uint8_t.
enum DiskMetaDataType
{
DiskMetaDataType_FILEDENTRY = 1, // may have inlined inodes
DiskMetaDataType_DIRDENTRY = 2,
DiskMetaDataType_FILEINODE = 3, // currently in dentry-format
DiskMetaDataType_DIRINODE = 4,
};
// forward declarations;
class DirInode;
class FileInode;
class DirEntry;
class DentryStoreData;
class FileInodeStoreData;
/**
 * Generic class for on-disk storage of dentries and inodes.
 *
 * Note: This class is inherited by DirEntry, but used as a plain object in FileInode methods.
 */
class DiskMetaData
{
   public:
      /**
       * Used for the DirEntry derived class.
       *
       * @param dentryData dentry values to (de)serialize; not owned by this object
       * @param inodeData inode values to (de)serialize; not owned by this object
       */
      DiskMetaData(DentryStoreData* dentryData, FileInodeStoreData* inodeData)
         : dentryDiskData(dentryData),
           inodeData(inodeData)
      { }

      // file inode / dentry (de)serialization, operating on the two data objects above
      void serializeFileInode(Serializer& ser);
      void serializeDentry(Serializer& ser);
      void deserializeFileInode(Deserializer& des);
      void deserializeDentry(Deserializer& des);

      // dir inode (de)serialization, independent of any DiskMetaData instance
      static void serializeDirInode(Serializer& ser, DirInode& inode);
      static void deserializeDirInode(Deserializer& des, DirInode& outInode);

   protected:
      DentryStoreData* dentryDiskData; // Not owned by this object!
      FileInodeStoreData* inodeData; // Not owned by this object!

   private:
      void serializeInDentryFormat(Serializer& ser, DiskMetaDataType metaDataType);

      // per format version writers
      void serializeDentryV3(Serializer& ser);
      void serializeDentryV4(Serializer& ser);
      void serializeDentryV6(Serializer& ser);

      // per format version readers; V5 and V6 share one implementation
      void deserializeDentryV3(Deserializer& des);
      void deserializeDentryV4(Deserializer& des);
      void deserializeDentryV5(Deserializer& des);
      void deserializeDentryV6(Deserializer& des);
      void deserializeDentryV5V6(Deserializer& des, bool hasStoragePool);

      // feature flag masks and the compatibility check built on them
      static unsigned getSupportedDentryFeatureFlags();
      static unsigned getSupportedDentryV4FileInodeFeatureFlags();
      static unsigned getSupportedDentryV5FileInodeFeatureFlags();
      static unsigned getSupportedDirInodeFeatureFlags();
      static bool checkFeatureFlagsCompat(unsigned usedFeatureFlags,
         unsigned supportedFeatureFlags);

      // DirInode fields common to serialization and deserialization
      template<typename Inode, typename Ctx>
      static void serializeDirInodeCommonData(Inode& inode, Ctx& ctx);
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
#include "FileInodeStoreData.h"
/**
 * Fill outPathInfo with the original parent UID, the original parent entryID and the PathInfo
 * flags that correspond to the stored origFeature value.
 *
 * An unset (or otherwise unexpected) origFeature is logged as a bug and reported as
 * PATHINFO_FEATURE_ORIG_UNKNOWN.
 *
 * @param outPathInfo object to be filled
 */
void FileInodeStoreData::getPathInfo(PathInfo* outPathInfo)
{
   unsigned flags;

   const FileInodeOrigFeature origFeature = getOrigFeature();

   if (origFeature == FileInodeOrigFeature_TRUE)
      flags = PATHINFO_FEATURE_ORIG;
   else
   if (origFeature == FileInodeOrigFeature_FALSE)
      flags = 0;
   else
   { // FileInodeOrigFeature_UNSET or any other value
      flags = PATHINFO_FEATURE_ORIG_UNKNOWN;
      LogContext("FileInode getPathInfo").logErr("Bug: Unknown PathInfo status.");
   }

   outPathInfo->set(getOrigUID(), getOrigParentEntryID(), flags);
}
/**
 * Compare feature flags, stat data, entryID, stripe pattern and the orig-* values of two
 * objects. fileVersion, metaVersion and rawFileState are not part of the comparison.
 *
 * Stripe patterns may be NULL (e.g. on default-constructed objects): two NULL patterns compare
 * equal, a NULL and a non-NULL pattern compare unequal.
 */
bool FileInodeStoreData::operator==(const FileInodeStoreData& second) const
{
   if (inodeFeatureFlags != second.inodeFeatureFlags
      || !(inodeStatData == second.inodeStatData)
      || !(entryID == second.entryID) )
      return false;

   if (stripePattern && second.stripePattern)
   {
      if (!stripePattern->stripePatternEquals(second.stripePattern) )
         return false;
   }
   else
   if (stripePattern != second.stripePattern)
      return false; // exactly one of the two patterns is NULL

   return origFeature == second.origFeature
      && origParentUID == second.origParentUID
      && origParentEntryID == second.origParentEntryID;
}

View File

@@ -0,0 +1,437 @@
/*
* Data of a FileInode stored on disk.
*/
#pragma once
#include <common/storage/striping/StripePattern.h>
#include <common/storage/Metadata.h>
#include <common/storage/StatData.h>
#include <common/storage/PathInfo.h>
/* Note: Don't forget to update DiskMetaData::getSupportedDentryV4FileInodeFeatureFlags() and
* DiskMetaData::getSupportedDentryV5FileInodeFeatureFlags() if you add new flags here. */
#define FILEINODE_FEATURE_MIRRORED 1 // indicate (old-style) mirrored inodes
#define FILEINODE_FEATURE_BUDDYMIRRORED 8 // indicate buddy mirrored inodes
// note: original parent-id and uid are required for the chunk-path calculation
#define FILEINODE_FEATURE_HAS_ORIG_PARENTID 16 // parent-id was updated
#define FILEINODE_FEATURE_HAS_ORIG_UID 32 // uid was updated
#define FILEINODE_FEATURE_HAS_STATFLAGS 64 // stat-data have their own flags
#define FILEINODE_FEATURE_HAS_VERSIONS 128 // file has a cto version counter
#define FILEINODE_FEATURE_HAS_RST 256 // file has remote targets
#define FILEINODE_FEATURE_HAS_STATE_FLAGS 512 // file has state flags (access state + data state)
// Tri-state telling whether orig information (original parent-id / uid) applies to an inode.
enum FileInodeOrigFeature
{
   FileInodeOrigFeature_UNSET = -1, // not determined yet
   FileInodeOrigFeature_TRUE  = 0,
   FileInodeOrigFeature_FALSE = 1
};
// Access control flags (lower 5 bits of the file state byte)
//
// NOTE: The naming of the access flags might seem counter-intuitive, because a flag names the
// operation that is BLOCKED:
//
// - UNLOCKED: no access restrictions are in place
// - READ_LOCK: READ operations are blocked, only writes are allowed ("write-only" file)
// - WRITE_LOCK: WRITE operations are blocked, only reads are allowed ("read-only" file)
// - READ_LOCK and WRITE_LOCK together: all access is blocked
namespace AccessFlags {
   constexpr uint8_t UNLOCKED   = 0;       // No flags set (no restrictions)
   constexpr uint8_t READ_LOCK  = 1u << 0; // 0x01 - Block reads
   constexpr uint8_t WRITE_LOCK = 1u << 1; // 0x02 - Block writes
   constexpr uint8_t RESERVED3  = 1u << 2; // 0x04 - Reserved for future use
   constexpr uint8_t RESERVED4  = 1u << 3; // 0x08 - Reserved for future use
   constexpr uint8_t RESERVED5  = 1u << 4; // 0x10 - Reserved for future use
}
// Represents data state (HSM application defined) (0-7)
using DataState = uint8_t;
/**
 * Read-only view on the raw file state byte.
 *
 * Layout: [data state (upper 3 bits) | access flags (lower 5 bits)]
 */
class FileState {
   public:
      static constexpr uint8_t ACCESS_FLAGS_MASK = 0x1F; // 0001 1111 (5 bits)
      static constexpr uint8_t DATA_STATE_MASK = 0xE0; // 1110 0000 (3 bits)
      static constexpr uint8_t DATA_STATE_SHIFT = 5; // Number of bits to shift

      /**
       * @param value raw byte (access flags + data state); defaults to 0
       */
      explicit FileState(uint8_t value = 0) : raw(value) {}

      /** @return the lower 5 bits (access flags) */
      uint8_t getAccessFlags() const { return raw & ACCESS_FLAGS_MASK; }

      /** @return the upper 3 bits (data state), shifted down to the range 0-7 */
      DataState getDataState() const
      {
         const uint8_t stateBits = raw & DATA_STATE_MASK;
         return stateBits >> DATA_STATE_SHIFT;
      }

      /** @return true if the READ_LOCK bit is set */
      bool isReadLocked() const { return (raw & AccessFlags::READ_LOCK) != 0; }

      /** @return true if the WRITE_LOCK bit is set */
      bool isWriteLocked() const { return (raw & AccessFlags::WRITE_LOCK) != 0; }

      /** @return true if none of the access flag bits is set */
      bool isUnlocked() const { return (raw & ACCESS_FLAGS_MASK) == 0; }

      /** @return true if both READ_LOCK and WRITE_LOCK are set */
      bool isFullyLocked() const
      {
         const uint8_t bothLocks = AccessFlags::READ_LOCK | AccessFlags::WRITE_LOCK;
         return (raw & bothLocks) == bothLocks;
      }

      /** @return the unmodified raw byte */
      uint8_t getRawValue() const { return raw; }

   private:
      uint8_t raw; // Raw byte representing access flags + data state of a file
};
/* inode data inlined into a direntry, such as in DIRENTRY_STORAGE_FORMAT_VER3
 *
 * Owns the stripe pattern: it is deleted in the destructor (unless it was handed out via
 * getStripePatternAndSetToNull() ).
 * NOTE(review): no copy constructor / copy assignment is declared although the class owns a raw
 * pointer; copying an instance would presumably lead to a double delete - confirm that no
 * caller copies. */
class FileInodeStoreData
{
   friend class FileInode;
   friend class DirEntry;
   friend class DirEntryStore;
   friend class GenericDebugMsgEx;
   friend class LookupIntentMsgEx; // just to avoid to copy two time statData
   friend class RecreateDentriesMsgEx;
   friend class RetrieveDirEntriesMsgEx;
   friend class MetaStore;
   friend class DiskMetaData;
   friend class AdjustChunkPermissionsMsgEx;
   friend class TestSerialization; // for testing

   public:
      /**
       * Create an empty object: no stripe pattern, orig feature unset, all counters zero.
       */
      FileInodeStoreData()
         : inodeFeatureFlags(FILEINODE_FEATURE_HAS_VERSIONS),
           stripePattern(NULL),
           origFeature(FileInodeOrigFeature_UNSET),
           origParentUID(0), // was left uninitialized, but is read by operator== / getOrigUID()
           fileVersion(0),
           metaVersion(0),
           rawFileState(0)
      { }

      /**
       * @param statData copied
       * @param stripePattern cloned; the caller keeps ownership of the passed object
       * @param featureFlags initial inode feature flags; FILEINODE_FEATURE_HAS_ORIG_UID is
       *    added if origFeature is TRUE and origParentUID differs from the UID in statData
       */
      FileInodeStoreData(const std::string& entryID, StatData* statData,
         StripePattern* stripePattern, unsigned featureFlags, unsigned origParentUID,
         const std::string& origParentEntryID, FileInodeOrigFeature origFeature)
         : inodeFeatureFlags(featureFlags),
           inodeStatData(*statData),
           entryID(entryID),
           origFeature(origFeature),
           origParentUID(origParentUID),
           origParentEntryID(origParentEntryID),
           fileVersion(0),
           metaVersion(0), rawFileState(0)
      {
         this->stripePattern = stripePattern->clone();

         if ((statData->getUserID() != origParentUID) &&
             (origFeature == FileInodeOrigFeature_TRUE) )
            this->inodeFeatureFlags |= FILEINODE_FEATURE_HAS_ORIG_UID;
      }

      bool operator==(const FileInodeStoreData& second) const;

      bool operator!=(const FileInodeStoreData& other) const { return !(*this == other); }

      /**
       * Used to set the values from those read from disk
       *
       * @param entryID used as entryID of the new object (not taken from diskData)
       * @param diskData source of all other values; its stripe pattern is cloned
       */
      FileInodeStoreData(std::string entryID, FileInodeStoreData* diskData) :
         entryID(entryID)
      {
         // this->entryID = entryID; // set in initializer
         this->stripePattern = NULL;

         setFileInodeStoreData(diskData);
      }

      ~FileInodeStoreData()
      {
         SAFE_DELETE_NOSET(this->stripePattern);
      }

   private:
      uint32_t inodeFeatureFlags; // feature flags for the inode itself, e.g. for mirroring

      StatData inodeStatData;
      std::string entryID; // filesystem-wide unique string
      StripePattern* stripePattern; // owned by this object (see destructor)

      FileInodeOrigFeature origFeature; // indirectly determined via dentry-version
      uint32_t origParentUID;
      std::string origParentEntryID;

      // version number for CTO cache consistency optimizations
      uint32_t fileVersion;
      uint32_t metaVersion;

      // raw byte value representing access flags + data state
      uint8_t rawFileState;

      void getPathInfo(PathInfo* outPathInfo);

   public:
      StatData* getInodeStatData()
      {
         return &this->inodeStatData;
      }

      std::string getEntryID()
      {
         return this->entryID;
      }

      /**
       * @return the internal pattern (may be NULL); ownership stays with this object
       */
      StripePattern* getStripePattern()
      {
         return this->stripePattern;
      }

      uint32_t getFileVersion() const { return fileVersion; }
      uint32_t getMetaVersion() const { return metaVersion; }

      void setFileVersion(uint32_t fileVersion)
      {
         this->fileVersion = fileVersion;
      }

      void setMetaVersion(uint32_t metaVersion)
      {
         this->metaVersion = metaVersion;
         setMetaVersionStat(metaVersion); //update metadata version in StatData
      }

   protected:

      /**
       * Set all fileInodeStoreData
       *
       * Note: Might update existing values and if these are allocated, such as stripePattern,
       *    these need to be freed first.
       * Note: the entryID is not taken over from diskData.
       */
      void setFileInodeStoreData(FileInodeStoreData* diskData)
      {
         SAFE_DELETE_NOSET(this->stripePattern);
         this->stripePattern = diskData->getStripePattern()->clone();

         this->inodeStatData = *(diskData->getInodeStatData() );
         this->inodeFeatureFlags = diskData->getInodeFeatureFlags();
         this->origFeature = diskData->origFeature;
         this->origParentUID = diskData->origParentUID;
         this->origParentEntryID = diskData->origParentEntryID;
         this->fileVersion = diskData->fileVersion;
         setMetaVersion(diskData->metaVersion);
         this->rawFileState = diskData->rawFileState;
      }

      void setInodeFeatureFlags(unsigned flags)
      {
         this->inodeFeatureFlags = flags;
      }

      void addInodeFeatureFlag(unsigned flag)
      {
         this->inodeFeatureFlags |= flag;
      }

      void removeInodeFeatureFlag(unsigned flag)
      {
         this->inodeFeatureFlags &= ~flag;
      }

      void setBuddyMirrorFeatureFlag(bool mirrored)
      {
         if (mirrored)
            addInodeFeatureFlag(FILEINODE_FEATURE_BUDDYMIRRORED);
         else
            removeInodeFeatureFlag(FILEINODE_FEATURE_BUDDYMIRRORED);
      }

      bool getIsBuddyMirrored() const
      {
         return (getInodeFeatureFlags() & FILEINODE_FEATURE_BUDDYMIRRORED);
      }

      void setIsRstAvailable(bool available)
      {
         if (available)
            addInodeFeatureFlag(FILEINODE_FEATURE_HAS_RST);
         else
            removeInodeFeatureFlag(FILEINODE_FEATURE_HAS_RST);
      }

      bool getIsRstAvailable() const
      {
         return (getInodeFeatureFlags() & FILEINODE_FEATURE_HAS_RST);
      }

      /**
       * Store the raw file state byte and keep FILEINODE_FEATURE_HAS_STATE_FLAGS in sync with
       * it: the flag is set for every non-zero value and removed for zero.
       */
      void setFileState(uint8_t value)
      {
         this->rawFileState = value;

         if (this->rawFileState == 0)
            removeInodeFeatureFlag(FILEINODE_FEATURE_HAS_STATE_FLAGS);
         else
            addInodeFeatureFlag(FILEINODE_FEATURE_HAS_STATE_FLAGS);
      }

      uint8_t getFileState() const
      {
         // If FILEINODE_FEATURE_HAS_STATE_FLAGS is not set,
         // return the default "unlocked + zero data state"
         if (!(inodeFeatureFlags & FILEINODE_FEATURE_HAS_STATE_FLAGS))
         {
            constexpr uint8_t defaultAccessFlags = AccessFlags::UNLOCKED;
            constexpr uint8_t defaultDataState = 0;

            // Format: [dataState (upper 3 bits) | accessFlags (lower 5 bits)]
            return (defaultAccessFlags & FileState::ACCESS_FLAGS_MASK) |
               ((defaultDataState << FileState::DATA_STATE_SHIFT) & FileState::DATA_STATE_MASK);
         }

         // Return the explicitly set state if feature is supported
         return rawFileState;
      }

      void setInodeStatData(StatData& statData)
      {
         this->inodeStatData = statData;
      }

      void setEntryID(const std::string& entryID)
      {
         this->entryID = entryID;
      }

      /**
       * Take over the given pattern pointer.
       *
       * Note: a previously set pattern is neither deleted nor returned here.
       */
      void setPattern(StripePattern* pattern)
      {
         this->stripePattern = pattern;
      }

      void setOrigUID(unsigned origParentUID)
      {
         this->origParentUID = origParentUID;
      }

      /**
       * Set the origParentEntryID based on the parentDir. Feature flag will not be updated.
       * This is for inodes which are not de-inlined and not renamed between dirs.
       */
      void setDynamicOrigParentEntryID(const std::string& origParentEntryID)
      {
         /* Never overwrite existing data! Callers do not check if they need to set it only we do
          * that here. */
         if (!(this->inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_PARENTID) )
            this->origParentEntryID = origParentEntryID;
      }

      /**
       * Set the origParentEntryID from disk, no feature flag test and
       * feature flag will not be updated.
       * Note: Use this for disk-deserialization.
       */
      void setDiskOrigParentEntryID(const std::string& origParentEntryID)
      {
         this->origParentEntryID = origParentEntryID;
      }

      /**
       * Set the origParentEntryID. Feature flag will be updated, origInformation will be stored
       * to disk.
       * Note: Use this on file renames between dirs and de-inlining.
       */
      void setPersistentOrigParentEntryID(const std::string& origParentEntryID)
      {
         /* Never overwrite existing data! Callers do not check if they need to set it only we do
          * that here. */
         if ( (!(this->inodeFeatureFlags & FILEINODE_FEATURE_HAS_ORIG_PARENTID) ) &&
              (this->getOrigFeature() == FileInodeOrigFeature_TRUE) )
         {
            this->origParentEntryID = origParentEntryID;
            addInodeFeatureFlag(FILEINODE_FEATURE_HAS_ORIG_PARENTID);
         }
      }

      uint32_t getInodeFeatureFlags() const
      {
         return this->inodeFeatureFlags;
      }

      StripePattern* getPattern()
      {
         return this->stripePattern;
      }

      void setOrigFeature(FileInodeOrigFeature value)
      {
         this->origFeature = value;
      }

      FileInodeOrigFeature getOrigFeature() const
      {
         return this->origFeature;
      }

      uint32_t getOrigUID() const
      {
         return this->origParentUID;
      }

      const std::string& getOrigParentEntryID() const
      {
         return this->origParentEntryID;
      }

      void incDecNumHardlinks(int value)
      {
         this->inodeStatData.incDecNumHardLinks(value);
      }

      void setNumHardlinks(unsigned numHardlinks)
      {
         this->inodeStatData.setNumHardLinks(numHardlinks);
      }

      void setMetaVersionStat(unsigned metaVersion)
      {
         this->inodeStatData.setMetaVersionStat(metaVersion);
      }

      unsigned getNumHardlinks() const
      {
         return this->inodeStatData.getNumHardlinks();
      }

      /**
       * Return the pattern and set the internal pattern to NULL to make sure it does not get
       * deleted on object destruction.
       */
      StripePattern* getStripePatternAndSetToNull()
      {
         StripePattern* pattern = this->stripePattern;
         this->stripePattern = NULL;

         return pattern;
      }

      void setAttribChangeTimeSecs(int64_t attribChangeTimeSecs)
      {
         this->inodeStatData.setAttribChangeTimeSecs(attribChangeTimeSecs);
      }
};

View File

@@ -0,0 +1,112 @@
#include <common/threading/UniqueRWLock.h>
#include <common/threading/RWLockGuard.h>
#include <program/Program.h>
#include "GlobalInodeLockStore.h"
/**
 * Lock a file inode by inserting it into the store.
 *
 * Note: remember to call releaseFileInode()
 *
 * @return false if file is already in store or inode creation failed, true if file was inserted
 */
bool GlobalInodeLockStore::insertFileInode(EntryInfo* entryInfo)
{
   RWLockGuard lock(rwlock, SafeRWLock_WRITE);

   const std::string entryID = entryInfo->getEntryID();

   if (this->inodes.find(entryID) != this->inodes.end())
      return false; // already in the map => already locked

   // not in map yet => try to insert it in map
   // (log the looked-up ID: there is no valid map iterator to take it from at this point)
   LOG_DBG(GENERAL, SPAM, "Insert file inode in GlobalInodeLockStore.", ("FileInodeID", entryID));

   FileInode* inode = FileInode::createFromEntryInfo(entryInfo);
   if (!inode)
      return false;

   this->inodes.insert(GlobalInodeLockMap::value_type(entryID, inode));
   this->inodeTimes.insert(GlobalInodeTimestepMap::value_type(entryID, 0.0));

   return true;
}
/**
 * Unlock a file inode: delete the stored inode object and remove its map and time entries.
 *
 * @return false if file was not in file or time store, true if we found the file in the file
 *    and time store
 */
bool GlobalInodeLockStore::releaseFileInode(const std::string& entryID)
{
   RWLockGuard lock(rwlock, SafeRWLock_WRITE);

   GlobalInodeLockMapIter iter = this->inodes.find(entryID);
   if (iter == this->inodes.end())
      return false;

   // inode is in the map, release it
   LOG_DBG(GENERAL, SPAM, "Release file inode in GlobalInodeLockStore.", ("FileInodeID", iter->first));

   delete(iter->second);
   this->inodes.erase(iter);

   return this->releaseInodeTime(entryID);
}
/**
* Note: SafeRWLock_WRITE lock should be held here
*
* @return false if file was not in time store at all, true if we found the file in the time store
*/
bool GlobalInodeLockStore::releaseInodeTime(const std::string& entryID)
{
GlobalInodeTimestepMap::iterator iterTime = this->inodeTimes.find(entryID);
if(iterTime != this->inodeTimes.end() )
{
this->inodeTimes.erase(iterTime);
return true;
}
return false;
}
/**
 * Check under the read lock whether the inode of the given entry is currently locked.
 *
 * @return true if file is in the store, false if it is not
 */
bool GlobalInodeLockStore::lookupFileInode(EntryInfo* entryInfo)
{
   RWLockGuard lock(rwlock, SafeRWLock_READ);

   return this->inodes.count(entryInfo->getEntryID()) != 0;
}
/**
 * Look up the stored inode object of the given entry under the read lock.
 *
 * @return the stored inode (still owned by the store) or nullptr if the entry is not in the
 *    store
 */
FileInode* GlobalInodeLockStore::getFileInode(EntryInfo* entryInfo)
{
   RWLockGuard lock(rwlock, SafeRWLock_READ);

   GlobalInodeLockMapIter iter = this->inodes.find(entryInfo->getEntryID());

   return (iter == this->inodes.end()) ? nullptr : iter->second;
}
/**
 * Delete all stored inode objects and empty both the inode map and the time store.
 */
void GlobalInodeLockStore::clearLockStore()
{
   // take the lock first: the map size below must not be read while another thread modifies
   // the map
   RWLockGuard lock(rwlock, SafeRWLock_WRITE);

   LOG_DBG(GENERAL, DEBUG, "GlobalInodeLockStore::clearLockStore",
      ("# of loaded entries to be cleared", inodes.size()));

   for (GlobalInodeLockMapIter iter = this->inodes.begin(); iter != this->inodes.end(); ++iter)
      delete(iter->second);

   this->inodes.clear();

   //clear Timestep store
   this->clearTimeStoreUnlocked();
}
/**
 * Remove all entries from the time store.
 *
 * Note: does no locking itself
 */
void GlobalInodeLockStore::clearTimeStoreUnlocked()
{
   this->inodeTimes.clear();
}

View File

@@ -0,0 +1,43 @@
#pragma once
#include <common/Common.h>
#include <storage/FileInode.h>
// entryID => inode object held while the entry is locked
typedef std::map<std::string, FileInode*> GlobalInodeLockMap;
typedef GlobalInodeLockMap::iterator GlobalInodeLockMapIter;
// entryID => time value; entries are inserted with 0.0 together with the inode
// NOTE(review): meaning/unit of the value is not visible here - confirm with the users
typedef std::map<std::string, float> GlobalInodeTimestepMap;
/**
* Global store for file inodes which are locked for referencing.
* Used for chunk balancing operations.
* This object initializes the GlobalInodeLockMap.
* It is used for taking and releasing locks on the inodes of file chunks.
* "Locking" here means we successfully insert an element into a map.
*/
class GlobalInodeLockStore
{
friend class InodeFileStore;
friend class MetaStore;
public:
// deletes all inode objects that are still stored
~GlobalInodeLockStore()
{
this->clearLockStore();
}
// lock an inode; false if it is already locked or could not be created
bool insertFileInode(EntryInfo* entryInfo);
// unlock an inode again; false if it was not locked
bool releaseFileInode(const std::string& entryID);
// true if the inode is currently locked
bool lookupFileInode(EntryInfo* entryInfo);
private:
GlobalInodeLockMap inodes; // entryID => owned inode object
GlobalInodeTimestepMap inodeTimes; // entryID => time value, kept in step with inodes
FileInode* getFileInode(EntryInfo* entryInfo);
bool releaseInodeTime(const std::string& entryID);
RWLock rwlock; // protects both maps
void clearLockStore();
void clearTimeStoreUnlocked();
};

View File

@@ -0,0 +1,117 @@
#include "IncompleteInode.h"
#include <common/app/log/Logger.h>
#include <program/Program.h>
#include <sys/xattr.h>
#include <sys/types.h>
#include <unistd.h>
/**
 * Close the file descriptor, if there is one. A failing close is logged only.
 */
IncompleteInode::~IncompleteInode()
{
   if (fd < 0)
      return; // nothing to close

   if (close(fd) < 0)
      LOG(GENERAL, ERR, "Failed to close file", fd, sysErr);
}
/**
 * Set an extended attribute on the file and remember its name, so that clearUnsetXAttrs()
 * will keep it.
 *
 * @return FhgfsOpsErr_SUCCESS, or the error derived from errno if fsetxattr() failed
 */
FhgfsOpsErr IncompleteInode::setXattr(const char* name, const void* value, size_t size)
{
   if (::fsetxattr(fd, name, value, size, 0) != 0)
      return FhgfsOpsErrTk::fromSysErr(errno);

   xattrsSet.insert(name);

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Store metadata either as extended attribute (if the config says so) or as file contents.
 *
 * Only META_XATTR_NAME can be stored as file contents. Content written by an earlier call is
 * replaced completely.
 *
 * @param attrName name of the attribute to set
 * @param value data to store
 * @param size number of bytes in value
 * @return FhgfsOpsErr_SUCCESS on success, FhgfsOpsErr_INVAL for unsupported attribute names,
 *    otherwise an error derived from errno (or FhgfsOpsErr_INTERNAL if write() made no
 *    progress without reporting an error)
 */
FhgfsOpsErr IncompleteInode::setContent(const char* attrName, const void* value, size_t size)
{
   if (Program::getApp()->getConfig()->getStoreUseExtendedAttribs())
      return setXattr(attrName, value, size);

   if (strcmp(attrName, META_XATTR_NAME) != 0)
   {
      LOG(GENERAL, ERR, "Setting attribute data as file contents is not supported.", attrName);
      return FhgfsOpsErr_INVAL;
   }

   if (hasContent)
   {
      // replace the old content: drop it and rewind, because the file offset still points
      // behind the data of the previous write
      if (::ftruncate(fd, 0) != 0)
         return FhgfsOpsErrTk::fromSysErr(errno);

      if (::lseek(fd, 0, SEEK_SET) < 0)
         return FhgfsOpsErrTk::fromSysErr(errno);
   }

   // write() may store less than requested without setting errno, so continue until done
   const char* writePos = static_cast<const char*>(value);
   size_t remaining = size;

   while (remaining > 0)
   {
      const ssize_t writeRes = ::write(fd, writePos, remaining);

      if (writeRes < 0)
      {
         if (errno == EINTR)
            continue;

         return FhgfsOpsErrTk::fromSysErr(errno);
      }

      if (writeRes == 0)
         return FhgfsOpsErr_INTERNAL; // no progress and no errno to report

      writePos += writeRes;
      remaining -= size_t(writeRes);
   }

   hasContent = true;

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Remove all xattrs in the "user." namespace from the file that have not been set through
 * setXattr() on this object.
 *
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INTERNAL if listing or removing xattrs failed
 */
FhgfsOpsErr IncompleteInode::clearUnsetXAttrs()
{
   std::set<std::string> xattrsOnInode;

   char xattrNameBuf[XATTR_LIST_MAX];

   const ssize_t listRes = ::flistxattr(fd, xattrNameBuf, sizeof(xattrNameBuf));
   if (listRes < 0)
   {
      LOG(GENERAL, ERR, "could not list xattrs", fileName(), sysErr);
      return FhgfsOpsErr_INTERNAL;
   }

   // the buffer holds the names back to back, each terminated by a zero byte
   ssize_t nameStart = 0;
   while (nameStart < listRes)
   {
      std::string currentName(xattrNameBuf + nameStart);

      nameStart += currentName.size() + 1;
      xattrsOnInode.insert(std::move(currentName));
   }

   for (const std::string& xattrName : xattrsOnInode)
   {
      const bool setByUs = xattrsSet.count(xattrName) != 0;

      // do not sync the system namespaces.
      //  * security. cannot be synced at all, since userspace can't write to it
      //  * trusted. is unused by us and thus not interesting
      //  * system. contains only acls, which we don't use to represent metadata
      const bool inUserNamespace = xattrName.compare(0, 5, "user.") == 0;

      if (setByUs || !inUserNamespace)
         continue;

      if (::fremovexattr(fd, xattrName.c_str()) != 0)
      {
         LOG(GENERAL, ERR, "could not remove superfluous xattr",
            fileName(), ("name", xattrName), sysErr);
         return FhgfsOpsErr_INTERNAL;
      }
   }

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Resolve the path of the file behind fd by reading the /proc/self/fd/<fd> symlink.
 *
 * @return the link target, or "<resolve error>" if the link could not be read
 */
std::string IncompleteInode::fileName() const
{
   // built with std::string instead of sprintf() into a variable length array (which is not
   // standard C++)
   const std::string fdLinkPath = "/proc/self/fd/" + std::to_string(fd);

   std::string result;
   result.resize(PATH_MAX + 1);

   ssize_t readRes = ::readlink(fdLinkPath.c_str(), &result[0], PATH_MAX);
   if (readRes < 0)
   {
      LOG(GENERAL, ERR, "Failed to resolve file for fd", fd, sysErr);
      return "<resolve error>";
   }

   // readlink() does not zero-terminate; cut the buffer down to the returned length
   result.resize(readRes);

   return result;
}

View File

@@ -0,0 +1,54 @@
#pragma once
#include <common/storage/StorageErrors.h>
/**
 * Move-only owner of a file descriptor that metadata is written to, either as xattrs or as
 * file contents. The descriptor is closed on destruction.
 */
class IncompleteInode
{
   public:
      // creates an object without a file descriptor (fd == -1)
      IncompleteInode() : fd(-1), hasContent(false) {}

      // takes over the given file descriptor
      explicit IncompleteInode(int fd) : fd(fd), hasContent(false) {}

      ~IncompleteInode();

      // not copyable: there may only be one owner of the descriptor
      IncompleteInode(const IncompleteInode&) = delete;
      IncompleteInode& operator=(const IncompleteInode&) = delete;

      // move operations only swap an int, a bool and a std::set and thus cannot throw
      IncompleteInode(IncompleteInode&& other) noexcept
         : fd(-1), hasContent(false)
      {
         swap(other);
      }

      IncompleteInode& operator=(IncompleteInode&& other) noexcept
      {
         // the temporary takes over our old state and releases it when it goes out of scope
         IncompleteInode(std::move(other)).swap(*this);
         return *this;
      }

      void swap(IncompleteInode& other) noexcept
      {
         std::swap(fd, other.fd);
         std::swap(hasContent, other.hasContent);
         std::swap(xattrsSet, other.xattrsSet);
      }

      friend void swap(IncompleteInode& a, IncompleteInode& b) noexcept
      {
         a.swap(b);
      }

      FhgfsOpsErr setXattr(const char* name, const void* value, size_t size);
      FhgfsOpsErr setContent(const char* name, const void* value, size_t size);

      FhgfsOpsErr clearUnsetXAttrs();

   private:
      int fd; // -1 if no file is attached
      bool hasContent; // true once setContent() has written file contents
      std::set<std::string> xattrsSet; // names of the xattrs set via setXattr()

      std::string fileName() const;
};

View File

@@ -0,0 +1,498 @@
#include <common/threading/RWLockGuard.h>
#include <common/threading/UniqueRWLock.h>
#include <program/Program.h>
#include <storage/PosixACL.h>
#include "DirInode.h"
#include "InodeDirStore.h"
#define DIRSTORE_REFCACHE_REMOVE_SKIP_SYNC (4) /* n-th elements to be removed on sync sweep */
#define DIRSTORE_REFCACHE_REMOVE_SKIP_ASYNC (3) /* n-th elements to be removed on async sweep */
/**
 * Read the reference cache limits from the config: the sync limit is taken as configured, the
 * async limit is the sync limit minus half of it (integer division).
 *
 * not inlined as we need to include <program/Program.h>
 */
InodeDirStore::InodeDirStore()
{
   this->refCacheSyncLimit = Program::getApp()->getConfig()->getTuneDirMetadataCacheLimit();
   this->refCacheAsyncLimit = this->refCacheSyncLimit - (this->refCacheSyncLimit / 2);
}
/**
 * Note: does no locking itself
 *
 * @return true if an entry for dirID exists in the dirs map
 */
bool InodeDirStore::dirInodeInStoreUnlocked(const std::string& dirID)
{
   return this->dirs.find(dirID) != this->dirs.end();
}
/**
* Get a referenced DirInode for dirID. If the dir is not in the store yet, it is inserted
* first.
*
* Note: remember to call releaseDir()
*
* @param forceLoad Force to load the DirInode from disk. Defaults to false.
* @return NULL if no such dir exists (or if the dir is being moved), but cannot be NULL if
* forceLoad is false
*/
DirInode* InodeDirStore::referenceDirInode(const std::string& dirID, bool isBuddyMirrored,
bool forceLoad)
{
DirInode* dir = NULL;
bool wasReferenced = true; /* Only try to add to cache if not in memory yet.
* Any attempt to add it to the cache causes a cache sweep, which is
* rather expensive.
* Note: when set to false we also need a write-lock! */
// start with the read lock; it is only traded for the write lock if the dir is not found
UniqueRWLock lock(rwlock, SafeRWLock_READ);
DirectoryMapIter iter;
iter = dirs.find(dirID);
if (iter == dirs.end())
{
lock.unlock();
lock.lock(SafeRWLock_WRITE);
// the lock was given up in between, so somebody else might have inserted the dir: look again
iter = dirs.find(dirID);
}
if(iter == this->dirs.end() )
{ // Not in map yet => try to load it. We must be write-locked here!
iter = insertDirInodeUnlocked(dirID, isBuddyMirrored, forceLoad);
wasReferenced = false;
}
if(iter != this->dirs.end() )
{ // exists in map
DirectoryReferencer* dirRefer = iter->second;
DirInode* dirNonRef = dirRefer->getReferencedObject();
if(!dirNonRef->getExclusive() ) // check moving
{
// NOTE(review): this may run with only the read lock held (dir found on the first lookup);
// presumably reference() is safe for concurrent callers - confirm
dir = dirRefer->reference();
LOG_DBG(GENERAL, SPAM, "referenceDirInode", dir->getID(), dirRefer->getRefCount());
if (!wasReferenced)
cacheAddUnlocked(dirID, dirRefer);
}
// no "else".
// note: we don't need to check unload here, because exclusive means there already is a
// reference so we definitely didn't load here
}
lock.unlock();
/* Only try to load the DirInode after giving up the lock. DirInodes are usually referenced
* without being loaded at all from the kernel client, so we can afford the extra lock if loading
* the DirInode fails. */
if (forceLoad && dir && (!dir->loadIfNotLoaded()) )
{ // loading from disk failed, release the dir again
releaseDir(dirID);
dir = NULL;
}
return dir;
}
/**
* Release reduce the refcounter of an DirInode here
*/
void InodeDirStore::releaseDir(const std::string& dirID)
{
RWLockGuard lock(this->rwlock, SafeRWLock_WRITE);
releaseDirUnlocked(dirID);
}
/**
 * Drop one reference to the given dir; the referencer is deleted and removed from the map when
 * the last reference is gone.
 *
 * Note: The caller is expected to hold the write lock of this store.
 *
 * Error cases (all of them are logged as bugs):
 *  - dirID is not in the map: nothing is changed.
 *  - the refCount is already zero: the map entry is erased (the referencer itself is not deleted
 *    here).
 *  - the last reference was dropped, but the fileStore of the dir is not empty (and the app is
 *    not self-terminating): the reference is re-taken and the dir stays in the map.
 */
void InodeDirStore::releaseDirUnlocked(const std::string& dirID)
{
   App* app = Program::getApp();

   DirectoryMapIter iter = this->dirs.find(dirID);
   if(unlikely(iter == this->dirs.end() ) )
   { // unknown dir
      LOG(GENERAL, ERR, "Bug: releaseDir requested, but dir not referenced!", dirID);
      LogContext(__func__).logBacktrace();
      return;
   }

   DirectoryReferencer* dirRefer = iter->second;

   if (unlikely(!dirRefer->getRefCount() ) )
   { // attempt to release a Dir without a refCount
      LOG(GENERAL, ERR, "Bug: Refusing to release dir with a zero refCount", dirID);
      this->dirs.erase(iter);
      return;
   }

   // dir exists and is referenced => decrease refCount
   dirRefer->release();
   DirInode* dirNonRef = dirRefer->getReferencedObject();

   LOG_DBG(GENERAL, SPAM, "releaseDirInode", dirID, dirRefer->getRefCount());

   if(dirRefer->getRefCount() )
      return; // still referenced by someone else

   // dropped last reference => unload dir
   if (unlikely( dirNonRef->fileStore.getSize() && !app->getSelfTerminate() ) )
   {
      LOG(GENERAL, ERR, "Bug: Refusing to release the directory, "
         "its fileStore still has references!", dirID);
      dirRefer->reference();
      return;
   }

   // as expected, fileStore is empty
   delete(dirRefer);
   this->dirs.erase(iter);
}
/**
 * Remove the stored inode of the given dir if the dir is removable (see isRemovableUnlocked() ).
 *
 * @return the result of the removable-check if that was not successful; FhgfsOpsErr_INTERNAL if
 * unlinking the stored inode failed; FhgfsOpsErr_SUCCESS otherwise
 */
FhgfsOpsErr InodeDirStore::removeDirInode(const std::string& dirID, bool isBuddyMirrored)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   /* the cache reference has to go first, otherwise the check below would see the dir as in-use.
      (we should move this after isRemovable()-check as soon as we can remove referenced dirs) */
   cacheRemoveUnlocked(dirID);

   FhgfsOpsErr checkRes = isRemovableUnlocked(dirID, isBuddyMirrored);
   if(checkRes != FhgfsOpsErr_SUCCESS)
      return checkRes;

   bool unlinkOK = DirInode::unlinkStoredInode(dirID, isBuddyMirrored);

   return unlinkOK ? FhgfsOpsErr_SUCCESS : FhgfsOpsErr_INTERNAL;
}
/**
 * Check whether the dir is removable (i.e. not in use and without sub-entries).
 *
 * Note: Caller must make sure that there is no cache reference that would lead to a false in-use
 * result.
 *
 * @return FhgfsOpsErr_SUCCESS only if the dir is not in the map, could be loaded from disk and has
 * no sub-entries. A dir that is in the map is never reported as removable: the result is
 * FhgfsOpsErr_NOTEMPTY if it has sub-entries, FhgfsOpsErr_INUSE otherwise (or
 * FhgfsOpsErr_INTERNAL if it is in the map without any reference).
 */
FhgfsOpsErr InodeDirStore::isRemovableUnlocked(const std::string& dirID, bool isBuddyMirrored)
{
   const char* logContext = "InodeDirStore check if dir is removable";

   DirectoryMapCIter iter = dirs.find(dirID);

   if(iter == dirs.end() )
   { // not loaded => static checking on a temporary object
      DirInode diskInode(dirID, isBuddyMirrored);

      if (!diskInode.loadFromFile() )
         return FhgfsOpsErr_PATHNOTEXISTS;

      if(diskInode.getNumSubEntries() )
         return FhgfsOpsErr_NOTEMPTY;

      return FhgfsOpsErr_SUCCESS;
   }

   // dir currently loaded, refuse to let it rmdir'ed
   DirectoryReferencer* dirRefer = iter->second;
   DirInode* dir = dirRefer->getReferencedObject();

   if (unlikely(!dirRefer->getRefCount() ) )
   {
      LogContext(logContext).logErr("Bug: unreferenced dir found! EntryID: " + dirID);
      return FhgfsOpsErr_INTERNAL;
   }

   dir->loadIfNotLoaded(); // (result is not checked here)

   if(dir->getNumSubEntries() )
      return FhgfsOpsErr_NOTEMPTY;

   if(dir->getExclusive() )
      return FhgfsOpsErr_INUSE; // someone else is already trying to delete it

   // still referenced (e.g. because someone is just trying to create a file in it)
   return FhgfsOpsErr_INUSE;
}
/**
* Note: This does not load any entries, so it will only return the number of already loaded
* entries. (Only useful for debugging and statistics probably.)
*/
size_t InodeDirStore::getSize()
{
RWLockGuard lock(rwlock, SafeRWLock_READ);
return dirs.size();
}
/**
 * Get the stat data of a dir, either from the object in the map or (if the dir is not in the map)
 * directly from disk.
 *
 * @param outParentNodeID may be NULL
 * @param outParentEntryID may be NULL (if outParentNodeID is NULL)
 * @return FhgfsOpsErr_PATHNOTEXISTS if the dir is in the map but cannot be loaded from disk;
 * FhgfsOpsErr_NOTOWNER if the dir is in the map and its owner is not the local node (or the local
 * buddy group for mirrored dirs); otherwise the result of the respective getStatData() call
 */
FhgfsOpsErr InodeDirStore::stat(const std::string& dirID, bool isBuddyMirrored,
   StatData& outStatData, NumNodeID* outParentNodeID, std::string* outParentEntryID)
{
   App* app = Program::getApp();

   NumNodeID expectedOwnerID = isBuddyMirrored
      ? NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID() )
      : app->getLocalNode().getNumID();

   UniqueRWLock lock(rwlock, SafeRWLock_READ);

   DirectoryMapIter iter = dirs.find(dirID);

   if(iter == dirs.end() )
   { // dir not in map => read data on disk
      /* Unlock first, no need to hold a lock to just read the file from disk.
       * If xattr or data are not written yet we just raced and return FhgfsOpsErr_PATHNOTEXISTS */
      lock.unlock();

      return DirInode::getStatData(dirID, isBuddyMirrored, outStatData, outParentNodeID,
         outParentEntryID);
   }

   // dir is in map, but might not be loaded at all...
   DirInode* dir = iter->second->getReferencedObject();

   if (!dir->loadIfNotLoaded() )
   { /* Loading from disk failed, the dir is only referenced, but does not exist.
      * Might be due to our dir-reference optimization or a corruption problem.
      * (the lock guard is left to release the lock on scope exit here) */
      return FhgfsOpsErr_PATHNOTEXISTS;
   }

   FhgfsOpsErr statRes;

   if(expectedOwnerID != dir->getOwnerNodeID() )
   {
      /* check for owner node ID is especially important for root dir
       * NOTE: this check is only performed, if directory is already loaded, because all MDSs
       * need to be able to reference the root directory even if owner is not set (it is not set
       * on first startup). */
      LOG(GENERAL, WARNING, "Owner verification failed.", dirID);
      statRes = FhgfsOpsErr_NOTOWNER;
   }
   else
      statRes = dir->getStatData(outStatData, outParentNodeID, outParentEntryID);

   lock.unlock();

   return statRes;
}
/**
 * Apply the given attributes to a dir. If the dir is not in the map, a temporary DirInode is
 * loaded from disk, updated and destroyed again.
 *
 * @param validAttribs SETATTR_CHANGE_...-Flags
 * @return FhgfsOpsErr_PATHNOTEXISTS if the dir is in the map but marked exclusive, or if it is not
 * in the map and cannot be loaded from disk; FhgfsOpsErr_INTERNAL if setting the attribs failed
 */
FhgfsOpsErr InodeDirStore::setAttr(const std::string& dirID, bool isBuddyMirrored, int validAttribs,
   SettableFileAttribs* attribs)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   DirectoryMapIter iter = dirs.find(dirID);

   if(iter != dirs.end() )
   { // dir loaded
      DirInode* loadedDir = iter->second->getReferencedObject();

      if(loadedDir->getExclusive() )
         return FhgfsOpsErr_PATHNOTEXISTS;

      bool setRes = loadedDir->setAttrData(validAttribs, attribs);
      return setRes ? FhgfsOpsErr_SUCCESS : FhgfsOpsErr_INTERNAL;
   }

   // not loaded => load, apply, destroy
   DirInode tmpDir(dirID, isBuddyMirrored);

   if(!tmpDir.loadFromFile() )
      return FhgfsOpsErr_PATHNOTEXISTS;

   bool setRes = tmpDir.setAttrData(validAttribs, attribs);
   return setRes ? FhgfsOpsErr_SUCCESS : FhgfsOpsErr_INTERNAL;
}
/**
 * Call invalidate() on every buddy mirrored DirInode that is currently in the map.
 */
void InodeDirStore::invalidateMirroredDirInodes()
{
   UniqueRWLock lock(rwlock, SafeRWLock_WRITE);

   for (auto& mapEntry : dirs)
   {
      DirInode* dir = mapEntry.second->getReferencedObject();

      if (!dir->getIsBuddyMirrored() )
         continue;

      dir->invalidate();
   }
}
/**
 * Creates an empty DirInode (optionally loaded from disk) and inserts it into the map.
 *
 * Note: This modifies the map; the caller (referenceDirInode() ) holds the write lock when calling
 * this.
 *
 * @param forceLoad if true, the new DirInode is loaded from disk before it is inserted and
 * nothing is inserted if loading fails
 * @return iterator to the map element for dirID; dirs.end() if the DirInode could not be allocated
 * or if forceLoad is set and loading failed
 */
DirectoryMapIter InodeDirStore::insertDirInodeUnlocked(const std::string& dirID,
   bool isBuddyMirrored, bool forceLoad)
{
   std::unique_ptr<DirInode> newDir(new (std::nothrow) DirInode(dirID, isBuddyMirrored));
   if (unlikely (!newDir) )
      return dirs.end(); // out of memory

   // load from disk requested, but failed => the new object is dropped again
   if (forceLoad && !newDir->loadIfNotLoaded() )
      return dirs.end();

   // from here on the referencer holds the DirInode pointer
   DirectoryReferencer* newRefer = new DirectoryReferencer(newDir.release() );

   return this->dirs.insert(DirectoryMapVal(dirID, newRefer) ).first;
}
void InodeDirStore::clearStoreUnlocked()
{
LOG_DBG(GENERAL, DEBUG, "clearStoreUnlocked", dirs.size());
cacheRemoveAllUnlocked();
for(DirectoryMapIter iter = dirs.begin(); iter != dirs.end(); iter++)
{
DirectoryReferencer* dirRef = iter->second;
// will also call destructor for dirInode and sub-objects as dirInode->fileStore
delete(dirRef);
}
dirs.clear();
}
/**
 * Add the dir to the reference cache. A newly added cache entry holds its own reference on the
 * dir.
 *
 * Note: Make sure to call this only after the new reference has been taken by the caller
 * (otherwise it might happen that the new element is deleted during sweep if it was cached
 * before and appears to be unneeded now).
 */
void InodeDirStore::cacheAddUnlocked(const std::string& dirID, DirectoryReferencer* dirRefer)
{
   unsigned userCacheLimit = Program::getApp()->getConfig()->getTuneDirMetadataCacheLimit();
   if (unlikely(userCacheLimit == 0) )
      return; // cache disabled by user config

   // (we do cache sweeping before insertion to make sure we don't sweep the new entry)
   cacheSweepUnlocked(true);

   bool isNewCacheEntry =
      refCache.insert(DirCacheMapVal(dirID, dirRefer->getReferencedObject()) ).second;
   if(!isNewCacheEntry)
      return; // was cached already, so the cache reference exists already

   // new insert => inc refcount
   dirRefer->reference();

   LOG_DBG(GENERAL, SPAM, "InodeDirStore cache add DirInode.", dirID, dirRefer->getRefCount());
}
void InodeDirStore::cacheRemoveUnlocked(const std::string& dirID)
{
DirCacheMapIter iter = refCache.find(dirID);
if(iter == refCache.end() )
return;
releaseDirUnlocked(dirID);
refCache.erase(iter);
}
void InodeDirStore::cacheRemoveAllUnlocked()
{
for(DirCacheMapIter iter = refCache.begin(); iter != refCache.end(); /* iter inc inside loop */)
{
releaseDirUnlocked(iter->first);
refCache.erase(iter++);
}
}
/**
 * Shrink the reference cache if it holds more elements than the limit of the given sweep type.
 *
 * Sweeping means we remove every n-th element from the cache, starting to count at a random
 * element in the range 0 to n-1. The reference that the cache held on a removed dir is released.
 *
 * @param isSyncSweep true if this is a synchronous sweep (e.g. we need to free a few elements to
 * allow quick insertion of a new element), false if this is an asynchronous sweep (that might take
 * a bit longer).
 * @return true if a cache flush was triggered, false otherwise
 */
bool InodeDirStore::cacheSweepUnlocked(bool isSyncSweep)
{
   size_t cacheLimit;
   size_t removeSkipNum;

   // check type of sweep and set removal parameters accordingly
   if(isSyncSweep)
   { // sync sweep settings
      cacheLimit = refCacheSyncLimit;
      removeSkipNum = DIRSTORE_REFCACHE_REMOVE_SKIP_SYNC;
   }
   else
   { // async sweep settings
      cacheLimit = refCacheAsyncLimit;
      removeSkipNum = DIRSTORE_REFCACHE_REMOVE_SKIP_ASYNC;
   }

   if(refCache.size() <= cacheLimit)
      return false;

   // pick a random element to start counting at (note: counting starts with 1 at this element,
   // so the first element to be removed is the n-th one from here)
   unsigned randStart = randGen.getNextInRange(0, removeSkipNum - 1);
   DirCacheMapIter iter = refCache.begin();

   /* never advance beyond end(): the cache is only known to be larger than cacheLimit here, which
      does not guarantee that it has at least randStart elements */
   while(randStart-- && (iter != refCache.end() ) )
      iter++;

   // walk over all remaining cached elements and remove every n-th element
   DirCacheMapSizeT i = 0; // counts elements

   while (iter != refCache.end() )
   {
      if ( (++i % removeSkipNum) == 0)
      {
         releaseDirUnlocked(iter->first);
         refCache.erase(iter++);
      }
      else
         iter++;
   }

   return true;
}
/**
 * Run an asynchronous cache sweep under the write lock of this store.
 *
 * @return true if a cache flush was triggered, false otherwise
 */
bool InodeDirStore::cacheSweepAsync()
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   const bool isSyncSweep = false;
   return cacheSweepUnlocked(isSyncSweep);
}
/**
* @return current number of cached entries
*/
size_t InodeDirStore::getCacheSize()
{
RWLockGuard lock(rwlock, SafeRWLock_READ);
return refCache.size();
}

View File

@@ -0,0 +1,94 @@
#pragma once
#include <common/Common.h>
#include <common/threading/Mutex.h>
#include <common/toolkit/AtomicObjectReferencer.h>
#include <common/toolkit/MetadataTk.h>
#include <common/toolkit/Random.h>
#include <common/storage/StatData.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
// forward declaration (this header only uses pointers to DirInode)
class DirInode;
// reference counting wrapper around a DirInode pointer
typedef AtomicObjectReferencer<DirInode*> DirectoryReferencer;
// the store's main map: dirID => referencer of the DirInode object
typedef std::map<std::string, DirectoryReferencer*> DirectoryMap;
typedef DirectoryMap::iterator DirectoryMapIter;
typedef DirectoryMap::const_iterator DirectoryMapCIter;
typedef DirectoryMap::value_type DirectoryMapVal;
// the reference cache: each entry stands for one reference that the cache itself holds on a dir
typedef std::map<std::string, DirInode*> DirCacheMap; // keys are dirIDs (same as DirMap)
typedef DirCacheMap::iterator DirCacheMapIter;
typedef DirCacheMap::const_iterator DirCacheMapCIter;
typedef DirCacheMap::value_type DirCacheMapVal;
typedef DirCacheMap::size_type DirCacheMapSizeT;
/**
* Layer in between our inodes and the data on the underlying file system. So we read/write from/to
* underlying files and this class is to do this corresponding data access.
* This object is used for _directories_ only.
*
* The store keeps a map of reference counted DirInode objects (dirs) plus a reference cache
* (refCache) that holds an additional reference on recently inserted dirs. Both maps are protected
* by rwlock; methods with the "Unlocked" suffix do not take the lock themselves.
*/
class InodeDirStore
{
friend class DirInode;
friend class MetaStore;
public:
InodeDirStore();
// deletes all referencers that are still in the map (without taking the lock)
~InodeDirStore()
{
this->clearStoreUnlocked();
};
// not copyable and not movable
InodeDirStore(const InodeDirStore&) = delete;
InodeDirStore(InodeDirStore&&) = delete;
InodeDirStore& operator=(const InodeDirStore&) = delete;
InodeDirStore& operator=(InodeDirStore&&) = delete;
bool dirInodeInStoreUnlocked(const std::string& dirID);
// reference / release: every successful referenceDirInode() needs a matching releaseDir()
DirInode* referenceDirInode(const std::string& dirID, bool isBuddyMirrored, bool forceLoad);
void releaseDir(const std::string& dirID);
FhgfsOpsErr removeDirInode(const std::string& dirID, bool isBuddyMirrored);
size_t getSize();
size_t getCacheSize();
FhgfsOpsErr stat(const std::string& dirID, bool isBuddyMirrored, StatData& outStatData,
NumNodeID* outParentNodeID, std::string* outParentEntryID);
FhgfsOpsErr setAttr(const std::string& dirID, bool isBuddyMirrored, int validAttribs,
SettableFileAttribs* attribs);
void invalidateMirroredDirInodes();
bool cacheSweepAsync();
private:
DirectoryMap dirs; // dirID => referencer of the DirInode
size_t refCacheSyncLimit; // synchronous access limit (=> async limit plus some grace size)
size_t refCacheAsyncLimit; // asynchronous cleanup limit (this is what the user configures)
Random randGen; // for random cache removal
DirCacheMap refCache; // dirs that are referenced by the cache itself
RWLock rwlock; // protects dirs and refCache
void releaseDirUnlocked(const std::string& dirID);
FhgfsOpsErr isRemovableUnlocked(const std::string& dirID, bool isBuddyMirrored);
DirectoryMapIter insertDirInodeUnlocked(const std::string& dirID, bool isBuddyMirrored,
bool forceLoad);
FhgfsOpsErr setDirParent(EntryInfo* entryInfo, uint16_t parentNodeID);
void clearStoreUnlocked();
// reference cache helpers
void cacheAddUnlocked(const std::string& dirID, DirectoryReferencer* dirRefer);
void cacheRemoveUnlocked(const std::string& dirID);
void cacheRemoveAllUnlocked();
bool cacheSweepUnlocked(bool isSyncSweep);
};

View File

@@ -0,0 +1,660 @@
#include <common/threading/UniqueRWLock.h>
#include <common/threading/RWLockGuard.h>
#include <program/Program.h>
#include "InodeFileStore.h"
/**
 * Check if the given ID is in the store.
 *
 * @return true if an inode with this ID is currently in the map
 */
bool InodeFileStore::isInStore(const std::string& fileID)
{
   RWLockGuard readGuard(rwlock, SafeRWLock_READ);

   return inodes.find(fileID) != inodes.end();
}
/**
 * Take the referencer of the given ID out of the map and hand it over to the caller (the
 * referencer itself is not deleted). Mainly used to move the referencer between stores.
 *
 * @return the referencer, or nullptr if the ID is not in the map
 */
FileInodeReferencer* InodeFileStore::getReferencerAndDeleteFromMap(const std::string& fileID)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   InodeMapIter iter = this->inodes.find(fileID);
   if(iter == inodes.end() )
      return nullptr;

   // exists in map => remember the referencer before the map element is gone
   FileInodeReferencer* fileRefer = iter->second;
   this->inodes.erase(iter);

   return fileRefer;
}
/**
 * Reference a file inode that is already in the store; nothing is loaded from disk here.
 * Also could be called "referenceReferencedFile".
 *
 * Note: remember to call releaseFileInode()
 * NOTE(review): only the read lock is taken here while the reference count is changed - confirm
 * that this is intended.
 *
 * @return NULL if the file is not in the store (or cannot be referenced, see
 * referenceFileInodeMapIterUnlocked() )
 */
FileInode* InodeFileStore::referenceLoadedFile(const std::string& entryID)
{
   RWLockGuard readGuard(rwlock, SafeRWLock_READ);

   InodeMapIter iter = this->inodes.find(entryID);
   if(iter == this->inodes.end() )
      return nullptr;

   return referenceFileInodeMapIterUnlocked(iter);
}
/**
 * Reference a file inode under the write lock of this store.
 *
 * Note: remember to call releaseFileInode()
 *
 * @param loadFromDisk - true for the per-directory InodeFileStore, false for references
 *        from MetaStore (global map)
 * @param checkLockStore - true in most cases, triggers check if file is locked due
 *        to internal meta operations
 * @return FileInode* and FhgfsOpsErr. FileInode* NULL if no such file exists.
 */
FileInodeRes InodeFileStore::referenceFileInode(EntryInfo* entryInfo, bool loadFromDisk, bool checkLockStore)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   if (!checkLockStore)
      return referenceFileInodeUnlockedIgnoreLocking(entryInfo, loadFromDisk);

   // regular case: respect the internal lock store
   return referenceFileInodeUnlocked(entryInfo, loadFromDisk);
}
/**
 * Reference a file inode; an inode that is not in the map is loaded from disk if requested and if
 * it is not registered in the GlobalInodeLockStore.
 *
 * Note: this->rwlock needs to be write locked
 * Note: If the inode is in the map, the result code is FhgfsOpsErr_SUCCESS even if the inode
 * could not be referenced (see referenceFileInodeMapIterUnlocked() ), so callers have to check the
 * returned pointer.
 *
 * @return inode (or NULL) and result code: FhgfsOpsErr_INODELOCKED if the inode is not in the map
 * and found in the GlobalInodeLockStore, FhgfsOpsErr_PATHNOTEXISTS if it is not in the map and was
 * not (or could not be) loaded
 */
FileInodeRes InodeFileStore::referenceFileInodeUnlocked(EntryInfo* entryInfo, bool loadFromDisk)
{
   FileInode* inode = NULL;
   FhgfsOpsErr retVal = FhgfsOpsErr_PATHNOTEXISTS;

   InodeMapIter iter = this->inodes.find(entryInfo->getEntryID() );

   if( (iter == this->inodes.end() ) && loadFromDisk)
   { // not in map yet => check if in globalInodeLockStore
      GlobalInodeLockStore* inodeLockStore =
         Program::getApp()->getMetaStore()->getInodeLockStore();

      if (inodeLockStore->lookupFileInode(entryInfo) )
         retVal = FhgfsOpsErr_INODELOCKED; // inode is in GlobalInodeLockStore
      else
         loadAndInsertFileInodeUnlocked(entryInfo, iter); // not locked, try to load
   }

   if(iter == this->inodes.end() )
      return {inode, retVal};

   // inode exists in map
   inode = referenceFileInodeMapIterUnlocked(iter);
   retVal = FhgfsOpsErr_SUCCESS;

   return {inode, retVal};
}
/**
 * Variation of referenceFileInodeUnlocked() that bypasses the check for files locked due to
 * internal meta operations.
 *
 * Note: this->rwlock needs to be write locked
 * Note: If the inode is in the map, the result code is FhgfsOpsErr_SUCCESS even if the inode
 * could not be referenced (see referenceFileInodeMapIterUnlocked() ), so callers have to check the
 * returned pointer.
 *
 * @return inode (or NULL) and result code; FhgfsOpsErr_PATHNOTEXISTS if the inode is not in the
 * map and was not (or could not be) loaded
 */
FileInodeRes InodeFileStore::referenceFileInodeUnlockedIgnoreLocking(EntryInfo* entryInfo, bool loadFromDisk)
{
   FileInode* inode = NULL;
   FhgfsOpsErr retVal = FhgfsOpsErr_PATHNOTEXISTS;

   InodeMapIter iter = this->inodes.find(entryInfo->getEntryID() );

   // not in map yet => try to load it
   if( (iter == this->inodes.end() ) && loadFromDisk)
      loadAndInsertFileInodeUnlocked(entryInfo, iter);

   if(iter != this->inodes.end() )
   { // inode exists in map
      inode = referenceFileInodeMapIterUnlocked(iter);
      retVal = FhgfsOpsErr_SUCCESS;
   }

   return {inode, retVal};
}
/**
 * Get an inode object that nobody references. The inode is loaded if it is not in the map yet.
 * No reference is taken and no exclusive lock is set here.
 *
 * @param outInode the unreferenced inode on success, NULL otherwise
 * @return FhgfsOpsErr_PATHNOTEXISTS if the inode is not in the map and could not be loaded;
 * FhgfsOpsErr_INUSE if the inode is referenced, exclusively held by another thread or has more
 * than one hardlink (in the latter case the inode object is also removed from the store again)
 */
FhgfsOpsErr InodeFileStore::getUnreferencedInodeUnlocked(EntryInfo* entryInfo, FileInode*& outInode)
{
   InodeMapIter iter = this->inodes.find(entryInfo->getEntryID() );

   // not in map yet => try to load it.
   if(iter == this->inodes.end() )
      loadAndInsertFileInodeUnlocked(entryInfo, iter);

   if(iter == this->inodes.end() )
   { // neither in map nor loadable
      outInode = NULL;
      return FhgfsOpsErr_PATHNOTEXISTS;
   }

   // inode exists => check whether no references etc. exist
   FileInodeReferencer* inodeRefer = iter->second;
   FileInode* candidate = inodeRefer->getReferencedObject();

   if(inodeRefer->getRefCount() ||
      (candidate->getExclusiveThreadID() &&
         candidate->getExclusiveThreadID() != System::getPosixTID() ) )
   {
      /* note: We do not unload the inode here (if we loaded it in this method),
       * because freshly loaded inodes can't be referenced or exclusive and hence
       * they cannot be "in use". */
      outInode = NULL;
      return FhgfsOpsErr_INUSE;
   }

   if (candidate->getNumHardlinks() > 1)
   { /* The inode is not referenced, but there are several hardlinks for this file.
      * Currently only rename with a linkCount == 1 is supported! */
      deleteUnreferencedInodeUnlocked(entryInfo->getEntryID() );

      outInode = NULL;
      return FhgfsOpsErr_INUSE;
   }

   outInode = candidate;
   return FhgfsOpsErr_SUCCESS;
}
/**
 * Reference a file inode via its map iterator.
 *
 * NOTE: iter should have been checked by the caller: iter != this->inodes.end()
 *
 * @return the referenced inode; nullptr if iter is the end iterator or if the inode is exclusively
 * held by a thread other than the calling one
 */
FileInode* InodeFileStore::referenceFileInodeMapIterUnlocked(InodeMapIter& iter)
{
   if (unlikely(iter == inodes.end() ) )
      return nullptr;

   FileInodeReferencer* inodeRefer = iter->second;
   FileInode* inodeNonRef = inodeRefer->getReferencedObject();

   // an exclusive holder other than ourselves blocks new references
   if(inodeNonRef->getExclusiveThreadID() &&
      (inodeNonRef->getExclusiveThreadID() != System::getPosixTID() ) )
      return nullptr;

   FileInode* inode = inodeRefer->reference();

   // note: we don't need to check unload here, because exclusive means there already is a
   //       reference so we definitely didn't load here

   LOG_DBG(GENERAL, SPAM, "Reference file inode.", ("FileInodeID", iter->first),
      ("Refcount", inodeRefer->getRefCount()));

   return inode;
}
/**
 * Decrease the inode reference counter using the given iter. When the last reference is dropped,
 * the referencer is deleted and the element is erased from the map (so iter must not be used
 * afterwards in that case).
 *
 * Note: InodeFileStore needs to be write-locked.
 *
 * @return number of inode references after release()
 */
unsigned InodeFileStore::decreaseInodeRefCountUnlocked(InodeMapIter& iter)
{
   FileInodeReferencer* inodeRefer = iter->second;

   unsigned remainingRefs = (unsigned) inodeRefer->release();

   LOG_DBG(GENERAL, SPAM, "Release file inode.", ("FileInodeID", iter->first),
      ("Refcount", inodeRefer->getRefCount()));

   if(remainingRefs)
      return remainingRefs; // still in use

   // dropped last reference => unload inode
   delete(inodeRefer);
   this->inodes.erase(iter);

   return remainingRefs;
}
/**
 * Close the given file. Also updates the InodeFile on disk.
 *
 * @param outNumHardlinks set to 1 if the inode is not in the store, otherwise to the number of
 * hardlinks of the inode
 * @param outNumRefs number of references after the close; only set if true is returned
 * @param outLastWriterClosed set to true if accessFlags do not contain OPENFILE_ACCESS_READ and
 * the inode has no write sessions left after this close
 * @return false if the inode is not in the store
 */
bool InodeFileStore::closeFile(EntryInfo* entryInfo, FileInode* inode, unsigned accessFlags,
   unsigned* outNumHardlinks, unsigned* outNumRefs, bool& outLastWriterClosed)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   *outNumHardlinks = 1; // (we're careful here about inodes that are not currently open)
   outLastWriterClosed = false;

   InodeMapIter iter = this->inodes.find(inode->getEntryID() );
   if (iter == this->inodes.end() )
      return false;

   // inode exists in store
   *outNumHardlinks = inode->getNumHardlinks();

   // Store inode information on disk, they have been set with inode->setDynAttribs() before
   entryInfo->setInodeInlinedFlag(inode->getIsInlined() );
   inode->decNumSessionsAndStore(entryInfo, accessFlags);

   // Monitor the final closure of files opened for writing (including append mode): the last
   // writer is gone if this session was not opened with the read flag and the count of write
   // sessions of the file has dropped to zero.
   bool openedWithoutRead = !(accessFlags & OPENFILE_ACCESS_READ);
   if (openedWithoutRead && !inode->getNumSessionsWrite() )
      outLastWriterClosed = true;

   *outNumRefs = decreaseInodeRefCountUnlocked(iter);

   return true;
}
/**
 * Drop one reference to the given inode (see decreaseInodeRefCountUnlocked() ).
 *
 * @return false if file was not in store at all, true if we found the file in the store
 */
bool InodeFileStore::releaseFileInode(FileInode* inode)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   InodeMapIter iter = this->inodes.find(inode->getEntryID() );
   if(iter == this->inodes.end() )
      return false;

   // inode exists => decrease refCount
   decreaseInodeRefCountUnlocked(iter);

   return true;
}
/**
 * Check whether the file is unlinkable (not in use).
 *
 * @return FhgfsOpsErr_SUCCESS when not in use (this includes inodes that are not in the store),
 * FhgfsOpsErr_INUSE when the inode is referenced and FhgfsOpsErr_PATHNOTEXISTS when it is
 * exclusively locked by another thread.
 */
FhgfsOpsErr InodeFileStore::isUnlinkableUnlocked(EntryInfo* entryInfo)
{
   InodeMapCIter iter = inodes.find(entryInfo->getEntryID() );

   if(iter == inodes.end() )
      return FhgfsOpsErr_SUCCESS; // not in store => nobody uses it

   FileInodeReferencer* fileRefer = iter->second;
   FileInode* inode = fileRefer->getReferencedObject();

   if(fileRefer->getRefCount() )
      return FhgfsOpsErr_INUSE;

   if(inode->getExclusiveThreadID() && inode->getExclusiveThreadID() != System::getPosixTID() )
      return FhgfsOpsErr_PATHNOTEXISTS; /* TODO: Not correct (if rename() fails), but will be
                                         * FIXED using the per-file checkMap holds. */

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Read-locked wrapper for isUnlinkableUnlocked().
 */
FhgfsOpsErr InodeFileStore::isUnlinkable(EntryInfo* entryInfo)
{
   RWLockGuard readGuard(rwlock, SafeRWLock_READ);

   FhgfsOpsErr unlinkableRes = isUnlinkableUnlocked(entryInfo);
   return unlinkableRes;
}
/**
 * Unlink the stored inode of the given file if the file is unlinkable (see
 * isUnlinkableUnlocked() ).
 *
 * @param outInode will be set to the unlinked file (can be NULL if the caller is not interested
 * in the file); it is reset to empty whenever an error is returned
 * @return the result of the unlinkable-check if that was not successful;
 * FhgfsOpsErr_PATHNOTEXISTS if outInode was requested but the inode object could not be created;
 * FhgfsOpsErr_INTERNAL if unlinking the stored inode failed
 */
FhgfsOpsErr InodeFileStore::unlinkFileInodeUnlocked(EntryInfo* entryInfo,
   std::unique_ptr<FileInode>* outInode)
{
   const bool inodeWanted = (outInode != NULL);

   if(inodeWanted)
      outInode->reset();

   std::string entryID = entryInfo->getEntryID();

   FhgfsOpsErr unlinkableRes = isUnlinkableUnlocked(entryInfo);
   if(unlinkableRes != FhgfsOpsErr_SUCCESS)
      return unlinkableRes;

   if (inodeWanted)
   { // the object for the caller is created before the stored inode is unlinked
      outInode->reset(createUnreferencedInodeUnlocked(entryInfo));
      if (!*outInode)
         return FhgfsOpsErr_PATHNOTEXISTS;
   }

   bool unlinkRes = FileInode::unlinkStoredInodeUnlocked(entryID, entryInfo->getIsBuddyMirrored());
   if(unlinkRes)
      return FhgfsOpsErr_SUCCESS;

   // unlink failed => don't hand out the inode object
   if(inodeWanted)
      outInode->reset();

   return FhgfsOpsErr_INTERNAL;
}
/**
 * Write-locked wrapper for unlinkFileInodeUnlocked().
 *
 * @param outInode will be set to the unlinked file (can be NULL if the caller is not interested
 * in the file)
 */
FhgfsOpsErr InodeFileStore::unlinkFileInode(EntryInfo* entryInfo,
   std::unique_ptr<FileInode>* outInode)
{
   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   FhgfsOpsErr unlinkRes = unlinkFileInodeUnlocked(entryInfo, outInode);
   return unlinkRes;
}
/**
 * Begin moving an inode to a remote node by serializing it into the given buffer.
 *
 * Note: This works by serializing the original and marking the object unreferencable (exclusive),
 * so remember to call movingCancel() or movingComplete()
 *
 * @param buf target buffer for serialization (only valid if success is returned)
 * @param bufLen must be at least META_SERBUF_SIZE
 * @param outUsedBufLen the used bufLen (only set if success is returned)
 * @return FhgfsOpsErr_INTERNAL if the buffer is too small, otherwise the result of
 * getUnreferencedInodeUnlocked()
 */
FhgfsOpsErr InodeFileStore::moveRemoteBegin(EntryInfo* entryInfo, char* buf, size_t bufLen,
   size_t* outUsedBufLen)
{
   const char* logContext = "Serialize Inode";

   if(bufLen < META_SERBUF_SIZE)
   {
      LogContext(logContext).log(Log_ERR, "Error: Buffer too small!");
      return FhgfsOpsErr_INTERNAL;
   }

   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   FileInode* inode;
   FhgfsOpsErr retVal = getUnreferencedInodeUnlocked(entryInfo, inode); // does not set refCount
   if (retVal != FhgfsOpsErr_SUCCESS)
      return retVal;

   /* We got an inode, which is in the map, but is unreferenced. Now we are going to exclusively
    * lock it. If another thread should try to reference it, it will fail due to this lock. */
   inode->setExclusiveTID(System::getPosixTID() );

   /* Set the origParentEntryID before serializing and moving the inode / file. It is not set
    * by default to avoid additional diskData for most files. */
   inode->setPersistentOrigParentID(entryInfo->getParentEntryID() );

   Serializer ser(buf, bufLen);
   inode->serializeMetaData(ser);

   // RST info is appended only if present
   if (inode->getIsRstAvailable())
      ser % inode->rstInfo;

   *outUsedBufLen = ser.size();

   return retVal;
}
/**
* Finish the rename/move operation by deleting the inode object.
*/
void InodeFileStore::moveRemoteComplete(const std::string& entryID)
{
// moving succeeded => delete original
RWLockGuard lock(rwlock, SafeRWLock_WRITE);
deleteUnreferencedInodeUnlocked(entryID);
}
/**
 * Delete the referencer of the given ID and remove the ID from the map. The reference count is
 * not checked here. Nothing happens if the ID is not in the map.
 */
void InodeFileStore::deleteUnreferencedInodeUnlocked(const std::string& entryID)
{
   InodeMapIter iter = inodes.find(entryID);
   if(iter == inodes.end() )
      return;

   // file exists in map
   delete iter->second;
   inodes.erase(iter);
}
/**
* Note: This does not load any entries, so it will only return the number of already loaded
* entries. (Only useful for debugging and statistics probably.)
*/
size_t InodeFileStore::getSize()
{
RWLockGuard lock(rwlock, SafeRWLock_READ);
return inodes.size();
}
/**
 * Open a file: reference the inode and let the inode check and register the requested access.
 * If the access check fails, the reference is released again.
 *
 * Note: Open inodes are always also implicitly referenced.
 *
 * @param entryInfo entry info of the file to be opened
 * @param accessFlags OPENFILE_ACCESS_... flags
 * @param outInode the referenced inode on success, NULL otherwise
 * @param loadFromDisk - true for access from DirInode, false for access from MetaStore
 * @param bypassAccessCheck passed on to FileInode::checkAccessAndOpen()
 * @return the result code of referenceFileInode() if no inode could be referenced or if opening
 * succeeded; otherwise the error of FileInode::checkAccessAndOpen()
 */
FhgfsOpsErr InodeFileStore::openFile(EntryInfo* entryInfo, unsigned accessFlags,
   FileInode*& outInode, bool loadFromDisk, bool bypassAccessCheck)
{
   FileInodeRes refRes = referenceFileInode(entryInfo, loadFromDisk, true);

   outInode = refRes.first;
   FhgfsOpsErr referenceRes = refRes.second;

   if (!outInode)
      return referenceRes;

   FhgfsOpsErr openRes = outInode->checkAccessAndOpen(accessFlags, bypassAccessCheck);
   if (openRes == FhgfsOpsErr_SUCCESS)
      return referenceRes;

   // access denied => give back the reference that we took above
   releaseFileInode(outInode);
   outInode = nullptr;

   return openRes;
}
/**
 * Get the statData of an inode, either from the object in the store or from disk.
 *
 * @param loadFromDisk if false and the inode is not in the store, we are not going to try to get
 * data from disk and FhgfsOpsErr_PATHNOTEXISTS is returned instead.
 */
FhgfsOpsErr InodeFileStore::stat(EntryInfo* entryInfo, bool loadFromDisk, StatData& outStatData)
{
   std::string entryID = entryInfo->getEntryID();

   UniqueRWLock lock(rwlock, SafeRWLock_READ);

   InodeMapIter iter = inodes.find(entryID);
   if(iter != inodes.end() )
   { // inode loaded => ask the object while still holding the lock
      FileInode* inode = iter->second->getReferencedObject();
      return inode->getStatData(outStatData);
   }

   lock.unlock(); // no need to keep the lock anymore

   if (!loadFromDisk)
      return FhgfsOpsErr_PATHNOTEXISTS;

   // not loaded => static stat
   return FileInode::getStatData(entryInfo, outStatData);
}
/**
 * Apply the given attributes to a file inode. If the inode is not in the store, a temporary inode
 * object is created from the entryInfo, updated and destroyed again.
 *
 * @param validAttribs SETATTR_CHANGE_...-Flags.
 * @return FhgfsOpsErr_PATHNOTEXISTS if the inode is in the store but exclusively held by another
 * thread, or if it is not in the store and no inode object could be created;
 * FhgfsOpsErr_INTERNAL if setting the attribs failed
 */
FhgfsOpsErr InodeFileStore::setAttr(EntryInfo* entryInfo, int validAttribs,
   SettableFileAttribs* attribs)
{
   std::string entryID = entryInfo->getEntryID();

   RWLockGuard writeGuard(rwlock, SafeRWLock_WRITE);

   InodeMapIter iter = inodes.find(entryID);

   if(iter != inodes.end() )
   { // inode loaded
      FileInode* loadedInode = iter->second->getReferencedObject();

      if(loadedInode->getExclusiveThreadID() &&
         (loadedInode->getExclusiveThreadID() != System::getPosixTID() ) )
         return FhgfsOpsErr_PATHNOTEXISTS;

      bool setRes = loadedInode->setAttrData(entryInfo, validAttribs, attribs);
      if(likely(setRes) )
         return FhgfsOpsErr_SUCCESS; // attr update succeeded

      return FhgfsOpsErr_INTERNAL;
   }

   // not loaded => load, apply, destroy
   // Note: A very uncommon code path, as SetAttrMsgEx::setAttr() references the inode first.
   std::unique_ptr<FileInode> tmpInode(FileInode::createFromEntryInfo(entryInfo));
   if(!tmpInode)
      return FhgfsOpsErr_PATHNOTEXISTS;

   bool setRes = tmpInode->setAttrData(entryInfo, validAttribs, attribs);
   if(likely(setRes) )
      return FhgfsOpsErr_SUCCESS; // attr update succeeded

   return FhgfsOpsErr_INTERNAL;
}
/**
 * Loads a file and inserts it into the map.
 *
 * Note: Caller must make sure that the element wasn't in the map before.
 *
 * @param newElemIter set to the map element of the inode if true is returned, untouched otherwise
 * @return false if no inode object could be created from the entryInfo
 */
bool InodeFileStore::loadAndInsertFileInodeUnlocked(EntryInfo* entryInfo, InodeMapIter& newElemIter)
{
   FileInode* loadedInode = FileInode::createFromEntryInfo(entryInfo);
   if(!loadedInode)
      return false;

   std::string entryID = entryInfo->getEntryID();
   FileInodeReferencer* newRefer = new FileInodeReferencer(loadedInode);

   newElemIter = inodes.insert(InodeMapVal(entryID, newRefer) ).first;

   return true;
}
/**
* Insert the existing FileInodeReferencer into the map.
*
* This is mainly required to move references between stores (per-directory to metaStore)
*/
bool InodeFileStore::insertReferencer(std::string entryID, FileInodeReferencer* fileRefer)
{
RWLockGuard lock(rwlock, SafeRWLock_WRITE);
return this->inodes.insert(InodeMapVal(entryID, fileRefer) ).second;
}
void InodeFileStore::clearStoreUnlocked()
{
App* app = Program::getApp();
LOG_DBG(GENERAL, DEBUG, "InodeFileStore::clearStoreUnlocked",
("# of loaded entries to be cleared", inodes.size()));
for(InodeMapIter iter = inodes.begin(); iter != inodes.end(); iter++)
{
FileInode* file = iter->second->getReferencedObject();
if(unlikely(file->getNumSessionsAll() ) )
{ // check whether file was still open
LOG_DBG(GENERAL, DEBUG, "File was still open during shutdown.", file->getEntryID(),
file->getNumSessionsAll());
if (!app->getSelfTerminate() )
LogContext(__func__).logBacktrace();
}
delete(iter->second);
}
inodes.clear();
}
/**
 * Increase or decrease the link count of an inode by the given value.
 *
 * @param inode the inode whose link count is changed
 * @param entryInfo passed on to FileInode::incDecNumHardLinks()
 * @param value passed on to FileInode::incDecNumHardLinks()
 * @return FhgfsOpsErr_SUCCESS if the update succeeded, FhgfsOpsErr_INTERNAL otherwise
 */
FhgfsOpsErr InodeFileStore::incDecLinkCount(FileInode& inode, EntryInfo* entryInfo, int value)
{
   // (no local copy of the entryID here: it was never used)
   bool setRes = inode.incDecNumHardLinks(entryInfo, value);
   if(likely(setRes) )
   { // update succeeded
      return FhgfsOpsErr_SUCCESS;
   }

   return FhgfsOpsErr_INTERNAL;
}

View File

@@ -0,0 +1,115 @@
#pragma once
#include <common/toolkit/ObjectReferencer.h>
#include <common/Common.h>
#include <common/threading/Mutex.h>
#include <common/toolkit/MetadataTk.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <storage/GlobalInodeLockStore.h>
#include "FileInode.h"
// ObjectReferencer wrapper around a FileInode pointer (this is what the map stores)
typedef ObjectReferencer<FileInode*> FileInodeReferencer;
// map of loaded file inodes; key is the entryID, value is the referencer of the inode
typedef std::map<std::string, FileInodeReferencer*> InodeMap;
typedef InodeMap::iterator InodeMapIter;
typedef InodeMap::const_iterator InodeMapCIter;
typedef InodeMap::value_type InodeMapVal;
// (inode pointer, result code) pair as returned by the referenceFileInode...() methods
typedef std::pair<FileInode*, FhgfsOpsErr> FileInodeRes;
/**
* Layer in between our inodes and the data on the underlying file system. So we read/write from/to
* underlying inodes and this class is to do this corresponding data access.
* This object is used for all file types, for example regular files, but NOT directories.
*
* Loaded inodes are kept in a map (entryID => referencer). Methods with the "Unlocked" suffix do
* not take the rwlock of this store themselves.
*/
class InodeFileStore
{
// these classes are granted access to the private members (e.g. the ...Unlocked() helpers)
friend class DirInode;
friend class MetaStore;
public:
InodeFileStore() {}
// deletes all referencers that are still in the map (see clearStoreUnlocked() )
~InodeFileStore()
{
this->clearStoreUnlocked();
}
bool isInStore(const std::string& fileID);
FileInodeRes referenceFileInode(EntryInfo* entryInfo, bool loadFromDisk, bool checkLockStore);
FileInode* referenceLoadedFile(const std::string& entryID);
bool releaseFileInode(FileInode* inode);
FhgfsOpsErr unlinkFileInode(EntryInfo* entryInfo, std::unique_ptr<FileInode>* outInode);
void unlinkAllFiles();
FhgfsOpsErr moveRemoteBegin(EntryInfo* entryInfo, char* buf, size_t bufLen,
size_t* outUsedBufLen);
void moveRemoteComplete(const std::string& entryID);
size_t getSize();
bool closeFile(EntryInfo* entryInfo, FileInode* inode, unsigned accessFlags,
unsigned* outNumHardlinks, unsigned* outNumRefs, bool& outLastWriterClosed);
FhgfsOpsErr openFile(EntryInfo* entryInfo, unsigned accessFlags,
FileInode*& outInode, bool loadFromDisk, bool bypassAccessCheck);
FhgfsOpsErr stat(EntryInfo* entryInfo, bool loadFromDisk, StatData& outStatData);
FhgfsOpsErr setAttr(EntryInfo* entryInfo, int validAttribs, SettableFileAttribs* attribs);
FhgfsOpsErr isUnlinkable(EntryInfo* entryInfo);
private:
InodeMap inodes; // loaded inodes: entryID => referencer
RWLock rwlock; // e.g. write-locked by insertReferencer() while it modifies the map
unsigned decreaseInodeRefCountUnlocked(InodeMapIter& iter);
FileInodeRes referenceFileInodeUnlocked(EntryInfo* entryInfo, bool loadFromDisk);
FileInodeRes referenceFileInodeUnlockedIgnoreLocking(EntryInfo* entryInfo, bool loadFromDisk);
FhgfsOpsErr getUnreferencedInodeUnlocked(EntryInfo* entryInfo, FileInode*& outInode);
void deleteUnreferencedInodeUnlocked(const std::string& entryID);
FhgfsOpsErr isUnlinkableUnlocked(EntryInfo* entryInfo);
FhgfsOpsErr unlinkFileInodeUnlocked(EntryInfo* entryInfo,
std::unique_ptr<FileInode>* outInode);
bool loadAndInsertFileInodeUnlocked(EntryInfo* entryInfo, InodeMapIter& newElemIter);
bool insertReferencer(std::string entryID, FileInodeReferencer* fileRefer);
FileInodeReferencer* getReferencerAndDeleteFromMap(const std::string& fileID);
void clearStoreUnlocked();
FileInode* referenceFileInodeMapIterUnlocked(InodeMapIter& iter);
FhgfsOpsErr incDecLinkCount(FileInode& inode, EntryInfo* entryInfo, int value);
public:
// inliners
// increase the link count of the inode by one (see incDecLinkCount() )
FhgfsOpsErr incLinkCount(FileInode& inode, EntryInfo* entryInfo)
{
return incDecLinkCount(inode, entryInfo, 1);
}
// decrease the link count of the inode by one (see incDecLinkCount() )
FhgfsOpsErr decLinkCount(FileInode& inode, EntryInfo* entryInfo)
{
return incDecLinkCount(inode, entryInfo, -1);
}
private:
// inliners
/**
* Create an unreferenced file inode from an existing inode on disk.
*
* Note: This only forwards to FileInode::createFromEntryInfo(); the map of this store is not
* touched.
*/
FileInode* createUnreferencedInodeUnlocked(EntryInfo* entryInfo)
{
return FileInode::createFromEntryInfo(entryInfo);
}
};

View File

@@ -0,0 +1,50 @@
#include <common/toolkit/serialization/Serialization.h>
#include <common/toolkit/Random.h>
#include <common/toolkit/StringTk.h>
#include "Locking.h"
/**
 * Field-wise comparison of two entry locks.
 *
 * @return true only if clientNumID, clientFD, ownerPID, lockAckID and lockTypeFlags are all equal
 */
bool EntryLockDetails::operator==(const EntryLockDetails& other) const
{
   // (fields are checked in the same order as before; the first mismatch ends the comparison)

   if (!(clientNumID == other.clientNumID) )
      return false;

   if (!(clientFD == other.clientFD) )
      return false;

   if (!(ownerPID == other.ownerPID) )
      return false;

   if (!(lockAckID == other.lockAckID) )
      return false;

   return lockTypeFlags == other.lockTypeFlags;
}
/**
 * Field-wise comparison of two range locks.
 *
 * @return true only if clientNumID, ownerPID, lockAckID, lockTypeFlags, start and end are all
 *    equal
 */
bool RangeLockDetails::operator==(const RangeLockDetails& other) const
{
   // (fields are checked in the same order as before; the first mismatch ends the comparison)

   if (!(clientNumID == other.clientNumID) )
      return false;

   if (!(ownerPID == other.ownerPID) )
      return false;

   if (!(lockAckID == other.lockAckID) )
      return false;

   if (!(lockTypeFlags == other.lockTypeFlags) )
      return false;

   if (!(start == other.start) )
      return false;

   return end == other.end;
}
void EntryLockDetails::initRandomForSerializationTests()
{
Random rand;
this->clientFD = rand.getNextInt();
this->clientNumID = NumNodeID(rand.getNextInt() );
StringTk::genRandomAlphaNumericString(this->lockAckID, rand.getNextInRange(0, 30) );
this->lockTypeFlags = rand.getNextInt();
this->ownerPID = rand.getNextInt();
}
void RangeLockDetails::initRandomForSerializationTests()
{
Random rand;
this->clientNumID = NumNodeID(rand.getNextInt() );
this->end = rand.getNextInt();
StringTk::genRandomAlphaNumericString(this->lockAckID, rand.getNextInRange(0, 30) );
this->lockTypeFlags = rand.getNextInt();
this->ownerPID = rand.getNextInt();
this->start = rand.getNextInt();
}

View File

@@ -0,0 +1,490 @@
#pragma once
#include <common/Common.h>
#include <common/nodes/NumNodeID.h>
#include <common/storage/StorageDefinitions.h>
/**
 * Kind of file lock that a LockEntryNotificationWork refers to: append lock or entry lock
 * (=> flock).
 */
enum LockEntryNotifyType
{
   LockEntryNotifyType_APPEND = 0, // append lock
   LockEntryNotifyType_FLOCK  = 1, // entry lock (flock)
};
/**
 * Describes how a second range is positioned relative to a first range.
 */
enum RangeOverlapType
{
   RangeOverlapType_NONE         = 0, // no common values
   RangeOverlapType_EQUALS       = 1, // both ranges are identical
   RangeOverlapType_CONTAINS     = 2, // first range wraps around second range
   RangeOverlapType_ISCONTAINED  = 3, // first range lies within second range
   RangeOverlapType_STARTOVERLAP = 4, // second range overlaps start of first range
   RangeOverlapType_ENDOVERLAP   = 5  // second range overlaps end of first range
};
/**
 * Entry-locks are treated per FD, e.g. a single process will block itself when it holds an
 * exclusive lock and tries to get another exclusive lock via another file descriptor. That's why
 * we compare clientID and clientFD. (This is different for range-locks.)
 */
struct EntryLockDetails
{
   /**
    * @param clientNumID numeric ID of the client node that owns the lock
    * @param clientFD handle on the corresponding client
    * @param ownerPID pid on client
    * @param lockAckID ID for the ack message when the lock is granted
    * @param lockTypeFlags ENTRYLOCKTYPE_...
    */
   EntryLockDetails(NumNodeID clientNumID, int64_t clientFD, int ownerPID,
      const std::string& lockAckID, int lockTypeFlags):
      clientNumID(clientNumID), clientFD(clientFD), ownerPID(ownerPID), lockAckID(lockAckID),
      lockTypeFlags(lockTypeFlags)
   { }

   /**
    * Constructor for unset lock (empty clientID).
    *
    * The numeric fields are zeroed, so that toString(), operator== and serialization of an
    * unset lock never read indeterminate values.
    */
   EntryLockDetails() : clientFD(0), ownerPID(0), lockTypeFlags(0) {}

   NumNodeID clientNumID;
   int64_t clientFD; // unique handle on the corresponding client
   int32_t ownerPID; // pid on client (just informative, because shared on fork() )
   std::string lockAckID; /* ID for ack message when lock is granted (and to identify duplicate
                             requests, so it must be a globally unique ID) */
   int32_t lockTypeFlags; // ENTRYLOCKTYPE_...

   /**
    * (De-)serialization of all fields; the lockAckID goes through serdes::stringAlign4.
    */
   template<typename This, typename Ctx>
   static void serialize(This obj, Ctx& ctx)
   {
      ctx
         % obj->clientNumID
         % obj->clientFD
         % obj->ownerPID
         % serdes::stringAlign4(obj->lockAckID)
         % obj->lockTypeFlags;
   }

   bool operator==(const EntryLockDetails& other) const;

   bool operator!=(const EntryLockDetails& other) const { return !(*this == other); }

   void initRandomForSerializationTests();

   /**
    * @return true if the clientNumID converts to true, i.e. if this is not an unset lock
    */
   bool isSet() const
   {
      return bool(clientNumID);
   }

   /**
    * @return true if the ENTRYLOCKTYPE_NOWAIT flag is not set
    */
   bool allowsWaiting() const
   {
      return !(lockTypeFlags & ENTRYLOCKTYPE_NOWAIT);
   }

   bool isUnlock() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_UNLOCK) != 0;
   }

   /**
    * Turn this into an unlock request: sets the unlock flag and clears the exclusive, shared and
    * cancel flags (other flags, e.g. nowait, are left as they are).
    */
   void setUnlock()
   {
      lockTypeFlags |= ENTRYLOCKTYPE_UNLOCK;
      lockTypeFlags &= ~(ENTRYLOCKTYPE_EXCLUSIVE | ENTRYLOCKTYPE_SHARED | ENTRYLOCKTYPE_CANCEL);
   }

   bool isExclusive() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_EXCLUSIVE) != 0;
   }

   bool isShared() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_SHARED) != 0;
   }

   bool isCancel() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_CANCEL) != 0;
   }

   // (const, so that it can also be called on const locks, e.g. on elements of a set)
   NumNodeID getClientNumID() const
   {
      return clientNumID;
   }

   /**
    * Compares clientID and clientFD.
    */
   bool equalsHandle(const EntryLockDetails& other) const
   {
      // note: if you make changes here, you (probably) also need to change the MapComparator
      return (clientFD == other.clientFD) && (clientNumID == other.clientNumID);
   }

   /**
    * @return human-readable dump of all fields; the flags are appended as letters (u=unlock,
    *    s=shared, x=exclusive, c=cancel, w=waiting allowed)
    */
   std::string toString() const
   {
      std::ostringstream outStream;

      outStream <<
         "clientNumID: " << clientNumID << "; " <<
         "clientFD: " << clientFD << "; " <<
         "ownerPID: " << ownerPID << "; " <<
         "lockAckID: " << lockAckID << "; " <<
         "lockTypeFlags: ";

      // append lockTypeFlags
      if(isUnlock() )
         outStream << "u";
      if(isShared() )
         outStream << "s";
      if(isExclusive() )
         outStream << "x";
      if(isCancel() )
         outStream << "c";
      if(allowsWaiting() )
         outStream << "w";

      return outStream.str();
   }

   struct MapComparator
   {
      /**
       * Order by file handle (clientFD first, then clientNumID).
       *
       * @return true if a is smaller than b
       */
      bool operator() (const EntryLockDetails& a, const EntryLockDetails& b) const
      {
         return (a.clientFD < b.clientFD) ||
            ( (a.clientFD == b.clientFD) && (a.clientNumID < b.clientNumID) );
      }
   };
};
// set ordered by handle (clientFD, clientNumID), so it holds at most one lock per handle
typedef std::set<EntryLockDetails, EntryLockDetails::MapComparator> EntryLockDetailsSet;
typedef EntryLockDetailsSet::iterator EntryLockDetailsSetIter;
typedef EntryLockDetailsSet::const_iterator EntryLockDetailsSetCIter;
// plain list of entry locks (used for the waiter queues in EntryLockQueuesContainer)
typedef std::list<EntryLockDetails> EntryLockDetailsList;
typedef EntryLockDetailsList::iterator EntryLockDetailsListIter;
typedef EntryLockDetailsList::const_iterator EntryLockDetailsListCIter;
/**
 * Bundle of pointers to the lock state and the waiter queues of one entry lock kind (flock or
 * append).
 *
 * This exists so that the different flock and append queues can be handed to the generic lock
 * management methods in one piece. Only the given pointers are stored here.
 */
class EntryLockQueuesContainer
{
   public:
      /**
       * @param lockType defaults to LockEntryNotifyType_FLOCK
       */
      EntryLockQueuesContainer(EntryLockDetails* exclLock, EntryLockDetailsSet* sharedLocks,
         EntryLockDetailsList* waitersExclLock, EntryLockDetailsList* waitersSharedLock,
         StringSet* waitersLockIDs, LockEntryNotifyType lockType = LockEntryNotifyType_FLOCK)
         : exclLock(exclLock),
           sharedLocks(sharedLocks),
           waitersExclLock(waitersExclLock),
           waitersSharedLock(waitersSharedLock),
           waitersLockIDs(waitersLockIDs),
           lockType(lockType)
      { }

      EntryLockDetails* exclLock; // the current exclusive lock
      EntryLockDetailsSet* sharedLocks; // the current shared locks
      EntryLockDetailsList* waitersExclLock; // queue (new waiters go to the end, pop from top)
      EntryLockDetailsList* waitersSharedLock; // queue (new waiters go to the end, pop from top)
      StringSet* waitersLockIDs; // lockIDs that are currently enqueued (fast duplicate check)

      LockEntryNotifyType lockType; // to be passed to LockingNotifier
};
/**
* A simple container for append entry lock queue pointers
*
* This contains dummy queues for shared locks, because append does not use shared locks, but the
* generic lock management methods expect them to exist (which could be optimized out if necessary).
*
* NOTE(review): the base class stores pointers to the dummy members of this object, so an
* implicitly generated copy would still point to the dummies of the object it was copied from -
* confirm that instances are never copied.
*/
class AppendLockQueuesContainer : public EntryLockQueuesContainer
{
public:
/**
* The lockType of the base is always set to LockEntryNotifyType_APPEND.
*
* Note: The dummies are not constructed yet when their addresses are passed to the base class
* constructor; this works because the base constructor only stores the pointers.
*/
AppendLockQueuesContainer(EntryLockDetails* exclLock, EntryLockDetailsList* waitersExclLock,
StringSet* waitersLockIDsFLock) :
EntryLockQueuesContainer(exclLock, &sharedLocksDummy, waitersExclLock,
&waitersSharedLockDummy, waitersLockIDsFLock, LockEntryNotifyType_APPEND)
{
// (all inits done in initializer list)
}
private:
EntryLockDetailsSet sharedLocksDummy; // dummy, because append uses exclusive only
EntryLockDetailsList waitersSharedLockDummy; // dummy, because append uses exclusive only
};
/**
 * Range-locks are treated per-process, e.g. independent of different file descriptors or threads,
 * a process cannot block itself with two exclusive locks. That's why we only compare clientID and
 * ownerPID here. (This is different for entry-locks.)
 */
struct RangeLockDetails
{
   /**
    * @param clientNumID numeric ID of the client node that owns the lock
    * @param ownerPID pid on client
    * @param lockAckID ID for the ack message when the lock is granted
    * @param lockTypeFlags ENTRYLOCKTYPE_...
    * @param start first offset of the range
    * @param end last offset of the range (inclusive)
    */
   RangeLockDetails(NumNodeID clientNumID, int ownerPID, const std::string& lockAckID,
      int lockTypeFlags, uint64_t start, uint64_t end) :
      clientNumID(clientNumID), ownerPID(ownerPID), lockAckID(lockAckID),
      lockTypeFlags(lockTypeFlags), start(start), end(end)
   { }

   /**
    * Constructor for unset lock (empty clientID).
    *
    * The numeric fields are zeroed, so that toString(), operator== and serialization of an
    * unset lock never read indeterminate values.
    */
   RangeLockDetails() : ownerPID(0), lockTypeFlags(0), start(0), end(0) {}

   NumNodeID clientNumID;
   int32_t ownerPID; // pid on client
   std::string lockAckID; /* ID for ack message when lock is granted (and to identify duplicate
                             requests, so it must be globally unique) */
   int32_t lockTypeFlags; // ENTRYLOCKTYPE_...

   uint64_t start;
   uint64_t end; // inclusive end

   /**
    * (De-)serialization of all fields; the lockAckID goes through serdes::stringAlign4.
    */
   template<typename This, typename Ctx>
   static void serialize(This obj, Ctx& ctx)
   {
      ctx
         % obj->clientNumID
         % obj->ownerPID
         % serdes::stringAlign4(obj->lockAckID)
         % obj->lockTypeFlags
         % obj->start
         % obj->end;
   }

   bool operator==(const RangeLockDetails& other) const;

   bool operator!=(const RangeLockDetails& other) const { return !(*this == other); }

   void initRandomForSerializationTests();

   /**
    * @return true if the clientNumID converts to true, i.e. if this is not an unset lock
    */
   bool isSet() const
   {
      return bool(clientNumID);
   }

   /**
    * @return true if the ENTRYLOCKTYPE_NOWAIT flag is not set
    */
   bool allowsWaiting() const
   {
      return !(lockTypeFlags & ENTRYLOCKTYPE_NOWAIT);
   }

   bool isUnlock() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_UNLOCK) != 0;
   }

   /**
    * Turn this into an unlock request: sets the unlock flag and clears the exclusive, shared and
    * cancel flags (other flags, e.g. nowait, are left as they are).
    */
   void setUnlock()
   {
      lockTypeFlags |= ENTRYLOCKTYPE_UNLOCK;
      lockTypeFlags &= ~(ENTRYLOCKTYPE_EXCLUSIVE | ENTRYLOCKTYPE_SHARED | ENTRYLOCKTYPE_CANCEL);
   }

   bool isExclusive() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_EXCLUSIVE) != 0;
   }

   bool isShared() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_SHARED) != 0;
   }

   bool isCancel() const
   {
      return (lockTypeFlags & ENTRYLOCKTYPE_CANCEL) != 0;
   }

   /**
    * Compares clientID and ownerPID.
    */
   bool equalsHandle(const RangeLockDetails& other) const
   {
      // note: if you make changes here, you (probably) also need to change the MapComparators
      return (ownerPID == other.ownerPID) && (clientNumID == other.clientNumID);
   }

   /**
    * Check if ranges overlap or directly extend each other.
    * Note: this just checks ranges, not owner (which is good, because we rely on that at some
    * points in the code)
    *
    * @return false only if there is a gap of at least one value between the two ranges
    */
   bool isMergeable(const RangeLockDetails& other) const
   {
      /* note: the gap checks are written as differences instead of "x+1" comparisons, because
         "end+1" wraps around to 0 for a range that ends at the maximum uint64_t value, which
         would make such a range look non-mergeable with everything. */

      // other region ends before this one and does not directly extend it
      if( (other.end < start) && ( (start - other.end) > 1) )
         return false;

      // other region starts after this one and does not directly extend it
      if( (other.start > end) && ( (other.start - end) > 1) )
         return false;

      // other range overlaps or is directly extending this range
      return true;
   }

   /**
    * Check if ranges have common values.
    */
   bool overlaps(const RangeLockDetails& other) const
   {
      // check if other region ends before this one or starts after this one
      if( (other.end < start) || (other.start > end) )
         return false;

      // other range not before or after this one => overlap
      return true;
   }

   /**
    * Like overlaps(), but also tells how the other range is positioned relative to this range.
    *
    * NOTE(review): when the last check is reached with "start < other.start", the other range
    * covers the tail of this range (e.g. this=[0,10], other=[5,15]), yet
    * RangeOverlapType_STARTOVERLAP is returned for it (and ENDOVERLAP for the mirrored case).
    * This looks swapped relative to the comments on the enum values; the behavior is
    * intentionally left as it is, because callers are not visible from here - confirm.
    */
   RangeOverlapType overlapsEx(const RangeLockDetails& other) const
   {
      if( (other.end < start) || (other.start > end) )
         return RangeOverlapType_NONE; // other region ends before this one or starts after this one

      if( (other.start == start) && (other.end == end) )
         return RangeOverlapType_EQUALS;

      if( (start <= other.start) && (end >= other.end) )
         return RangeOverlapType_CONTAINS; // this range contains other range

      if( (start >= other.start) && (end <= other.end) )
         return RangeOverlapType_ISCONTAINED; // this range is contained in other range

      // only one-sided overlaps are left at this point

      if(start < other.start)
         return RangeOverlapType_STARTOVERLAP;

      return RangeOverlapType_ENDOVERLAP;
   }

   /**
    * Trim the part of this lock that overlaps with trimmer.
    * The caller must make sure that the resulting region has "length>0" and that this is a
    * real one-sided overlap (none of the regions contains the other except if start or end match).
    */
   void trim(const RangeLockDetails& trimmer)
   {
      if(trimmer.end < end)
         start = trimmer.end+1; // trim on start-side
      else
         end = trimmer.start-1; // trim on end-side
   }

   /**
    * Split this region by the splitter region.
    * The result for this will be the remaining left side of splitter, outEndRegion will be set to
    * the remaining right side of splitter.
    * The caller must make sure that both resulting regions have "length>0", so splitter must be
    * completely contained in this and start/end of this/splitter may not match.
    */
   void split(const RangeLockDetails& splitter, RangeLockDetails& outEndRegion)
   {
      // right side remainder (copies all other fields of this lock)
      outEndRegion = *this;
      outEndRegion.start = splitter.end+1;

      // trim left side remainder
      end = splitter.start-1;
   }

   /**
    * Extends this region by the dimension of other region.
    * Caller must make sure that the regions actually do overlap.
    */
   void merge(const RangeLockDetails& other)
   {
      start = BEEGFS_MIN(start, other.start);
      end = BEEGFS_MAX(end, other.end);
   }

   /**
    * @return human-readable dump of all fields; the flags are appended as letters (u=unlock,
    *    s=shared, x=exclusive, c=cancel, w=waiting allowed)
    */
   std::string toString() const
   {
      std::ostringstream outStream;

      outStream <<
         "clientNumID: " << clientNumID << "; " <<
         "PID: " << ownerPID << "; " <<
         "lockAckID: " << lockAckID << "; " <<
         "range: " << start << " - " << end << "; " <<
         "lockTypeFlags: ";

      // append lockTypeFlags
      if(isUnlock() )
         outStream << "u";
      if(isShared() )
         outStream << "s";
      if(isExclusive() )
         outStream << "x";
      if(isCancel() )
         outStream << "c";
      if(allowsWaiting() )
         outStream << "w";

      return outStream.str();
   }

   struct MapComparatorShared
   {
      /**
       * Order by file handle, then range start (to make overlap detection for same handle easy).
       * The handle is compared as ownerPID first, then clientNumID.
       *
       * @return true if a is smaller than b
       */
      bool operator() (const RangeLockDetails& a, const RangeLockDetails& b) const
      {
         if(a.ownerPID < b.ownerPID)
            return true;
         if(a.ownerPID > b.ownerPID)
            return false;

         // equal ownerPID

         if(a.clientNumID < b.clientNumID)
            return true;
         if(a.clientNumID > b.clientNumID)
            return false;

         // equal clientID

         if(a.start < b.start)
            return true;

         return false;
      }
   };

   struct MapComparatorExcl
   {
      /**
       * Order by range start (to make general overlap detection easy).
       *
       * @return true if a is smaller than b
       */
      bool operator() (const RangeLockDetails& a, const RangeLockDetails& b) const
      {
         // note: this only works because we cannot have range overlaps for exclusive locks
         return (a.start < b.start);
      }
   };
};
// shared range locks, ordered by handle (ownerPID, clientNumID) and then by range start
typedef std::set<RangeLockDetails, RangeLockDetails::MapComparatorShared> RangeLockSharedSet;
typedef RangeLockSharedSet::iterator RangeLockSharedSetIter;
typedef RangeLockSharedSet::const_iterator RangeLockSharedSetCIter;
// exclusive range locks, ordered by range start only
typedef std::set<RangeLockDetails, RangeLockDetails::MapComparatorExcl> RangeLockExclSet;
typedef RangeLockExclSet::iterator RangeLockExclSetIter;
typedef RangeLockExclSet::const_iterator RangeLockExclSetCIter;
// plain list of range locks
typedef std::list<RangeLockDetails> RangeLockDetailsList;
typedef RangeLockDetailsList::iterator RangeLockDetailsListIter;
typedef RangeLockDetailsList::const_iterator RangeLockDetailsListCIter;

View File

@@ -0,0 +1,65 @@
#pragma once
#include <storage/DirInode.h>
#include <storage/FileInode.h>
class MetaStore;
// this class is used to transport referenced file inode out of and back into the meta store.
// since file inodes must keep their parent directory alive, a handle to an inode also requires a
// handle to a directory to function properly.
//
// the handle is move-only. it only carries the two pointers; nothing is released when a handle is
// destroyed (there is no destructor), so a referenced inode has to be given back to the MetaStore
// explicitly.
class MetaFileHandle
{
   friend class MetaStore;

   public:
      // result type of the bool conversion (a member function pointer, so that a handle can be
      // tested in conditions without being convertible to integers)
      typedef void (MetaFileHandle::*bool_type)();

      // creates an empty handle (tests as false)
      MetaFileHandle():
         inode(nullptr), parent(nullptr)
      {}

      MetaFileHandle(FileInode* inode, DirInode* parent):
         inode(inode), parent(parent)
      {}

      MetaFileHandle(const MetaFileHandle&) = delete;
      MetaFileHandle& operator=(const MetaFileHandle&) = delete;

      // takes over the pointers of other and leaves other empty.
      // noexcept (only pointers are swapped), so that containers and std::pair can move handles
      // instead of failing to copy them.
      MetaFileHandle(MetaFileHandle&& other) noexcept:
         inode(nullptr), parent(nullptr)
      {
         swap(other);
      }

      // takes over the pointers of other and leaves other empty; the previous pointers of this
      // handle are dropped without any release.
      MetaFileHandle& operator=(MetaFileHandle&& other) noexcept
      {
         MetaFileHandle(std::move(other)).swap(*this);
         return *this;
      }

      FileInode* operator->() const { return inode; }
      FileInode& operator*() const { return *inode; }

      FileInode* get() const { return inode; }

      // true-ish if this handle has an inode
      operator bool_type() const { return inode ? &MetaFileHandle::bool_value : nullptr; }

      void swap(MetaFileHandle& other) noexcept
      {
         std::swap(inode, other.inode);
         std::swap(parent, other.parent);
      }

      friend void swap(MetaFileHandle& a, MetaFileHandle& b) noexcept
      {
         a.swap(b);
      }

   private:
      FileInode* inode;
      DirInode* parent;

      void bool_value() {} // only exists to have a non-null value for bool_type
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,202 @@
#pragma once
#include <common/fsck/FsckDirInode.h>
#include <common/storage/striping/StripePattern.h>
#include <common/storage/RemoteStorageTarget.h>
#include <common/storage/Metadata.h>
#include <common/storage/StorageDefinitions.h>
#include <common/storage/StorageErrors.h>
#include <common/storage/EntryInfo.h>
#include <common/threading/SafeRWLock.h>
#include <common/threading/Atomics.h>
#include <common/toolkit/FsckTk.h>
#include <common/Common.h>
#include <storage/GlobalInodeLockStore.h>
#include <storage/IncompleteInode.h>
#include <storage/MkFileDetails.h>
#include <session/EntryLock.h>
#include "DirEntry.h"
#include "InodeDirStore.h"
#include "InodeFileStore.h"
#include "MetadataEx.h"
#include "MetaFileHandle.h"
// (file handle, result code) pair as returned by the referenceFile...() methods
typedef std::pair<MetaFileHandle, FhgfsOpsErr> MetaFileHandleRes;
/*
* This is the main class for all client side posix io operations regarding the meta server.
* So client side net message will do io via this class.
*
* Naming: Private methods with the "Unlocked" suffix have a public counterpart without the suffix
* in most cases; see the locking notes at the individual implementations for what the caller has
* to hold.
*/
class MetaStore
{
public:
DirInode* referenceDir(const std::string& dirID, const bool isBuddyMirrored,
const bool forceLoad);
void releaseDir(const std::string& dirID);
MetaFileHandleRes referenceFile(EntryInfo* entryInfo, bool checkLockStore = true);
MetaFileHandle referenceLoadedFile(const std::string& parentEntryID,
bool parentIsBuddyMirrored, const std::string& entryID);
bool releaseFile(const std::string& parentEntryID, MetaFileHandle& inode);
bool referenceInode(const std::string& entryID, bool isBuddyMirrored,
MetaFileHandle& outFileInode, DirInode*& outDirInode);
FhgfsOpsErr openFile(EntryInfo* entryInfo, unsigned accessFlags, bool bypassAccessCheck,
MetaFileHandle& outInode, bool checkDisposalFirst = false);
void closeFile(EntryInfo* entryInfo, MetaFileHandle inode, unsigned accessFlags,
unsigned* outNumHardlinks, unsigned* outNumRefs, bool& outLastWriterClosed);
FhgfsOpsErr stat(EntryInfo* entryInfo, bool loadFromDisk, StatData& outStatData,
NumNodeID* outParentNodeID = NULL, std::string* outParentEntryID = NULL);
FhgfsOpsErr setAttr(EntryInfo* entryInfo, int validAttribs, SettableFileAttribs* attribs);
FhgfsOpsErr incDecLinkCount(EntryInfo* entryInfo, int value);
FhgfsOpsErr setDirParent(EntryInfo* entryInfo, NumNodeID parentNodeID);
FhgfsOpsErr mkNewMetaFile(DirInode& dir, MkFileDetails* mkDetails,
std::unique_ptr<StripePattern> stripePattern, RemoteStorageTarget* rstInfo,
EntryInfo* outEntryInfo, FileInodeStoreData* outInodeData);
FhgfsOpsErr makeDirInode(DirInode& inode);
FhgfsOpsErr makeDirInode(DirInode& inode, const CharVector& defaultACLXAttr,
const CharVector& accessACLXAttr);
FhgfsOpsErr removeDirInode(const std::string& entryID, bool isBuddyMirrored);
FhgfsOpsErr unlinkInode(EntryInfo* entryInfo, std::unique_ptr<FileInode>* outInode);
FhgfsOpsErr fsckUnlinkFileInode(const std::string& entryID, bool isBuddyMirrored);
FhgfsOpsErr unlinkFile(DirInode& dir, const std::string& fileName,
EntryInfo* outEntryInfo, std::unique_ptr<FileInode>* outInode, unsigned& outNumHardlinks);
FhgfsOpsErr unlinkFileInode(EntryInfo* delFileInfo, std::unique_ptr<FileInode>* outInode,
unsigned& outNumHardlinks);
FhgfsOpsErr unlinkInodeLater(EntryInfo* entryInfo, bool wasInlined);
// rename within one directory on this server (implemented in MetaStoreRenameHelper.cpp)
FhgfsOpsErr renameInSameDir(DirInode& parentDir, const std::string& fromName,
const std::string& toName, std::unique_ptr<FileInode>* outUnlinkInode,
DirEntry*& outOverWrittenEntry, bool& outUnlinkedWasInlined);
FhgfsOpsErr moveRemoteFileInsert(EntryInfo* fromFileInfo, DirInode& toParent,
const std::string& newEntryName, const char* buf, uint32_t bufLen,
std::unique_ptr<FileInode>* outUnlinkedInode, EntryInfo* overWriteInfo, EntryInfo& newFileInfo);
FhgfsOpsErr moveRemoteFileBegin(DirInode& dir, EntryInfo* entryInfo, char* buf, size_t bufLen,
size_t* outUsedBufLen);
void moveRemoteFileComplete(DirInode& dir, const std::string& entryID);
FhgfsOpsErr getAllInodesIncremental(unsigned hashDirNum, int64_t lastOffset,
unsigned maxOutInodes, FsckDirInodeList* outDirInodes, FsckFileInodeList* outFileInodes,
int64_t* outNewOffset, bool isBuddyMirrored);
FhgfsOpsErr getAllEntryIDFilesIncremental(unsigned firstLevelhashDirNum,
unsigned secondLevelhashDirNum, int64_t lastOffset, unsigned maxOutEntries,
StringList* outEntryIDFiles, int64_t* outNewOffset, bool buddyMirrored);
void getReferenceStats(size_t* numReferencedDirs, size_t* numReferencedFiles);
void getCacheStats(size_t* numCachedDirs);
bool cacheSweepAsync();
FhgfsOpsErr insertDisposableFile(FileInode* inode);
std::pair<FhgfsOpsErr, bool> getEntryData(DirInode *dirInode, const std::string& entryName,
EntryInfo* outInfo, FileInodeStoreData* outInodeMetaData);
FhgfsOpsErr getEntryData(EntryInfo* inEntryInfo, FileInodeStoreData* outInodeMetaData);
FhgfsOpsErr linkInSameDir(DirInode& parentDir, EntryInfo* fromFileInfo,
const std::string& fromName, const std::string& toName);
std::pair<FhgfsOpsErr, unsigned> makeNewHardlink(EntryInfo* fromFileInfo);
FhgfsOpsErr verifyAndMoveFileInode(DirInode& parentDir, EntryInfo* fileInfo,
FileInodeMode moveMode);
FhgfsOpsErr checkAndRepairDupFileInode(DirInode& parentDir, EntryInfo* entryInfo);
FhgfsOpsErr getRawMetadata(const Path& path, const char* attrName, CharVector& contents);
std::pair<FhgfsOpsErr, IncompleteInode> beginResyncFor(const Path& path, bool isDirectory);
FhgfsOpsErr unlinkRawMetadata(const Path& path);
FhgfsOpsErr setFileState(EntryInfo* entryInfo, const FileState& state);
void invalidateMirroredDirInodes();
private:
InodeDirStore dirStore;
/* We need to avoid to use that one, as it is a global store, with possible lots of entries.
* So access to the map is slow and inserting entries blocks the entire MetaStore */
InodeFileStore fileStore;
GlobalInodeLockStore inodeLockStore;
RWLock rwlock; /* note: this is mostly not used as a read/write-lock but rather a shared/excl
lock (because we're not really modifying anything directly) - especially relevant for the
multiple dirStore locking dual-move methods */
FhgfsOpsErr isFileUnlinkable(DirInode& subDir, EntryInfo* entryInfo);
FhgfsOpsErr mkMetaFileUnlocked(DirInode& dir, const std::string& entryName,
EntryInfo* entryInfo, FileInode* inode);
FhgfsOpsErr unlinkInodeUnlocked(EntryInfo* entryInfo, DirInode* subDir,
std::unique_ptr<FileInode>* outInode);
FhgfsOpsErr unlinkInodeLaterUnlocked(EntryInfo* entryInfo, bool wasInlined);
FhgfsOpsErr unlinkFileUnlocked(DirInode& subdir, const std::string& fileName,
std::unique_ptr<FileInode>* outInode, EntryInfo* outEntryInfo, bool& outWasInlined,
unsigned& outNumHardlinks);
FhgfsOpsErr unlinkDirEntryWithInlinedInodeUnlocked(const std::string& entryName,
DirInode& subDir, DirEntry* dirEntry, unsigned unlinkTypeFlags,
std::unique_ptr<FileInode>* outInode, unsigned& outNumHardlinks);
FhgfsOpsErr unlinkDentryAndInodeUnlocked(const std::string& fileName, DirInode& subdir,
DirEntry* dirEntry, unsigned unlinkTypeFlags, std::unique_ptr<FileInode>* outInode,
unsigned& outNumHardlinks);
// takes the MetaStore read-lock and the parentDir write-lock, then calls the Unlocked variant
FhgfsOpsErr unlinkOverwrittenEntry(DirInode& parentDir, DirEntry* overWrittenEntry,
std::unique_ptr<FileInode>* outInode);
FhgfsOpsErr unlinkOverwrittenEntryUnlocked(DirInode& parentDir, DirEntry* overWrittenEntry,
std::unique_ptr<FileInode>* outInode);
DirInode* referenceDirUnlocked(const std::string& dirID, bool isBuddyMirrored,
bool forceLoad);
void releaseDirUnlocked(const std::string& dirID);
MetaFileHandleRes referenceFileUnlocked(EntryInfo* entryInfo, bool checkLockStore = true);
MetaFileHandleRes referenceFileUnlocked(DirInode& subDir, EntryInfo* entryInfo,
bool checkLockStore = true);
MetaFileHandle referenceLoadedFileUnlocked(const std::string& parentEntryID,
bool isBuddyMirrored, const std::string& entryID);
MetaFileHandle referenceLoadedFileUnlocked(DirInode& subDir, const std::string& entryID);
bool releaseFileUnlocked(const std::string& parentEntryID, MetaFileHandle& inode);
bool releaseFileUnlocked(DirInode& subDir, MetaFileHandle& inode);
MetaFileHandleRes tryReferenceFileWriteLocked(EntryInfo* entryInfo, bool checkLockStore = true);
FhgfsOpsErr tryOpenFileWriteLocked(EntryInfo* entryInfo, unsigned accessFlags, bool bypassAccessCheck,
MetaFileHandle& outInode);
bool moveReferenceToMetaFileStoreUnlocked(const std::string& parentEntryID,
bool parentIsBuddyMirrored, const std::string& entryID);
FhgfsOpsErr performRenameEntryInSameDir(DirInode& dir, const std::string& fromName,
const std::string& toName, DirEntry** outOverwrittenEntry);
FhgfsOpsErr checkRenameOverwrite(EntryInfo* fromEntry, EntryInfo* overWriteEntry,
bool& outIsSameInode);
FhgfsOpsErr setAttrUnlocked(EntryInfo* entryInfo, int validAttribs,
SettableFileAttribs* attribs);
FhgfsOpsErr incDecLinkCountUnlocked(EntryInfo* entryInfo, int value);
FhgfsOpsErr verifyAndMoveFileInodeUnlocked(DirInode& parentDir, EntryInfo* fileInfo,
FileInodeMode moveMode);
FhgfsOpsErr deinlineFileInode(DirInode& parentDir, EntryInfo* entryInfo,
DirEntry& dentry, const std::string& dirEntryPath);
FhgfsOpsErr reinlineFileInode(DirInode& parentDir, EntryInfo* entryInfo,
DirEntry& dentry, const std::string& dirEntryPath);
public:
// getters & setters
// returns a pointer to the lock store member of this MetaStore (never NULL)
GlobalInodeLockStore* getInodeLockStore()
{
return &inodeLockStore;
}
// inliners
};

View File

@@ -0,0 +1,527 @@
/*
* MetaStoreRenameHelper.cpp
*
* These methods belong to class MetaStore, but are all related to rename()
*/
#include <common/storage/striping/Raid0Pattern.h>
#include <common/toolkit/FsckTk.h>
#include <net/msghelpers/MsgHelperStat.h>
#include <net/msghelpers/MsgHelperMkFile.h>
#include <program/Program.h>
#include "MetaStore.h"
#include <sys/types.h>
#include <dirent.h>
#include "MetaStore.h"
#include <boost/lexical_cast.hpp>
/**
* Simple rename on the same server in the same directory.
*
* Locking: The MetaStore read-lock and the parentDir write-lock are taken here and released
* again on every path before returning.
*
* @param parentDir the directory that contains fromName (and toName afterwards)
* @param outUnlinkInode is the inode of a dirEntry being possibly overwritten (toName already
* existed); reset if the overwritten entry did not have an inlined inode.
* @param outOverWrittenEntry set to NULL first, then filled by performRenameEntryInSameDir();
* deleted here again if the rename fails, otherwise it is handed out as it is (presumably the
* caller has to delete it then - confirm against the callers).
* @param outUnlinkedWasInlined only assigned if an entry was overwritten.
* @return the result of performRenameEntryInSameDir(); a failed unlink of the overwritten entry is
* only logged and does not change the return value.
*/
FhgfsOpsErr MetaStore::renameInSameDir(DirInode& parentDir, const std::string& fromName,
const std::string& toName, std::unique_ptr<FileInode>* outUnlinkInode,
DirEntry*& outOverWrittenEntry, bool& outUnlinkedWasInlined)
{
const char* logContext = "Rename in dir";
SafeRWLock safeLock(&rwlock, SafeRWLock_READ); // L O C K
SafeRWLock fromMutexLock(&parentDir.rwlock, SafeRWLock_WRITE); // L O C K ( F R O M )
FhgfsOpsErr retVal;
FhgfsOpsErr unlinkRes = FhgfsOpsErr_SUCCESS; // initialize just to please compiler
outOverWrittenEntry = NULL;
retVal = performRenameEntryInSameDir(parentDir, fromName, toName, &outOverWrittenEntry);
if (retVal != FhgfsOpsErr_SUCCESS)
{
// rename failed: release both locks (in reverse order) and drop the entry object
fromMutexLock.unlock();
safeLock.unlock();
SAFE_DELETE(outOverWrittenEntry);
return retVal;
}
EntryInfo unlinkEntryInfo;
// unlink for non-inlined inode will be handled later
if (outOverWrittenEntry)
{
// keep the entry info of the overwritten entry for a possible unlinkInodeLater() below
const std::string& parentDirID = parentDir.getID();
outOverWrittenEntry->getEntryInfo(parentDirID, 0, &unlinkEntryInfo);
outUnlinkedWasInlined = outOverWrittenEntry->getIsInodeInlined();
if (outOverWrittenEntry->getIsInodeInlined())
{
unlinkRes = unlinkOverwrittenEntryUnlocked(parentDir, outOverWrittenEntry, outUnlinkInode);
}
else
{
// no inode is handed out for a non-inlined inode
outUnlinkInode->reset();
unlinkRes = FhgfsOpsErr_SUCCESS;
}
}
/* Now update the ctime (attribChangeTime) of the renamed entry.
* Only do that for Directory dentry after giving up the DirInodes (fromMutex) lock
* as dirStore.setAttr() will acquire the InodeDirStore:: lock
* and the lock order is InodeDirStore:: and then DirInode:: (risk of deadlock) */
DirEntry* entry = parentDir.dirEntryCreateFromFileUnlocked(toName);
if (likely(entry) ) // entry was just renamed to, so very likely it exists
{
EntryInfo entryInfo;
const std::string& parentID = parentDir.getID();
entry->getEntryInfo(parentID, 0, &entryInfo);
// the dir lock is released before setAttrUnlocked() (see lock order note above)
fromMutexLock.unlock();
setAttrUnlocked(&entryInfo, 0, NULL); /* This will fail if the DirInode is on another
* meta server, but as updating the ctime is not
* a real posix requirement (but filesystems usually
* do it) we simply ignore this issue for now. */
SAFE_DELETE(entry);
}
else
fromMutexLock.unlock();
safeLock.unlock();
// unlink later must be called after releasing all locks
if (outOverWrittenEntry)
{
if (unlinkRes == FhgfsOpsErr_INUSE)
{
unlinkRes = unlinkInodeLater(&unlinkEntryInfo, outUnlinkedWasInlined );
if (unlinkRes == FhgfsOpsErr_AGAIN)
{
// retry the direct unlink; this variant takes the required locks itself
unlinkRes = unlinkOverwrittenEntry(parentDir, outOverWrittenEntry, outUnlinkInode);
}
}
if (unlinkRes != FhgfsOpsErr_SUCCESS && unlinkRes != FhgfsOpsErr_PATHNOTEXISTS)
{
// note: the unlink error is only logged; retVal (the rename result) is returned below
LogContext(logContext).logErr("Failed to unlink overwritten entry:"
" FileName: " + toName +
" ParentEntryID: " + parentDir.getID() +
" entryID: " + outOverWrittenEntry->getEntryID() +
" Error: " + boost::lexical_cast<std::string>(unlinkRes));
// TODO: Restore the dentry
}
}
return retVal;
}
/**
 * Locking wrapper around unlinkOverwrittenEntryUnlocked(): unlinks what is left of a dentry that
 * was overwritten by a rename (either its #fsid# entry or its inode).
 *
 * Locking:
 *   Takes the MetaStore read-lock and the parentDir write-lock itself, so the caller must not
 *   hold either of them.
 *
 * @param parentDir directory that contained the overwritten entry
 * @param overWrittenEntry the dentry that was overwritten
 * @param outInode passed through to unlinkOverwrittenEntryUnlocked()
 * @return result of unlinkOverwrittenEntryUnlocked()
 */
FhgfsOpsErr MetaStore::unlinkOverwrittenEntry(DirInode& parentDir,
   DirEntry* overWrittenEntry, std::unique_ptr<FileInode>* outInode)
{
   // lock order: MetaStore first, then the directory
   SafeRWLock metaStoreLock(&this->rwlock, SafeRWLock_READ); // L O C K
   SafeRWLock dirLock(&parentDir.rwlock, SafeRWLock_WRITE);

   const FhgfsOpsErr result =
      unlinkOverwrittenEntryUnlocked(parentDir, overWrittenEntry, outInode);

   // release in reverse order of acquisition
   dirLock.unlock();
   metaStoreLock.unlock(); // U N L O C K

   return result;
}
/**
 * Unlink the remains of an overwritten dentry; see unlinkOverwrittenEntry() for details.
 *
 * The entry name passed down is empty and the unlink type is DirEntry_UNLINK_ID, because the
 * by-name dentry was already replaced by the rename and must not be deleted again here.
 *
 * Locking (must be held by the caller):
 *   MetaStore rwlock: Read-lock
 *   parentDir       : Write-lock
 */
FhgfsOpsErr MetaStore::unlinkOverwrittenEntryUnlocked(DirInode& parentDir,
   DirEntry* overWrittenEntry, std::unique_ptr<FileInode>* outInode)
{
   unsigned ignoredNumHardlinks; // demanded by the callee signatures, not evaluated here

   if (!overWrittenEntry->getIsInodeInlined() )
   { // separate inode: remove the ID dentry together with the inode
      return unlinkDentryAndInodeUnlocked("", parentDir, overWrittenEntry,
         DirEntry_UNLINK_ID, outInode, ignoredNumHardlinks);
   }

   // inode is inlined into the dentry
   return unlinkDirEntryWithInlinedInodeUnlocked("", parentDir, overWrittenEntry,
      DirEntry_UNLINK_ID, outInode, ignoredNumHardlinks);
}
/**
 * Perform the rename action here.
 *
 * In contrast to the moving...()-methods, this method performs a simple rename of an entry,
 * where no moving is involved.
 *
 * Rules: Files can overwrite existing files, but not existing dirs. Dirs cannot overwrite any
 * existing entry.
 *
 * @param dir needs to be write-locked already
 * @param fromName current name of the entry inside dir
 * @param toName new name of the entry inside dir
 * @param outOverwrittenEntry set to the dentry that was overwritten (NULL if nothing was
 *    overwritten or the rename did not succeed); the caller is responsible for the deletion of
 *    the local file; according to the rules, this can only be an overwritten file, not a dir;
 *    may not be NULL.
 *    Also, we only overwrite the entryName dentry, but not the ID dentry.
 * @return FhgfsOpsErr_PATHNOTEXISTS if dir cannot be loaded or fromName does not exist, the
 *    error of referencing the source inode, the verdict of checkRenameOverwrite() if
 *    overwriting is refused, otherwise the result of the actual rename.
 *
 * Note: MetaStore is ReadLocked, dir is WriteLocked
 */
FhgfsOpsErr MetaStore::performRenameEntryInSameDir(DirInode& dir, const std::string& fromName,
   const std::string& toName, DirEntry** outOverwrittenEntry)
{
   *outOverwrittenEntry = NULL;

   // load DirInode on demand if required, we need it now
   if (!dir.loadIfNotLoadedUnlocked() )
      return FhgfsOpsErr_PATHNOTEXISTS;

   // dentry of the file being renamed
   DirEntry* fromEntry = dir.dirEntryCreateFromFileUnlocked(fromName);
   if (!fromEntry)
      return FhgfsOpsErr_PATHNOTEXISTS;

   const std::string& parentEntryID = dir.getID();

   EntryInfo fromEntryInfo;
   fromEntry->getEntryInfo(parentEntryID, 0, &fromEntryInfo);

   // reference the inode
   MetaFileHandle fromFileInode;

   if (DirEntryType_ISDIR(fromEntryInfo.getEntryType() ) )
   {
      // TODO, exclusive lock
   }
   else if (fromEntry->getIsInodeInlined() )
   {
      // for nonInlined inode(s) - inode may not be present on local meta server
      // only try to reference file inode for inlined inode(s)
      FhgfsOpsErr referenceRes;
      std::tie(fromFileInode, referenceRes) = referenceFileUnlocked(dir, &fromEntryInfo);
      if (!fromFileInode)
      {
         /* Note: The inode might be exclusively locked and a remote rename op might be in
          *       progress. If that fails we should actually continue with our rename. That will
          *       be solved in the future by using hardlinks for remote renaming. */
         SAFE_DELETE(fromEntry); // we own the dentry object, so do not leak it on this path
         return referenceRes;
      }
   }

   FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS;
   bool doRename = true;

   DirEntry* overWriteEntry = dir.dirEntryCreateFromFileUnlocked(toName);
   if (overWriteEntry)
   {
      // sanity checks if we really shall overwrite the entry
      EntryInfo renameSourceInfo;
      fromEntry->getEntryInfo(parentEntryID, 0, &renameSourceInfo);

      EntryInfo overWriteEntryInfo;
      overWriteEntry->getEntryInfo(parentEntryID, 0, &overWriteEntryInfo);

      bool isSameInode;
      retVal = checkRenameOverwrite(&renameSourceInfo, &overWriteEntryInfo, isSameInode);

      if (isSameInode)
      { // nothing to do then, rename request will be silently ignored
         delete overWriteEntry;
         overWriteEntry = NULL;
         doRename = false;
      }
      else if (retVal != FhgfsOpsErr_SUCCESS)
         doRename = false; // not allowed for some reasons, return it to the user
   }

   if (doRename)
   {
      // eventually rename here
      retVal = dir.renameDirEntryUnlocked(fromName, toName, overWriteEntry);

      /* Note: If rename failed and an existing toName was to be overwritten, we don't need to
       *       care about it, the underlying file system has to handle it. */

      /* Note2: Do not decrease directory link count here, even if we overwrote an entry. We will
       *        do that later on in common unlink code, when we are going to unlink the entry
       *        from the #fsIDs# dir. */
   }

   // The reference taken above has to be dropped on every path, including the ones that skip
   // the actual rename.
   if (fromFileInode)
      releaseFileUnlocked(dir, fromFileInode);
   else
   {
      // TODO dir
   }

   if (retVal == FhgfsOpsErr_SUCCESS)
      *outOverwrittenEntry = overWriteEntry;
   else
      SAFE_DELETE(overWriteEntry);

   SAFE_DELETE(fromEntry); // always exists when we are here

   return retVal;
}
/**
 * Decide whether a rename may replace an already existing entry.
 *
 * @param fromEntry info of the entry that is being renamed
 * @param overWriteEntry info of the existing entry that would be replaced
 * @param outIsSameInode set to true if both infos carry the same entryID, false otherwise
 * @return FhgfsOpsErr_SUCCESS if overwriting is allowed (or both refer to the same inode),
 *    FhgfsOpsErr_EXISTS if either side is a directory
 */
FhgfsOpsErr MetaStore::checkRenameOverwrite(EntryInfo* fromEntry, EntryInfo* overWriteEntry,
   bool& outIsSameInode)
{
   // renaming onto a dentry with the same inode: posix wants us to do nothing and succeed
   outIsSameInode = (fromEntry->getEntryID() == overWriteEntry->getEntryID() );
   if (outIsSameInode)
      return FhgfsOpsErr_SUCCESS;

   /* Directories can neither be overwritten nor overwrite anything.
    * TODO: We should allow overwriting if overWriteEntry is a directory and it is empty. */
   if ( (overWriteEntry->getEntryType() == DirEntryType_DIRECTORY) ||
        (fromEntry->getEntryType() == DirEntryType_DIRECTORY) )
      return FhgfsOpsErr_EXISTS;

   return FhgfsOpsErr_SUCCESS;
}
/**
 * Create a new file on this (remote) meta-server. This is the 'toFile' on a rename() client call.
 *
 * Note: Replaces existing entry.
 *
 * @param fromFileInfo entry info of the file that is being moved here
 * @param toParent directory that receives the new entry; locked by this method
 * @param newEntryName name of the new entry inside toParent
 * @param buf serialized inode object
 * @param bufLen length of buf
 * @param outUnlinkedInode the unlinked (owned) file (in case a file was overwritten by the move
 *    operation); the caller is responsible for the deletion of the local file and the
 *    corresponding object; may not be NULL
 * @param overWriteInfo entryInfo of overwritten (and possibly unlinked inode if it was inlined)
 *    file; only filled if an entry named newEntryName already existed
 * @param newFileInfo filled with the entry info of the newly created entry on success
 * @return FhgfsOpsErr_SUCCESS on success (also if source and target are the same inode, in
 *    which case nothing is created)
 */
FhgfsOpsErr MetaStore::moveRemoteFileInsert(EntryInfo* fromFileInfo, DirInode& toParent,
   const std::string& newEntryName, const char* buf, uint32_t bufLen,
   std::unique_ptr<FileInode>* outUnlinkedInode, EntryInfo* overWriteInfo, EntryInfo& newFileInfo)
{
   // note: we do not allow newEntry to be a file if the old entry was a directory (and vice versa)

   const char* logContext = "rename(): Insert remote entry";
   FhgfsOpsErr retVal = FhgfsOpsErr_INTERNAL;

   outUnlinkedInode->reset();

   SafeRWLock safeMetaStoreLock(&rwlock, SafeRWLock_READ); // L O C K
   SafeRWLock toParentMutexLock(&toParent.rwlock, SafeRWLock_WRITE); // L O C K ( T O )

   std::unique_ptr<DirEntry> overWrittenEntry(toParent.dirEntryCreateFromFileUnlocked(newEntryName));
   if (overWrittenEntry)
   {
      /* NOTE(review): the parent ID handed to getEntryInfo() is taken from the overwritten entry
       * itself, whereas the unlink code further down uses toParent.getID() for the same entry.
       * Left unchanged here; confirm which one callers of overWriteInfo expect. */
      const std::string& parentID = overWrittenEntry->getID();
      overWrittenEntry->getEntryInfo(parentID, 0, overWriteInfo);

      bool isSameInode;
      FhgfsOpsErr checkRes = checkRenameOverwrite(fromFileInfo, overWriteInfo, isSameInode);

      // stop if overwriting is refused or if there is nothing to do (same inode)
      if ( (checkRes != FhgfsOpsErr_SUCCESS) || isSameInode)
      {
         retVal = checkRes;
         goto outUnlock;
      }

      // only unlink the dir-entry-name here, so we can still restore it from dir-entry-id
      FhgfsOpsErr unlinkRes = toParent.unlinkDirEntryUnlocked(newEntryName, overWrittenEntry.get(),
         DirEntry_UNLINK_FILENAME);
      if (unlikely(unlinkRes != FhgfsOpsErr_SUCCESS) )
      {
         if (unlikely (unlinkRes == FhgfsOpsErr_PATHNOTEXISTS) )
            LogContext(logContext).log(Log_WARNING, "Unexpectedly failed to unlink file: " +
               toParent.entries.getDirEntryPathUnlocked() + newEntryName + ". ");
         else
         {
            LogContext(logContext).logErr("Failed to unlink existing file. Aborting rename().");
            retVal = unlinkRes;
            goto outUnlock;
         }
      }
   }

   { // create new dirEntry with inlined inode
      FileInode* inode = new FileInode(); // the deserialized inode
      Deserializer des(buf, bufLen);
      inode->deserializeMetaData(des);
      if (!des.good())
      {
         LogContext("File rename").logErr("Bug: Deserialization of remote buffer failed. Are all "
            "meta servers running with the same version?" );
         retVal = FhgfsOpsErr_INTERNAL;
         delete inode;
         goto outUnlock;
      }

      // ensure that the buddyMirrored flag of the created inode is set correctly. the source could
      // check this as well, but since we already have the destination dir inode, we are in a better
      // position to do this.
      if (toParent.getIsBuddyMirrored())
         inode->setIsBuddyMirrored();
      else
         inode->setIsBuddyMirrored(false);

      // deserialize RSTs and set in inode object
      if (inode->getIsRstAvailable())
      {
         RemoteStorageTarget rstInfo;
         des % rstInfo;
         inode->setRemoteStorageTargetUnpersistent(rstInfo);
      }

      // destructs inode
      retVal = mkMetaFileUnlocked(toParent, newEntryName, fromFileInfo, inode);
   }

   if (retVal == FhgfsOpsErr_SUCCESS)
   {
      if (!toParent.entries.getFileEntryInfo(newEntryName, newFileInfo))
         retVal = FhgfsOpsErr_INTERNAL;
   }

   if (overWrittenEntry && overWrittenEntry->getIsInodeInlined() && (retVal == FhgfsOpsErr_SUCCESS))
   {
      // unlink overwritten entry if it had an inlined inode (non-inlined inodes will be unlinked
      // later)
      bool unlinkedWasInlined = overWrittenEntry->getIsInodeInlined();

      FhgfsOpsErr unlinkRes = unlinkOverwrittenEntryUnlocked(toParent, overWrittenEntry.get(),
         outUnlinkedInode);

      EntryInfo unlinkEntryInfo;
      overWrittenEntry->getEntryInfo(toParent.getID(), 0, &unlinkEntryInfo);

      // unlock everything here, but do not release toParent yet.
      toParentMutexLock.unlock(); // U N L O C K ( T O )
      safeMetaStoreLock.unlock();

      // unlinkInodeLater() requires that everything was unlocked!
      if (unlinkRes == FhgfsOpsErr_INUSE)
      {
         unlinkRes = unlinkInodeLater(&unlinkEntryInfo, unlinkedWasInlined);
         if (unlinkRes == FhgfsOpsErr_AGAIN)
            unlinkRes = unlinkOverwrittenEntry(toParent, overWrittenEntry.get(), outUnlinkedInode);
      }

      // Report every unlink failure, not only the ones that went through the in-use handling
      // above. The rename itself already succeeded, so this does not change retVal.
      if (unlinkRes != FhgfsOpsErr_SUCCESS && unlinkRes != FhgfsOpsErr_PATHNOTEXISTS)
         LogContext(logContext).logErr("Failed to unlink overwritten entry:"
            " FileName: " + newEntryName +
            " ParentEntryID: " + toParent.getID() +
            " entryID: " + overWrittenEntry->getEntryID() +
            " Error: " + boost::lexical_cast<std::string>(unlinkRes));

      return retVal;
   }
   else if (overWrittenEntry)
   {
      // TODO: Restore the overwritten entry
   }

outUnlock:
   toParentMutexLock.unlock(); // U N L O C K ( T O )
   safeMetaStoreLock.unlock();

   return retVal;
}
/**
 * Copies (serializes) the original file object to a buffer.
 *
 * For an entry with inlined inode the work is delegated to moveRemoteBegin() of the file store
 * that currently holds the inode (the MetaStore-wide store or the one of dir). For an entry with
 * non-inlined inode only the dentry is serialized.
 *
 * Note: This works by inserting a temporary placeholder and returning the original, so remember to
 * call movingComplete()
 *
 * @param dir directory containing the entry; read-locked by this method
 * @param entryInfo the entry that is about to be moved
 * @param buf target buffer for serialization
 * @param bufLen must be at least META_SERBUF_SIZE
 * @param outUsedBufLen number of bytes of buf that were used
 * @return FhgfsOpsErr_PATHNOTEXISTS if the dentry of a non-inlined inode cannot be found
 */
FhgfsOpsErr MetaStore::moveRemoteFileBegin(DirInode& dir, EntryInfo* entryInfo,
   char* buf, size_t bufLen, size_t* outUsedBufLen)
{
   FhgfsOpsErr retVal = FhgfsOpsErr_INTERNAL;

   SafeRWLock safeLock(&this->rwlock, SafeRWLock_READ); // L O C K

   // lock the dir to make sure no renameInSameDir is going on
   SafeRWLock safeDirLock(&dir.rwlock, SafeRWLock_READ);

   if (entryInfo->getIsInlined())
   {
      if (this->fileStore.isInStore(entryInfo->getEntryID()))
         retVal = this->fileStore.moveRemoteBegin(entryInfo, buf, bufLen, outUsedBufLen);
      else
         retVal = dir.fileStore.moveRemoteBegin(entryInfo, buf, bufLen, outUsedBufLen);
   }
   else
   {
      // handle dentry for a non-inlined inode
      DirEntry fileDentry(entryInfo->getFileName());

      if (!dir.getDentryUnlocked(entryInfo->getFileName(), fileDentry))
      {
         // do not return from here: both locks still have to be released below
         retVal = FhgfsOpsErr_PATHNOTEXISTS;
      }
      else
      {
         Serializer ser(buf, bufLen);
         fileDentry.serializeDentry(ser);
         *outUsedBufLen = ser.size();

         retVal = FhgfsOpsErr_SUCCESS;
      }
   }

   safeDirLock.unlock();
   safeLock.unlock(); // U N L O C K

   return retVal;
}
/**
 * Forwards to moveRemoteComplete() of the file store that holds entryID: the MetaStore-wide
 * store if the entry is found there, otherwise the store of the given directory.
 *
 * Locking: takes the MetaStore write-lock and, if the directory store is used, the dir read-lock.
 *
 * @param dir directory whose file store is used as fallback
 * @param entryID ID of the entry to complete
 */
void MetaStore::moveRemoteFileComplete(DirInode& dir, const std::string& entryID)
{
   SafeRWLock metaStoreLock(&this->rwlock, SafeRWLock_WRITE); // L O C K

   const bool heldByMetaStore = this->fileStore.isInStore(entryID);

   if (!heldByMetaStore)
   { // fall back to the per-directory store, which needs the directory lock
      SafeRWLock dirLock(&dir.rwlock, SafeRWLock_READ);

      dir.fileStore.moveRemoteComplete(entryID);

      dirLock.unlock();
   }
   else
      this->fileStore.moveRemoteComplete(entryID);

   metaStoreLock.unlock(); // U N L O C K
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include <common/storage/Metadata.h>
#include <common/Common.h>
// Suffix string; presumably appended to the name of a metadata file while an updated copy is
// being written - confirm against the users of this macro.
#define META_UPDATE_EXT_STR ".new-fhgfs"
#define META_XATTR_NAME "user.fhgfs" // attribute name for dir-entries, file and dir metadata
#define RST_XATTR_NAME "user.beermt" // attribute name for storing remote storage target info
// !!!IMPORTANT NOTICE TO MAINTAINER!!!
// Any new extended attribute (NOT user-defined) that is added in the future MUST be included in
// the list below.
//
// FAILURE TO DO SO WILL CAUSE:
// Inconsistencies between primary and secondary meta mirrors due to incomplete buddy
// resyncing, as the missing attribute's data will not be resynced to the secondary meta.
const std::array<std::string, 2> METADATA_XATTR_NAME_LIST = {META_XATTR_NAME, RST_XATTR_NAME};
// The size must be sufficient to hold the entire dentry data. In order to simplify various
// operations, metadata is stored into a buffer. For example, for a remote directory rename
// operation, this buffer is transferred over the network to the other meta node and used there
// to fill the remote dentry, without any knowledge of the actual content.
#define META_SERBUF_SIZE (1024*8)

View File

@@ -0,0 +1,30 @@
/*
* File creation information
*
*/
#pragma once
/**
 * Plain bundle of the parameters that describe a file to be created.
 */
struct MkFileDetails
{
   /**
    * @param newName name of the new file
    * @param userID owner user ID
    * @param groupID owner group ID
    * @param mode mode bits requested for the file
    * @param umask umask that accompanies the request
    * @param createTime creation timestamp
    *
    * newEntryID starts out empty; see setNewEntryID().
    */
   MkFileDetails(const std::string& newName, const unsigned userID, const unsigned groupID,
      const int mode, const int umask, int64_t createTime) :
      newName(newName), userID(userID), groupID(groupID), mode(mode), umask(umask),
      createTime(createTime)
   { }

   /**
    * @param newEntryID zero-terminated entry ID; a NULL pointer is treated as an empty ID
    */
   void setNewEntryID(const char* newEntryID)
   {
      // assigning a null pointer to a std::string is undefined behavior, so map it to ""
      this->newEntryID = newEntryID ? newEntryID : "";
   }

   std::string newName;
   std::string newEntryID; // only used for mirroring on secondary
   unsigned userID;
   unsigned groupID;
   int mode;
   int umask;
   int64_t createTime;
};

View File

@@ -0,0 +1,78 @@
#pragma once
#include <common/threading/SafeRWLock.h>
#include <common/toolkit/Time.h>
#include <common/toolkit/OfflineWaitTimeoutTk.h>
#include <app/config/Config.h>
/**
* Handles timeout during which the node may not send a state report to the mgmtd, so that the mgmtd
* will eventually set the node to (P)OFFLINE - e.g. when a primary needs a resync right after meta
* server startup
*/
class NodeOfflineWait
{
public:
NodeOfflineWait(Config* cfg)
: waitTimeoutMS(OfflineWaitTimeoutTk<Config>::calculate(cfg) ),
active(false)
{ }
private:
RWLock rwlock;
const unsigned waitTimeoutMS;
Time timer;
bool active;
public:
/**
* Starts the timer.
*/
void startTimer()
{
SafeRWLock safeLock(&rwlock, SafeRWLock_WRITE); // L O C K
active = true;
timer.setToNow();
safeLock.unlock(); // U N L O C K
}
/**
* Checks if the timer is running and has not run out yet.
*/
bool hasTimeout()
{
bool res = true;
{
SafeRWLock lock(&rwlock, SafeRWLock_READ); // L O C K
res = active;
lock.unlock(); // U N L O C K
}
if (res) // If active flag is set, timer was still running the last time we checked.
{ // Check it again and clear active flag if it has run out in the mean time.
SafeRWLock lock(&rwlock, SafeRWLock_WRITE); // L O C K
// else: Active flag is set - check if timer has run out yet.
unsigned elapsedMS = timer.elapsedMS();
if (elapsedMS >= waitTimeoutMS)
res = active = false;
lock.unlock(); // U N L O C K
if (res)
LogContext("Node Timeout").log(Log_WARNING,
"This node was a primary node of a mirror group and needs a resync. "
"Waiting until it is marked offline on all clients. (" +
StringTk::uintToStr( (this->waitTimeoutMS - elapsedMS) / 1000) +
" seconds left)");
}
return res;
}
};

View File

@@ -0,0 +1,159 @@
#include "PosixACL.h"
// Extended attribute names for the default ACL and the access ACL.
const std::string PosixACL::defaultACLXAttrName = "system.posix_acl_default";
const std::string PosixACL::accessACLXAttrName = "system.posix_acl_access";
/**
 * Replace the entries of this ACL with the ones read from a serialized posix ACL.
 *
 * If the version header is missing or wrong, the existing entries are left untouched. Otherwise
 * the entries are cleared first and then refilled; an entry that could not be read completely
 * is not added.
 *
 * @param xattr serialized form of the posix ACL to fill the PosixACL from.
 * @return true if the version matched and all entries were read successfully.
 */
bool PosixACL::deserializeXAttr(const CharVector& xattr)
{
   // data() is also valid for an empty vector, whereas indexing element 0 would not be
   Deserializer des(xattr.data(), xattr.size());

   {
      int32_t version;
      des % version;
      if (!des.good() || version != ACLEntry::POSIX_ACL_XATTR_VERSION)
         return false;
   }

   entries.clear();

   while (des.good() && des.size() < xattr.size())
   {
      ACLEntry newEntry;
      des % newEntry;

      if (!des.good())
         break; // incomplete entry: do not store its indeterminate fields

      entries.push_back(newEntry);
   }

   return des.good();
}
/**
 * Serialize the ACL in posix_acl_xattr form.
 *
 * @param xattr receives the serialized ACL; previous content is discarded.
 */
void PosixACL::serializeXAttr(CharVector& xattr) const
{
   xattr.clear();

   // run twice: once with empty buffer, to calculate the size, then with proper buffer
   for (int pass = 0; pass < 2; pass++)
   {
      // The buffer is empty during the first pass, so element 0 must not be indexed; data() is
      // valid for an empty vector as well.
      Serializer ser(xattr.data(), xattr.size());

      ser % ACLEntry::POSIX_ACL_XATTR_VERSION;

      // entries
      for (ACLEntryVecCIter entryIt = entries.begin(); entryIt != entries.end(); ++entryIt)
         ser % *entryIt;

      xattr.resize(ser.size());
   }
}
/**
 * Modify the ACL based on the given mode bits. Also modifies the mode bits according to the
 * permissions granted in the ACL.
 * This effectively turns a directory default ACL into a file access ACL.
 *
 * @param outMode in: mode bits of the new file; out: the same mode with its last nine
 *    permission bits restricted by the ACL. Only written on success.
 * @param outNeedsACL set to true if the ACL contains named user or named group entries, i.e. it
 *    cannot be expressed by mode bits alone.
 * @return FhgfsOpsErr_INTERNAL if an entry carries an unknown tag or if the ACL has neither a
 *    mask nor a group entry, FhgfsOpsErr_SUCCESS otherwise.
 */
FhgfsOpsErr PosixACL::modifyModeBits(int& outMode, bool& outNeedsACL)
{
   outNeedsACL = false;

   int mode = outMode; // working copy, only written back on success

   // Group and mask entries can only be evaluated after all entries have been seen, so just
   // remember where their permissions live.
   unsigned short* groupObjPerm = NULL;
   unsigned short* maskEntryPerm = NULL;

   for (ACLEntry& entry : entries)
   {
      switch (entry.tag)
      {
         case ACLEntry::ACL_USER_OBJ:
            // restrict the ACL's 'owner' entry by the 'user' bits of the mode and vice versa
            entry.perm &= (mode >> 6) | ~0007;
            mode &= (entry.perm << 6) | ~0700;
            break;

         case ACLEntry::ACL_OTHER:
            // restrict the ACL's 'other' entry by the 'other' bits of the mode and vice versa
            entry.perm &= mode | ~0007;
            mode &= entry.perm | ~0007;
            break;

         case ACLEntry::ACL_GROUP_OBJ:
            groupObjPerm = &entry.perm;
            break;

         case ACLEntry::ACL_MASK:
            maskEntryPerm = &entry.perm;
            break;

         case ACLEntry::ACL_USER:
         case ACLEntry::ACL_GROUP:
            // named user/group entries can't be represented using only mode bits
            outNeedsACL = true;
            break;

         default:
            return FhgfsOpsErr_INTERNAL;
      }
   }

   // The 'group' bits of the mode are tied to the mask entry if there is one, otherwise to the
   // group entry.
   unsigned short* groupClassPerm = maskEntryPerm ? maskEntryPerm : groupObjPerm;
   if (!groupClassPerm)
      return FhgfsOpsErr_INTERNAL;

   *groupClassPerm &= (mode >> 3) | ~0007;
   mode &= (*groupClassPerm << 3) | ~0070;

   // Apply changes to the last nine mode bits.
   outMode = (outMode & ~0777) | mode;

   return FhgfsOpsErr_SUCCESS;
}
std::string PosixACL::toString()
{
std::ostringstream ostr;
ostr << "ACL Size: " << entries.size() << std::endl;
for (ACLEntryVecCIter it = entries.begin(); it != entries.end(); ++it)
ostr << "Entry[ "
<< "tag: " << it->tag << " perm: " << it->perm << " id: " << it->id
<< " ]" << std::endl;
return ostr.str();
}

View File

@@ -0,0 +1,57 @@
#pragma once
#include <common/toolkit/serialization/Serialization.h>
#include <common/Common.h>
#include <common/storage/StorageErrors.h>
#include <stdint.h>
#include <vector>
/**
 * A single entry of a posix ACL.
 */
struct ACLEntry
{
   // Definitions to match linux/posix_acl.h and linux/posix_acl_xattr.h

   static const int32_t POSIX_ACL_XATTR_VERSION = 0x0002;

   // one bit per tag
   enum Tag {
      ACL_USER_OBJ  = 1 << 0,
      ACL_USER      = 1 << 1,
      ACL_GROUP_OBJ = 1 << 2,
      ACL_GROUP     = 1 << 3,
      ACL_MASK      = 1 << 4,
      ACL_OTHER     = 1 << 5
   };

   int16_t tag;   // one of the Tag values
   uint16_t perm; // permission bits of this entry
   uint32_t id;   // uid or gid depending on the tag

   /**
    * Feeds tag, perm and id (in this order) to the given context.
    */
   template<typename Self, typename Context>
   static void serialize(Self obj, Context& ctx)
   {
      ctx % obj->tag % obj->perm % obj->id;
   }
};
typedef std::vector<ACLEntry> ACLEntryVec;
typedef ACLEntryVec::const_iterator ACLEntryVecCIter;
typedef ACLEntryVec::iterator ACLEntryVecIter;
/**
 * A posix ACL: a list of ACLEntry objects that can be converted from and to the serialized
 * extended attribute form.
 */
class PosixACL
{
   public:
      // fill this ACL from / write this ACL to the serialized xattr form
      bool deserializeXAttr(const CharVector& xattr);
      void serializeXAttr(CharVector& xattr) const;

      std::string toString();

      // restrict ACL and mode bits by each other; see the definition for details
      FhgfsOpsErr modifyModeBits(int& outMode, bool& outNeedsACL);

      // const, so that it can also be queried on a const ACL
      bool empty() const { return entries.empty(); }

      // names of the extended attributes for the default ACL and the access ACL
      static const std::string defaultACLXAttrName;
      static const std::string accessACLXAttrName;

   private:
      ACLEntryVec entries;
};

View File

@@ -0,0 +1,133 @@
#pragma once
#include <common/storage/Path.h>
#include <common/system/System.h>
#include <toolkit/StorageTkEx.h>
class SyncedDiskAccessPath : public Path
{
public:
SyncedDiskAccessPath() : Path()
{
initStorageVersion();
}
SyncedDiskAccessPath(const std::string& pathStr) : Path(pathStr)
{
initStorageVersion();
}
private:
uint64_t storageVersion; // zero is the invalid version!
uint64_t highVersion; // high part of storageVersion (high 42 bits)
unsigned int lowVersion; // low part of storageVersion (low 22 bits)
Mutex diskMutex;
// inliners
void initStorageVersion()
{
const uint64_t currentSecs = System::getCurrentTimeSecs();
const unsigned int minorVersion = 0;
setStorageVersion(currentSecs, minorVersion);
}
/**
* @param high currentTimeSecs (high 42 bits)
* @param low minorVersion (low 22 bits)
*/
void setStorageVersion(uint64_t highVersion, unsigned int lowVersion)
{
this->highVersion = highVersion;
this->lowVersion = lowVersion;
this->storageVersion = (highVersion << 22);
this->storageVersion |= ( (lowVersion << 10) >> 10);
}
public:
// inliners
/**
* note: remember to call storageUpdateEnd()
* @return storageVersion of the current update
*/
void storageUpdateBegin()
{
diskMutex.lock();
}
void storageUpdateEnd()
{
diskMutex.unlock();
}
/**
* Requires a call to storageUpdateBegin before! (and ...End() afterwards)
*/
uint64_t incStorageVersion()
{
// note: this relies on the fact that the currentTimeSecs never ever decrease!
uint64_t nextHighVersion;
unsigned int nextLowVersion;
// we try to avoid the getTime sys-call for some minor versions
if(lowVersion < 1024)
{
nextHighVersion = this->highVersion; // remains unchanged
nextLowVersion = this->lowVersion+1;
}
else
{
// in this case, we have to check that the timeSecs really have increased
// before we reset the minor version
nextHighVersion = System::getCurrentTimeSecs();
if(nextHighVersion == this->highVersion)
{ // no timeSecs increase yet => use the minor version
nextLowVersion = this->lowVersion+1;
}
else
{ // high version increased => we can reset the minor version
nextLowVersion = 0;
}
}
setStorageVersion(nextHighVersion, nextLowVersion);
return storageVersion;
}
bool createSubPathOnDisk(Path& subpath, bool excludeLastElement)
{
storageUpdateBegin();
Path completePath = *this / subpath;
bool createRes = StorageTk::createPathOnDisk(completePath, excludeLastElement);
storageUpdateEnd();
return createRes;
}
bool removeSubPathDirsFromDisk(Path& subpath, unsigned subKeepDepth)
{
storageUpdateBegin();
Path completePath = *this / subpath;
bool removeRes = StorageTk::removePathDirsFromDisk(
completePath, subKeepDepth + this->size() );
storageUpdateEnd();
return removeRes;
}
};