// NOTE(review): the header names of the angle-bracket includes are missing in this view of the
// file; the directives are kept exactly as found.
#include #include #include #include #include #include #include #include #include #include "MetaStore.h" #include #include #include #include

#define MAX_DEBUG_LOCK_TRY_TIME 30 // max lock wait time in seconds

/**
 * Reference the given directory.
 *
 * Takes the MetaStore read lock and forwards to referenceDirUnlocked().
 *
 * @param dirID the ID of the directory to reference
 * @param isBuddyMirrored forwarded unchanged to the DirInode store lookup
 * @param forceLoad not all operations require the directory to be loaded from disk (i.e. stat()
 *                  of a sub-entry) and the DirInode is only used for directory locking. Other
 *                  operations need the directory to be loaded and those shall set
 *                  forceLoad = true.
 *                  Should be set to true if we are going to write to the DirInode anyway or
 *                  if DirInode information are required.
 * @return cannot be NULL if forceLoad=false, even if the directory with the given id
 *         does not exist. But it can be NULL if forceLoad=true, as we only then
 *         check if the directory exists on disk at all.
 */
DirInode* MetaStore::referenceDir(const std::string& dirID, const bool isBuddyMirrored,
   const bool forceLoad)
{
   UniqueRWLock lock(rwlock, SafeRWLock_READ);

   return referenceDirUnlocked(dirID, isBuddyMirrored, forceLoad);
}

/**
 * Same as referenceDir(), but does not take the MetaStore lock itself.
 */
DirInode* MetaStore::referenceDirUnlocked(const std::string& dirID, bool isBuddyMirrored,
   bool forceLoad)
{
   return dirStore.referenceDirInode(dirID, isBuddyMirrored, forceLoad);
}

/**
 * Release a directory reference. Takes the MetaStore read lock and forwards to
 * releaseDirUnlocked().
 */
void MetaStore::releaseDir(const std::string& dirID)
{
   UniqueRWLock lock(rwlock, SafeRWLock_READ);

   releaseDirUnlocked(dirID);
}

/**
 * Same as releaseDir(), but does not take the MetaStore lock itself.
 */
void MetaStore::releaseDirUnlocked(const std::string& dirID)
{
   dirStore.releaseDir(dirID);
}

/**
 * Reference a file. It is unknown if this file is already referenced in memory or needs to be
 * loaded. Therefore a complete entryInfo is required.
 *
 * @param checkLockStore forwarded unchanged to the file store lookups
 * @return the file handle (empty on failure) together with the result code of the lookup
 */
MetaFileHandleRes MetaStore::referenceFile(EntryInfo* entryInfo, bool checkLockStore)
{
   UniqueRWLock lock(rwlock, SafeRWLock_READ);

   auto [inode, referenceRes] = referenceFileUnlocked(entryInfo, checkLockStore);
   if (unlikely(!inode))
      return {MetaFileHandle(), referenceRes};

   // Return MetaFileHandle from here if:
   // 1. The inode is inlined, or
   // 2. The inode is loaded into the global store, indicating that
   //    it was previously deinlined hence inlined=false was received from client, or
   // 3. Inode is already present in the global store (numParentRefs remains zero if inode
   //    is referenced from the global store)
   if (inode->getIsInlined() || (inode->getNumParentRefs() == 0))
      return {std::move(inode), referenceRes};

   // If we reach at this point, then following holds true:
   // 1. The file inode is non-inlined (validated from on-disk metadata)
   // 2. The entryInfo received from client has stale value of inlined flag (i.e. true)
   // 3. The file inode is referenced in dir-specific store
   //
   // According to existing design, non-inlined file inodes should always be referenced
   // in the global inode store. Therefore, we need to perform following additional steps
   // to maintain consistency with aforementioned design:
   // 1. Take parent directory reference and release inode from dir-specific store
   // 2. Release the meta read-lock and call tryReferenceFileWriteLocked() to put inode
   //    reference into the global store
   auto dir = referenceDirUnlocked(entryInfo->getParentEntryID(),
      entryInfo->getIsBuddyMirrored(), false);
   if (!dir)
      return {MetaFileHandle(), FhgfsOpsErr_INTERNAL};

   UniqueRWLock subDirLock(dir->rwlock, SafeRWLock_READ);
   inode->decParentRef();
   dir->fileStore.releaseFileInode(inode.get());
   subDirLock.unlock();

   // two releases: one for the dir reference taken just above and one for the dir reference
   // that referenceFileUnlocked() kept when it referenced the inode in the per-directory store
   releaseDirUnlocked(dir->getID());
   releaseDirUnlocked(dir->getID());

   lock.unlock(); // U N L O C K

   // takes the MetaStore write lock on its own, so the read lock must be gone by now
   return tryReferenceFileWriteLocked(entryInfo, checkLockStore);
}

/**
 * See referenceFileInode() for details. We already have the lock here.
*/ MetaFileHandleRes MetaStore::referenceFileUnlocked(EntryInfo* entryInfo, bool checkLockStore) { // load inode into global store from disk if it is nonInlined bool loadFromDisk = !entryInfo->getIsInlined(); auto [inode, referenceRes] = this->fileStore.referenceFileInode(entryInfo, loadFromDisk, checkLockStore); if (inode) return {MetaFileHandle(inode, nullptr), referenceRes}; // not in global map, now per directory and also try to load from disk const std::string& parentEntryID = entryInfo->getParentEntryID(); bool isBuddyMirrored = entryInfo->getIsBuddyMirrored(); DirInode* subDir = referenceDirUnlocked(parentEntryID, isBuddyMirrored, false); if (!subDir) return {MetaFileHandle(), FhgfsOpsErr_PATHNOTEXISTS}; UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); std::tie(inode, referenceRes) = subDir->fileStore.referenceFileInode(entryInfo, true, checkLockStore); if (!inode) { subDirLock.unlock(); releaseDirUnlocked(entryInfo->getParentEntryID()); return {MetaFileHandle(), referenceRes}; } // we are not going to release the directory here, as we are using its FileStore inode->incParentRef(parentEntryID); return {MetaFileHandle(inode, nullptr), referenceRes};; } /** * See referenceFileInode() for details. DirInode is already known here and will not be futher * referenced due to the write-lock hold by the caller. Getting further references might cause * dead locks due to locking order problems (DirStore needs to be locked while holding the DirInode * lock). * * Locking: * MetaStore read locked * DirInode write locked. * * Note: Callers must release FileInode before releasing DirInode! Use this method with care! 
* */ MetaFileHandleRes MetaStore::referenceFileUnlocked(DirInode& subDir, EntryInfo* entryInfo, bool checkLockStore) { // load inode into global store from disk if it is nonInlined bool loadFromDisk = !entryInfo->getIsInlined(); auto [inode, referenceRes] = this->fileStore.referenceFileInode(entryInfo, loadFromDisk, checkLockStore); if (inode) return {MetaFileHandle(inode, nullptr), referenceRes}; // not in global map, now per directory and also try to load from disk std::tie(inode, referenceRes) = subDir.fileStore.referenceFileInode(entryInfo, true, checkLockStore); return {MetaFileHandle(inode, nullptr), referenceRes}; } /** * Reference a file known to be already referenced. So disk-access is not required and we don't * need the complete EntryInfo, but parentEntryID and entryID are sufficient. * Another name for this function also could be "referenceReferencedFiles". */ MetaFileHandle MetaStore::referenceLoadedFile(const std::string& parentEntryID, bool parentIsBuddyMirrored, const std::string& entryID) { UniqueRWLock lock(rwlock, SafeRWLock_READ); return referenceLoadedFileUnlocked(parentEntryID, parentIsBuddyMirrored, entryID); } /** * See referenceLoadedFile() for details. We already have the lock here. 
*/ MetaFileHandle MetaStore::referenceLoadedFileUnlocked(const std::string& parentEntryID, bool isBuddyMirrored, const std::string& entryID) { FileInode* inode = this->fileStore.referenceLoadedFile(entryID); if (inode) return {inode, nullptr}; // not in global map, now per directory and also try to load from disk DirInode* subDir = referenceDirUnlocked(parentEntryID, isBuddyMirrored, false); if (!subDir) return {}; UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); inode = subDir->fileStore.referenceLoadedFile(entryID); if (!inode) { subDirLock.unlock(); releaseDirUnlocked(parentEntryID); return {}; } // we are not going to release the directory here, as we are using its FileStore inode->incParentRef(parentEntryID); return {inode, subDir}; } /** * See referenceFileInode() for details. DirInode is already known here and will not be futher * referenced due to the write-lock hold by the caller. Getting further references might cause * dead locks due to locking order problems (DirStore needs to be locked while holding the DirInode * lock). * * Locking: * MetaStore read locked * DirInode write locked. * * Note: Callers must release FileInode before releasing DirInode! Use this method with care! * */ MetaFileHandle MetaStore::referenceLoadedFileUnlocked(DirInode& subDir, const std::string& entryID) { FileInode* inode = this->fileStore.referenceLoadedFile(entryID); if (inode) return {inode, nullptr}; // not in global map, now per directory and also try to load from disk inode = subDir.fileStore.referenceLoadedFile(entryID); return {inode, nullptr}; } /** * Tries to reference the file inode with MetaStore's write lock held. * Moves the reference to the global store if not already present. * * @param entryInfo The entry information of the file. * @return A handle to the file inode if successful, otherwise an empty handle. 
*/ MetaFileHandleRes MetaStore::tryReferenceFileWriteLocked(EntryInfo* entryInfo, bool checkLockStore) { UniqueRWLock lock(rwlock, SafeRWLock_WRITE); if (!this->fileStore.isInStore(entryInfo->getEntryID())) { this->moveReferenceToMetaFileStoreUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), entryInfo->getEntryID()); } auto [inode, referenceRes] = this->fileStore.referenceFileInode(entryInfo, true, checkLockStore); if (inode) return {MetaFileHandle(inode, nullptr), referenceRes}; return {MetaFileHandle(), referenceRes}; } /** * Tries to open the file with MetaStore's write lock held. * Moves the reference to the global store if not present. * * @param entryInfo The entry information of the file. * @param accessFlags The access flags for opening the file. * @param bypassAccessCheck Whether to bypass access checks. * @param outInode Output parameter for the file inode handle. * @return Error code indicating the result of the operation. */ FhgfsOpsErr MetaStore::tryOpenFileWriteLocked(EntryInfo* entryInfo, unsigned accessFlags, bool bypassAccessCheck, MetaFileHandle& outInode) { UniqueRWLock lock(rwlock, SafeRWLock_WRITE); if (!this->fileStore.isInStore(entryInfo->getEntryID())) { this->moveReferenceToMetaFileStoreUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), entryInfo->getEntryID()); } FileInode* inode; auto openRes = this->fileStore.openFile(entryInfo, accessFlags, inode, /* loadFromDisk */ true, bypassAccessCheck); outInode = {inode, nullptr}; return openRes; } bool MetaStore::releaseFile(const std::string& parentEntryID, MetaFileHandle& inode) { UniqueRWLock lock(rwlock, SafeRWLock_READ); return releaseFileUnlocked(parentEntryID, inode); } /** * Release a file inode. * * Note: If the inode belongs to the per-directory file store also this directory will be * released. 
*/
bool MetaStore::releaseFileUnlocked(const std::string& parentEntryID, MetaFileHandle& inode)
{
   // first try the global store; a true result means the inode was released there
   bool releaseRes = fileStore.releaseFileInode(inode.get());
   if (releaseRes)
      return true;

   // Not in global map, now per directory and also try to load from disk.

   // NOTE: we assume here, that if the file inode is buddy mirrored, the parent is buddy
   // mirrored, too
   DirInode* subDir = referenceDirUnlocked(parentEntryID, inode->getIsBuddyMirrored(), false);
   if (!subDir)
      return false;

   UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ);

   // decrease the counter before the release call, while the inode pointer is still used
   inode->decParentRef();
   releaseRes = subDir->fileStore.releaseFileInode(inode.get());

   // this is the current lock and reference
   subDirLock.unlock();
   releaseDirUnlocked(parentEntryID);

   // when we referenced the inode we did not release the directory yet, so do that here
   releaseDirUnlocked(parentEntryID);

   return releaseRes;
}

/**
 * Release a file inode. DirInode is known by the caller. Only call this after using
 *    referenceFileUnlocked(DirInode* subDir, EntryInfo* entryInfo)
 * or
 *    referenceLoadedFileUnlocked(DirInode* subDir, std::string entryID)
 * to reference a FileInode.
 *
 * Locking:
 *    MetaStore is locked
 *    DirInode is locked
 *
 * Note: No extra DirInode releases here, as the above mentioned referenceFileUnlocked() method also
 *       does not get additional references.
 *
 * @return the result of the global store release if it succeeded, otherwise the result of the
 *         release in the file store of subDir
 */
bool MetaStore::releaseFileUnlocked(DirInode& subDir, MetaFileHandle& inode)
{
   bool releaseRes = fileStore.releaseFileInode(inode.get());
   if (releaseRes)
      return true;

   // Not in global map, now per directory.
   return subDir.fileStore.releaseFileInode(inode.get());
}

/*
 * Can be used to reference an inode, if it is unknown whether it is a file or directory. While
 * one of the out.... pointers will hold a reference to the corresponding inode (which must be
 * released), the other pointer will be set to NULL.
* * Note: This funtion is mainly used by fsck * Note: dirInodes will always be loaded from disk with this function * Note: This will NOT work with inlined file inodes, as we only pass the ID * Note: This is not very efficient, but OK for now. Can definitely be optimized. * * @param entryID * @param outDirInode * @param outFilenode * * @return false if entryID could neither be loaded as dir nor as file inode */ bool MetaStore::referenceInode(const std::string& entryID, bool isBuddyMirrored, MetaFileHandle& outFileInode, DirInode*& outDirInode) { outFileInode = {}; outDirInode = NULL; FhgfsOpsErr referenceRes; // trying dir first, because we assume there are more non-inlined dir inodes than file inodes outDirInode = referenceDir(entryID, isBuddyMirrored, true); if (outDirInode) return true; // opening as dir failed => try as file /* we do not set full entryInfo (we do not have most of the info), but only entryID. That's why it does not work with inlined inodes */ EntryInfo entryInfo(NumNodeID(), "", entryID, "", DirEntryType_REGULARFILE,0); if (isBuddyMirrored) entryInfo.setBuddyMirroredFlag(true); std::tie(outFileInode, referenceRes) = referenceFile(&entryInfo); return outFileInode; } FhgfsOpsErr MetaStore::isFileUnlinkable(DirInode& subDir, EntryInfo *entryInfo) { FhgfsOpsErr isUnlinkable = this->fileStore.isUnlinkable(entryInfo); if (isUnlinkable == FhgfsOpsErr_SUCCESS) isUnlinkable = subDir.fileStore.isUnlinkable(entryInfo); return isUnlinkable; } /** * Move a fileInode reference from subDir->fileStore to (MetaStore) this->fileStore * * @return true if an existing reference was moved * * Note: MetaStore needs to be write locked! 
*/
bool MetaStore::moveReferenceToMetaFileStoreUnlocked(const std::string& parentEntryID,
   bool parentIsBuddyMirrored, const std::string& entryID)
{
   bool retVal = false;
   const char* logContext = "MetaStore (Move reference from per-Dir fileStore to global Store)";

   // temporary directory reference for the duration of the move; released at the end
   DirInode* subDir = referenceDirUnlocked(parentEntryID, parentIsBuddyMirrored, false);
   if (unlikely(!subDir) )
      return false;

   UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_WRITE);

   // takes the referencer out of the per-directory map (NULL if there is none)
   FileInodeReferencer* inodeRefer = subDir->fileStore.getReferencerAndDeleteFromMap(entryID);
   if (likely(inodeRefer) )
   {
      // The inode is referenced in the per-directory fileStore. Move it to the global store.
      FileInode* inode = inodeRefer->reference();
      ssize_t numParentRefs = inode->getNumParentRefs();

      retVal = this->fileStore.insertReferencer(entryID, inodeRefer);
      if (unlikely(retVal == false) )
      {
         std::string msg = "Bug: Failed to move to MetaStore FileStore - already exists in map"
            " ParentID: " + parentEntryID + " EntryID: " + entryID;
         LogContext(logContext).logErr(msg);
         /* NOTE: We are leaking memory here, but as there is a general bug, this is better
          *       than trying to free an object possibly in-use. */
      }
      else
      {
         // drop one directory reference per parent reference the inode was holding
         while (numParentRefs > 0)
         {
            releaseDirUnlocked(parentEntryID); /* inode references also always keep a dir reference,
                                                * so we need to release the dir as well */
            numParentRefs--;
            inode->decParentRef();
         }

         // give back the reference taken via inodeRefer->reference() above
         inodeRefer->release();
      }
   }
   else
   {
      retVal = true; /* it is probably a valid race (), so do not return an error,
                      * unlinkInodeLaterUnlocked() also can handle it as it does find this inode
                      * in the global FileStore then */
   }

   subDirLock.unlock();
   releaseDirUnlocked(parentEntryID);

   return retVal;
}

/**
 * @param accessFlags OPENFILE_ACCESS_...
flags */ FhgfsOpsErr MetaStore::openFile(EntryInfo* entryInfo, unsigned accessFlags, bool bypassAccessCheck, MetaFileHandle& outInode, bool checkDisposalFirst) { UniqueRWLock lock(rwlock, SafeRWLock_READ); // session restore must load disposed files with disposal as parent entry id - if the file is // disposed any other parent id will also work, but will make that parent id unremovable for // as long as the file is opened. if (unlikely(checkDisposalFirst)) { auto eiDisposal = *entryInfo; eiDisposal.setParentEntryID(META_DISPOSALDIR_ID_STR); FileInode* inode; auto openRes = this->fileStore.openFile(&eiDisposal, accessFlags, inode, /* loadFromDisk */ true, bypassAccessCheck); if (inode) { outInode = {inode, nullptr}; return openRes; } } /* check the MetaStore fileStore map here, but most likely the file will not be in this map, * but is either in the per-directory-map or has to be loaded from disk */ if (this->fileStore.isInStore(entryInfo->getEntryID())) { FileInode* inode; auto openRes = this->fileStore.openFile(entryInfo, accessFlags, inode, /* loadFromDisk */ false, bypassAccessCheck); outInode = {inode, nullptr}; return openRes; } // onwards v7.4.0 non-inlined inode might be present due to hardlinks. Non inlined file inodes // should be referenced from and stored into global file store (and not in per-directory-store) // if inode is non-inlined then code block below will load inode from disk and puts it into // global store if not already present there // // to forceLoad inode into global file store we need to pass "loadFromDisk = true" in call to // InodeFileStore::openFile(...) 
below if (!entryInfo->getIsInlined()) { FileInode* inode; auto openRes = this->fileStore.openFile(entryInfo, accessFlags, inode, /* loadFromDisk */ true, bypassAccessCheck); outInode = {inode, nullptr}; return openRes; } // not in global map, now per directory and also try to load from disk std::string parentEntryID = entryInfo->getParentEntryID(); // Note: We assume, that if the file is buddy mirrored, the parent is mirrored, too bool isBuddyMirrored = entryInfo->getIsBuddyMirrored(); DirInode* subDir = referenceDirUnlocked(parentEntryID, isBuddyMirrored, false); if (!subDir) return FhgfsOpsErr_INTERNAL; UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); FileInode* inode; FhgfsOpsErr retVal = subDir->fileStore.openFile(entryInfo, accessFlags, inode, /* loadFromDisk */ true, bypassAccessCheck); outInode = {inode, subDir}; if (!outInode) { subDirLock.unlock(); releaseDirUnlocked(parentEntryID); return retVal; } if (outInode->getIsInlined()) { outInode->incParentRef(parentEntryID); // Do not release dir here, we are returning the inode referenced in subDirs fileStore! return retVal; } // If execution reaches this point then following holds true: // 1. The file inode is non-inlined (validated from on-disk metadata). // 2. The entryInfo received from client has stale value of inlined flag (i.e. true). // 3. The file inode reference exists in dir-specific store. // // According to existing design, non-inlined file inodes should always be referenced // in the global inode store. Therefore, the following additional steps are performed // to maintain consistency with the aforementioned design: // 1. Close the file in dir specific store and release parent directory reference // 2. Release meta read-lock and call tryOpenFileWriteLocked() to open the // file in global store. unsigned numHardlinks; // ignored here! unsigned numInodeRefs; // ignored here! bool lastWriterClosed; // ignored here! 
subDir->fileStore.closeFile(entryInfo, outInode.get(), accessFlags, &numHardlinks, &numInodeRefs, lastWriterClosed); subDirLock.unlock(); releaseDirUnlocked(parentEntryID); lock.unlock(); // U N L O C K return tryOpenFileWriteLocked(entryInfo, accessFlags, bypassAccessCheck, outInode); } /** * @param accessFlags OPENFILE_ACCESS_... flags * @param outNumHardlinks for quick on-close unlink check * @param outNumRef also for on-close unlink check * @param outLastWriterClosed set to true when last writer closes file */ void MetaStore::closeFile(EntryInfo* entryInfo, MetaFileHandle inode, unsigned accessFlags, unsigned* outNumHardlinks, unsigned* outNumRefs, bool& outLastWriterClosed) { const char* logContext = "Close file"; UniqueRWLock lock(rwlock, SafeRWLock_READ); // now release (and possible free (delete) the inode if not further referenced) // first try global metaStore map bool closeRes = this->fileStore.closeFile(entryInfo, inode.get(), accessFlags, outNumHardlinks, outNumRefs, outLastWriterClosed); if (closeRes) return; // not in global /store/map, now per directory // Note: We assume, that if the file is buddy mirrored, the parent is mirrored, too DirInode* subDir = referenceDirUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), false); if (!subDir) return; UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); inode->decParentRef(); /* Already decrease it here, as the inode might get destroyed * in closeFile(). 
The counter is very important if there is * another open reference on this file and if the file is unlinked * while being open */ closeRes = subDir->fileStore.closeFile(entryInfo, inode.get(), accessFlags, outNumHardlinks, outNumRefs, outLastWriterClosed); if (!closeRes) { LOG_DEBUG(logContext, Log_SPAM, "File not open: " + entryInfo->getEntryID() ); IGNORE_UNUSED_VARIABLE(logContext); } subDirLock.unlock(); releaseDirUnlocked(entryInfo->getParentEntryID()); // we kept another dir reference in openFile(), so release it here releaseDirUnlocked(entryInfo->getParentEntryID()); } /** * get statData of a DirInode or FileInode. * * @param outParentNodeID maybe NULL (default). Its value for FileInodes is always 0, as * the value is undefined for files due to possible hard links. * @param outParentEntryID, as with outParentNodeID it is undefined for files */ FhgfsOpsErr MetaStore::stat(EntryInfo* entryInfo, bool loadFromDisk, StatData& outStatData, NumNodeID* outParentNodeID, std::string* outParentEntryID) { FhgfsOpsErr statRes = FhgfsOpsErr_PATHNOTEXISTS; UniqueRWLock lock(rwlock, SafeRWLock_READ); if (entryInfo->getEntryType() == DirEntryType_DIRECTORY) { // entry is a dir return dirStore.stat(entryInfo->getEntryID(), entryInfo->getIsBuddyMirrored(), outStatData, outParentNodeID, outParentEntryID); } // entry is any type, but a directory, e.g. 
a regular file if (outParentNodeID) { // no need to set these for a regular file *outParentNodeID = NumNodeID(); } // first check if the inode is referenced in the global store // if inode is non-inlined then forceLoad it from disk if it is not // already present in global fileStore (similar to MetaStore::openFile()) statRes = fileStore.stat(entryInfo, !entryInfo->getIsInlined(), outStatData); if (statRes != FhgfsOpsErr_PATHNOTEXISTS) return statRes; // not in global /store/map, now per directory // Note: We assume, that if the file is buddy mirrored, the parent is mirrored, too DirInode* subDir = referenceDirUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), false); if (likely(subDir)) { UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); statRes = subDir->fileStore.stat(entryInfo, loadFromDisk, outStatData); subDirLock.unlock(); releaseDirUnlocked(entryInfo->getParentEntryID()); } return statRes; } FhgfsOpsErr MetaStore::setAttr(EntryInfo* entryInfo, int validAttribs, SettableFileAttribs* attribs) { UniqueRWLock lock(rwlock, SafeRWLock_READ); return setAttrUnlocked(entryInfo, validAttribs, attribs); } /** * @param validAttribs SETATTR_CHANGE_...-Flags or no flags to only update attribChangeTimeSecs * @param attribs new attribs, may be NULL if validAttribs==0 */ FhgfsOpsErr MetaStore::setAttrUnlocked(EntryInfo* entryInfo, int validAttribs, SettableFileAttribs* attribs) { FhgfsOpsErr setAttrRes = FhgfsOpsErr_PATHNOTEXISTS; if (DirEntryType_ISDIR(entryInfo->getEntryType())) return dirStore.setAttr(entryInfo->getEntryID(), entryInfo->getIsBuddyMirrored(), validAttribs, attribs); if (this->fileStore.isInStore(entryInfo->getEntryID() ) ) return this->fileStore.setAttr(entryInfo, validAttribs, attribs); // not in global /store/map, now per directory // NOTE: we assume that if the file is mirrored, the parent dir is mirrored too DirInode* subDir = referenceDirUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), true); if 
(likely(subDir) ) { UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); setAttrRes = subDir->fileStore.setAttr(entryInfo, validAttribs, attribs); subDirLock.unlock(); releaseDirUnlocked(entryInfo->getParentEntryID()); } return setAttrRes; } FhgfsOpsErr MetaStore::incDecLinkCount(EntryInfo* entryInfo, int value) { UniqueRWLock lock(rwlock, SafeRWLock_WRITE); return incDecLinkCountUnlocked(entryInfo, value); } /** * Update link count value in file inode */ FhgfsOpsErr MetaStore::incDecLinkCountUnlocked(EntryInfo* entryInfo, int value) { auto [inode, retVal] = referenceFileUnlocked(entryInfo); if (unlikely(!inode)) return retVal; if (!inode->incDecNumHardLinks(entryInfo, value)) retVal = FhgfsOpsErr_INTERNAL; else retVal = FhgfsOpsErr_SUCCESS; releaseFileUnlocked(entryInfo->getParentEntryID(), inode); return retVal; } /** * Set / update the parent information of a dir inode */ FhgfsOpsErr MetaStore::setDirParent(EntryInfo* entryInfo, NumNodeID parentNodeID) { if ( unlikely(!DirEntryType_ISDIR(entryInfo->getEntryType() )) ) return FhgfsOpsErr_INTERNAL; const std::string& dirID = entryInfo->getEntryID(); const bool isBuddyMirrored = entryInfo->getIsBuddyMirrored(); DirInode* dir = referenceDir(dirID, isBuddyMirrored, true); if ( !dir ) return FhgfsOpsErr_PATHNOTEXISTS; // also update the time stamps FhgfsOpsErr setRes = dir->setDirParentAndChangeTime(entryInfo, parentNodeID); releaseDir(dirID); return setRes; } /** * Create a File (dentry + inlined-inode) from an existing inlined inode * Create a dentry (in Ver-3 format) from existing dentry (if inode was deinlined) * * @param dir Already needs to be locked by the caller. * @param inode Take values from this inode to create the new file. Object will be destroyed * here. 
*/
FhgfsOpsErr MetaStore::mkMetaFileUnlocked(DirInode& dir, const std::string& entryName,
   EntryInfo* entryInfo, FileInode* inode)
{
   App* app = Program::getApp();
   Node& localNode = app->getLocalNode();

   DirEntryType entryType = entryInfo->getEntryType();
   const std::string& entryID = inode->getEntryID();

   NumNodeID ownerNodeID;

   if (entryInfo->getIsInlined())
   {
      // inlined inode: owner is the local buddy group (if mirrored) or the local node
      ownerNodeID = inode->getIsBuddyMirrored() ?
         NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID() ) : localNode.getNumID();
   }
   else
   {
      // owner node may not be same as local node for files having deinlined inode
      ownerNodeID = entryInfo->getOwnerNodeID();
   }

   DirEntry newDentry (entryType, entryName, entryID, ownerNodeID);

   if (inode->getIsBuddyMirrored())
      newDentry.setBuddyMirrorFeatureFlag();

   if (entryInfo->getIsInlined())
   {
      // only set inode data if we are dealing with inlined inode(s)
      FileInodeStoreData inodeDiskData(entryID, inode->getInodeDiskData() );
      inodeDiskData.setInodeFeatureFlags(inode->getFeatureFlags() );
      newDentry.setFileInodeData(inodeDiskData);
      inodeDiskData.setPattern(NULL); /* pattern now owned by newDentry, so make sure it won't be
                                       * deleted on inodeMetadata object destruction */
   }

   // create a dir-entry
   FhgfsOpsErr makeRes = dir.makeDirEntryUnlocked(&newDentry);

   // save RSTs to disk
   if ((makeRes == FhgfsOpsErr_SUCCESS) && inode->getIsRstAvailable())
   {
      // Update isInlined flag and parentDirID in inode to ensure correct metafile path computation:
      // - isInlined: Usually set during inode data load from disk but here it needs to be updated
      //   now for deriving correct metafile path
      // - parentDirID: Reflects the new parent directory after rename
      // Both of above must be updated before storing RSTs
      if (entryInfo->getIsInlined())
         inode->setIsInlined(true);

      // note: this modifies the caller's entryInfo (only on this RST path)
      entryInfo->setParentEntryID(dir.getID());
      inode->storeRemoteStorageTargetUnlocked(entryInfo);
   }

   // the inode object is destroyed here regardless of makeRes
   delete inode;

   return makeRes;
}

/**
 * Create a new File (directory-entry with inlined inode)
 *
 * @param rstInfo must be provided by caller (can be
invalid though) * @param stripePattern must be provided; will be assigned to given outInodeData. * @param outEntryInfo will not be set if NULL (caller not interested) * @param outInodeData will not be set if NULL (caller not interested) */ FhgfsOpsErr MetaStore::mkNewMetaFile(DirInode& dir, MkFileDetails* mkDetails, std::unique_ptr stripePattern, RemoteStorageTarget* rstInfo, EntryInfo* outEntryInfo, FileInodeStoreData* outInodeData) { UniqueRWLock metaLock(rwlock, SafeRWLock_READ); UniqueRWLock dirLock(dir.rwlock, SafeRWLock_WRITE); const char* logContext = "Make New Meta File"; App* app = Program::getApp(); Config* config = app->getConfig(); Node& localNode = app->getLocalNode(); MirrorBuddyGroupMapper* metaBuddyGroupMapper = app->getMetaBuddyGroupMapper(); const std::string& newEntryID = mkDetails->newEntryID.empty() ? StorageTk::generateFileID(localNode.getNumID() ) : mkDetails->newEntryID; const std::string& parentEntryID = dir.getID(); NumNodeID ownerNodeID = dir.getIsBuddyMirrored() ? NumNodeID(metaBuddyGroupMapper->getLocalGroupID() ) : localNode.getNumID(); // refuse to create a directory before we even touch the parent. a client could send a request // to create an S_IFDIR inode via mkfile. if (S_ISDIR(mkDetails->mode)) return FhgfsOpsErr_INVAL; // load DirInode on demand if required, we need it now if (!dir.loadIfNotLoadedUnlocked()) return FhgfsOpsErr_PATHNOTEXISTS; CharVector aclXAttr; bool needsACL; if (config->getStoreClientACLs()) { // Find out if parent dir has an ACL. FhgfsOpsErr aclXAttrRes; std::tie(aclXAttrRes, aclXAttr, std::ignore) = dir.getXAttr(nullptr, PosixACL::defaultACLXAttrName, XATTR_SIZE_MAX); if (aclXAttrRes == FhgfsOpsErr_SUCCESS) { // dir has a default acl. PosixACL defaultACL; if (!defaultACL.deserializeXAttr(aclXAttr)) { LogContext(logContext).log(Log_ERR, "Error deserializing directory default ACL."); return FhgfsOpsErr_INTERNAL; } else { if (!defaultACL.empty()) { // Note: This modifies mkDetails->mode as well as the ACL. 
FhgfsOpsErr modeRes = defaultACL.modifyModeBits(mkDetails->mode, needsACL); if (modeRes != FhgfsOpsErr_SUCCESS) return modeRes; if (needsACL) defaultACL.serializeXAttr(aclXAttr); } else { mkDetails->mode &= ~mkDetails->umask; needsACL = false; } } } else if (aclXAttrRes == FhgfsOpsErr_NODATA) { // Directory does not have a default ACL - subtract umask from mode bits. mkDetails->mode &= ~mkDetails->umask; needsACL = false; } else { LogContext(logContext).log(Log_ERR, "Error loading directory default ACL."); return FhgfsOpsErr_INTERNAL; } } else { needsACL = false; } DirEntryType entryType = MetadataTk::posixFileTypeToDirEntryType(mkDetails->mode); StatData statData(mkDetails->mode, mkDetails->userID, mkDetails->groupID, stripePattern->getAssignedNumTargets(), mkDetails->createTime); unsigned origParentUID = dir.getUserIDUnlocked(); // new file, we use the parent UID unsigned fileInodeFlags; if (dir.getIsBuddyMirrored()) fileInodeFlags = FILEINODE_FEATURE_BUDDYMIRRORED; else fileInodeFlags = 0; // (note: inodeMetaData constructor clones stripePattern) FileInodeStoreData inodeMetaData(newEntryID, &statData, stripePattern.get(), fileInodeFlags, origParentUID, parentEntryID, FileInodeOrigFeature_TRUE); DirEntry newDentry(entryType, mkDetails->newName, newEntryID, ownerNodeID); if (dir.getIsBuddyMirrored()) newDentry.setBuddyMirrorFeatureFlag(); // buddy mirroring is inherited newDentry.setFileInodeData(inodeMetaData); inodeMetaData.setPattern(NULL); /* cloned pattern now belongs to newDentry, so make sure it won't be deleted in inodeMetaData destructor */ // create a dir-entry with inlined inodes FhgfsOpsErr makeRes = dir.makeDirEntryUnlocked(&newDentry); if(makeRes == FhgfsOpsErr_SUCCESS) { // new entry successfully created if (outInodeData) { // set caller's outInodeData outInodeData->setInodeStatData(statData); outInodeData->setPattern(stripePattern.release()); // (will be deleted with outInodeData) outInodeData->setEntryID(newEntryID); } } unsigned entryInfoFlags = 
ENTRYINFO_FEATURE_INLINED; if (dir.getIsBuddyMirrored()) entryInfoFlags |= ENTRYINFO_FEATURE_BUDDYMIRRORED; EntryInfo newEntryInfo(ownerNodeID, parentEntryID, newEntryID, mkDetails->newName, entryType, entryInfoFlags); // apply access ACL calculated from default ACL if (needsACL) { FhgfsOpsErr setXAttrRes = dir.setXAttr(&newEntryInfo, PosixACL::accessACLXAttrName, aclXAttr, 0, false); if (setXAttrRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).log(Log_ERR, "Error setting file ACL."); makeRes = FhgfsOpsErr_INTERNAL; } } // only proceed with RST operations if rstInfo has valid version if (rstInfo && !rstInfo->hasInvalidVersion() && (makeRes == FhgfsOpsErr_SUCCESS)) { // reference newly created file auto [fileInode, referenceRes] = referenceFileUnlocked(dir, &newEntryInfo); if (likely(fileInode)) { FhgfsOpsErr setRstRes = fileInode->setRemoteStorageTarget(&newEntryInfo, *rstInfo); if (setRstRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).log(Log_WARNING, "Failed to set remote storage targets for " "entryID: " + newEntryInfo.getEntryID() + ". RST might be invalid."); } releaseFileUnlocked(dir, fileInode); } else { // critical error: we cannot proceed with RSTs as inode can't be referenced LogContext(logContext).logErr("Unable to reference inode. entryID: " + newEntryInfo.getEntryID() + ". 
RST operation aborted."); // consider file creation failed if we can't reference the inode makeRes = FhgfsOpsErr_INTERNAL; } } if (outEntryInfo) *outEntryInfo = newEntryInfo; return makeRes; } FhgfsOpsErr MetaStore::makeDirInode(DirInode& inode) { return inode.storePersistentMetaData(); } FhgfsOpsErr MetaStore::makeDirInode(DirInode& inode, const CharVector& defaultACLXAttr, const CharVector& accessACLXAttr) { return inode.storePersistentMetaData(defaultACLXAttr, accessACLXAttr); } FhgfsOpsErr MetaStore::removeDirInode(const std::string& entryID, bool isBuddyMirrored) { UniqueRWLock lock(rwlock, SafeRWLock_READ); return dirStore.removeDirInode(entryID, isBuddyMirrored); } /** * Unlink a non-inlined file inode * * Note: Specialy case without an entryInfo, for fsck only! */ FhgfsOpsErr MetaStore::fsckUnlinkFileInode(const std::string& entryID, bool isBuddyMirrored) { UniqueRWLock lock(rwlock, SafeRWLock_READ); // generic code needs an entryInfo, but most values can be empty for non-inlined inodes NumNodeID ownerNodeID; std::string parentEntryID; std::string fileName; DirEntryType entryType = DirEntryType_REGULARFILE; int flags = 0; EntryInfo entryInfo(ownerNodeID, parentEntryID, entryID, fileName, entryType, flags); if (isBuddyMirrored) entryInfo.setBuddyMirroredFlag(true); return this->fileStore.unlinkFileInode(&entryInfo, NULL); } /** * @param subDir may be NULL and then needs to be referenced */ FhgfsOpsErr MetaStore::unlinkInodeUnlocked(EntryInfo* entryInfo, DirInode* subDir, std::unique_ptr* outInode) { if (this->fileStore.isInStore(entryInfo->getEntryID())) return fileStore.unlinkFileInode(entryInfo, outInode); if (subDir) return subDir->fileStore.unlinkFileInode(entryInfo, outInode); // not in global /store/map, now per directory // Note: We assume, that if the file is buddy mirrored, the parent is mirrored, too subDir = referenceDirUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), false); if (!subDir) return FhgfsOpsErr_PATHNOTEXISTS; 
UniqueRWLock subDirLock(subDir->rwlock, SafeRWLock_READ); FhgfsOpsErr unlinkRes = subDir->fileStore.unlinkFileInode(entryInfo, outInode); // we can release the DirInode here, as the FileInode is not supposed to be in the // DirInodes FileStore anymore subDirLock.unlock(); releaseDirUnlocked(entryInfo->getParentEntryID()); return unlinkRes; } /** * @param outFile will be set to the unlinked file and the object must then be deleted by the caller * (can be NULL if the caller is not interested in the file) */ FhgfsOpsErr MetaStore::unlinkInode(EntryInfo* entryInfo, std::unique_ptr* outInode) { UniqueRWLock lock(rwlock, SafeRWLock_READ); return unlinkInodeUnlocked(entryInfo, NULL, outInode); } /** * need the following locks: * this->rwlock: SafeRWLock_WRITE * subDir: reference * subdir->rwlock: SafeRWLock_WRITE * * note: caller needs to delete storage chunk files. E.g. via MsgHelperUnlink::unlinkLocalFile() */ FhgfsOpsErr MetaStore::unlinkFileUnlocked(DirInode& subdir, const std::string& fileName, std::unique_ptr* outInode, EntryInfo* outEntryInfo, bool& outWasInlined, unsigned& outNumHardlinks) { FhgfsOpsErr retVal; std::unique_ptr dirEntry(subdir.dirEntryCreateFromFileUnlocked(fileName)); if (!dirEntry) return FhgfsOpsErr_PATHNOTEXISTS; // dirEntry exists time to make sure we have loaded subdir bool loadRes = subdir.loadIfNotLoadedUnlocked(); if (unlikely(!loadRes) ) return FhgfsOpsErr_INTERNAL; // if dirEntry exists, subDir also has to exist! 
// set the outEntryInfo int additionalFlags = 0; std::string parentEntryID = subdir.getID(); dirEntry->getEntryInfo(parentEntryID, additionalFlags, outEntryInfo); if (dirEntry->getIsInodeInlined() ) { // inode is inlined into the dir-entry retVal = unlinkDirEntryWithInlinedInodeUnlocked(fileName, subdir, dirEntry.get(), DirEntry_UNLINK_ID_AND_FILENAME, outInode, outNumHardlinks); outWasInlined = true; } else { // inode and dir-entry are separated fileStore retVal = unlinkDentryAndInodeUnlocked(fileName, subdir, dirEntry.get(), DirEntry_UNLINK_ID_AND_FILENAME, outInode, outNumHardlinks); outWasInlined = false; } return retVal; } /** * Unlinks the entire file, so dir-entry and inode. * * @param fileName friendly name * @param outEntryInfo contains the entryInfo of the unlinked file * @param outInode will be set to the unlinked (owned) file and the object and * storage server fileStore must then be deleted by the caller; even if success is returned, * this might be NULL (e.g. because the file is in use and was added to the disposal directory); * can be NULL if the caller is not interested in the file * @param outNumHardlinks will be set to the number of hardlinks the file had before the unlink * operation. This is mainly needed for event logging purposes. * @return normal fhgfs error code, normally succeeds even if a file was open; special case is * when this is called to unlink a file with the disposalDir dirID, then an open file will * result in a inuse-error (required for online_cfg mode=dispose) * * note: caller needs to delete storage chunk files. E.g. 
via MsgHelperUnlink::unlinkLocalFile() */ FhgfsOpsErr MetaStore::unlinkFile(DirInode& dir, const std::string& fileName, EntryInfo* outEntryInfo, std::unique_ptr* outInode, unsigned& outNumHardlinks) { const char* logContext = "Unlink File"; FhgfsOpsErr retVal = FhgfsOpsErr_PATHNOTEXISTS; UniqueRWLock lock(rwlock, SafeRWLock_READ); UniqueRWLock subDirLock(dir.rwlock, SafeRWLock_WRITE); bool wasInlined; retVal = unlinkFileUnlocked(dir, fileName, outInode, outEntryInfo, wasInlined, outNumHardlinks); subDirLock.unlock(); /* Give up the read-lock here, unlinkInodeLater() will aquire a write lock. We already did * most of the work, just possible back linking of the inode to the disposal dir is missing. * As our important work is done, we can also risk to give up the read lock. */ lock.unlock(); // U N L O C K if (retVal != FhgfsOpsErr_INUSE) return retVal; if (dir.getID() != META_DISPOSALDIR_ID_STR && dir.getID() != META_MIRRORDISPOSALDIR_ID_STR) { // we already successfully deleted the dentry, so all fine for the user retVal = FhgfsOpsErr_SUCCESS; } FhgfsOpsErr laterRes = unlinkInodeLater(outEntryInfo, wasInlined); if (laterRes == FhgfsOpsErr_AGAIN) { /* So the inode was not referenced in memory anymore and probably close() already deleted * it. Just make sure here it really does not exist anymore */ FhgfsOpsErr inodeUnlinkRes = unlinkInode(outEntryInfo, outInode); if (unlikely((inodeUnlinkRes != FhgfsOpsErr_PATHNOTEXISTS) && (inodeUnlinkRes != FhgfsOpsErr_SUCCESS) ) ) { LogContext(logContext).logErr(std::string("Failed to unlink inode. 
Error: ") + boost::lexical_cast(inodeUnlinkRes)); retVal = inodeUnlinkRes; } } return retVal; } /** * * Decrement nlink count and remove file inode if link count reaches zero */ FhgfsOpsErr MetaStore::unlinkFileInode(EntryInfo* delFileInfo, std::unique_ptr* outInode, unsigned& outNumHardlinks) { const char* logContext = "Unlink File Inode"; UniqueRWLock lock(rwlock, SafeRWLock_WRITE); FhgfsOpsErr retVal; FhgfsOpsErr isUnlinkable = this->fileStore.isUnlinkable(delFileInfo); if (isUnlinkable != FhgfsOpsErr_INUSE && isUnlinkable != FhgfsOpsErr_SUCCESS) return isUnlinkable; if (isUnlinkable == FhgfsOpsErr_INUSE) { FileInode* inode = this->fileStore.referenceLoadedFile(delFileInfo->getEntryID()); if (unlikely(!inode)) { LogContext(logContext).logErr("Busy/Inuse file inode found but failed to reference it." " EntryID: " + delFileInfo->getEntryID()); return FhgfsOpsErr_INTERNAL; } outNumHardlinks = inode->getNumHardlinks(); if (outNumHardlinks < 2) { retVal = FhgfsOpsErr_INUSE; } else { retVal = FhgfsOpsErr_SUCCESS; } FhgfsOpsErr decRes = this->fileStore.decLinkCount(*inode, delFileInfo); if (decRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr("Failed to decrease the link count. entryID: " + delFileInfo->getEntryID()); } this->fileStore.releaseFileInode(inode); } else { // File is not IN_USE so decrement link count and if link count reach zero then // delete inode file from disk auto [inode, referenceRes] = referenceFileUnlocked(delFileInfo); if (unlikely(!inode)) return referenceRes; outNumHardlinks = inode->getNumHardlinks(); if (outNumHardlinks > 1) { FhgfsOpsErr decRes = this->fileStore.decLinkCount(*inode, delFileInfo); if (decRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr("Failed to decrease the link count. 
entryID: " + delFileInfo->getEntryID()); } releaseFileUnlocked(delFileInfo->getParentEntryID(), inode); retVal = FhgfsOpsErr_SUCCESS; } else { // release inode reference before calling unlinkFileInode() // because it checks for IN_USE situation releaseFileUnlocked(delFileInfo->getParentEntryID(), inode); retVal = this->fileStore.unlinkFileInode(delFileInfo, outInode); } } if (retVal != FhgfsOpsErr_INUSE) return retVal; lock.unlock(); // U N L O C K const std::string& parentEntryID = delFileInfo->getParentEntryID(); if (parentEntryID != META_DISPOSALDIR_ID_STR && parentEntryID != META_MIRRORDISPOSALDIR_ID_STR) { retVal = FhgfsOpsErr_SUCCESS; } FhgfsOpsErr laterRes = unlinkInodeLater(delFileInfo, false); if (laterRes == FhgfsOpsErr_AGAIN) { // so the inode was not referenced in memory anymore and probably close() already // deleted it. Just make sure here that it really doesn't exists anymore. FhgfsOpsErr inodeUnlinkRes = unlinkInode(delFileInfo, outInode); if (unlikely((inodeUnlinkRes != FhgfsOpsErr_PATHNOTEXISTS) && (inodeUnlinkRes != FhgfsOpsErr_SUCCESS) ) ) { LogContext(logContext).logErr(std::string("Failed to unlink inode. 
Error: ") + boost::lexical_cast(inodeUnlinkRes)); retVal = inodeUnlinkRes; } } return retVal; } /** * Unlink a dirEntry with an inlined inode */ FhgfsOpsErr MetaStore::unlinkDirEntryWithInlinedInodeUnlocked(const std::string& entryName, DirInode& subDir, DirEntry* dirEntry, unsigned unlinkTypeFlags, std::unique_ptr* outInode, unsigned& outNumHardlinks) { const char* logContext = "Unlink DirEntry with inlined inode"; if (outInode) outInode->reset(); std::string parentEntryID = subDir.getID(); // when we are here, we no the inode is inlined into the dirEntry int flags = ENTRYINFO_FEATURE_INLINED; EntryInfo entryInfo; dirEntry->getEntryInfo(parentEntryID, flags, &entryInfo); FhgfsOpsErr isUnlinkable = isFileUnlinkable(subDir, &entryInfo); // note: FhgfsOpsErr_PATHNOTEXISTS cannot happen with isUnlinkable(id, false) if (isUnlinkable != FhgfsOpsErr_INUSE && isUnlinkable != FhgfsOpsErr_SUCCESS) return isUnlinkable; if (isUnlinkable == FhgfsOpsErr_INUSE) { FhgfsOpsErr retVal; // for some reasons we cannot unlink the inode, probably the file is opened. De-inline it. // *outInode stays NULL - the caller must not do anything with this inode MetaFileHandle inode = referenceLoadedFileUnlocked(subDir, dirEntry->getEntryID() ); if (!inode) { LogContext(logContext).logErr("Bug: Busy inode found, but failed to reference it." 
"FileName: " + entryName + ", EntryID: " + dirEntry->getEntryID()); return FhgfsOpsErr_INTERNAL; } outNumHardlinks = inode->getNumHardlinks(); if (outNumHardlinks > 1) unlinkTypeFlags &= ~DirEntry_UNLINK_ID; bool unlinkError = subDir.unlinkBusyFileUnlocked(entryName, dirEntry, unlinkTypeFlags); if (unlinkError) retVal = FhgfsOpsErr_INTERNAL; else { // unlink success if (outNumHardlinks < 2) { retVal = FhgfsOpsErr_INUSE; // The inode is not inlined anymore, update the in-memory objects dirEntry->unsetInodeInlined(); inode->setIsInlined(false); } else { // hard link exists, the inode is still inlined retVal = FhgfsOpsErr_SUCCESS; } /* Decrease the link count, but as dentry and inode are still hard linked objects, * it also unsets the DENTRY_FEATURE_INODE_INLINE on disk */ FhgfsOpsErr decRes = subDir.fileStore.decLinkCount(*inode, &entryInfo); if (decRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr("Failed to decrease the link count!" " parentID: " + entryInfo.getParentEntryID() + " entryID: " + entryInfo.getEntryID() + " fileName: " + entryName); } } releaseFileUnlocked(subDir, inode); return retVal; /* We are done here, but the file still might be referenced in the per-dir file store. * However, we cannot move it, as we do not have a MetaStore write-lock, but only a * read-lock. Therefore that has to be done later on, once we have given up the read-lock. */ } // here, unlinkRes == SUCCESS. // dir-entry and inode are inlined. The file is also not opened anymore, so delete it. 
if (!(unlinkTypeFlags & DirEntry_UNLINK_ID)) { // only the dentry-by-name is left, so no need to care about the inode return subDir.unlinkDirEntryUnlocked(entryName, dirEntry, unlinkTypeFlags); } auto [inode, referenceRes] = referenceFileUnlocked(subDir, &entryInfo); if (!inode) return referenceRes; outNumHardlinks = inode->getNumHardlinks(); if (outNumHardlinks > 1) unlinkTypeFlags &= ~DirEntry_UNLINK_ID; FhgfsOpsErr retVal = subDir.unlinkDirEntryUnlocked(entryName, dirEntry, unlinkTypeFlags); if (retVal == FhgfsOpsErr_SUCCESS && outNumHardlinks > 1) { FhgfsOpsErr decRes = subDir.fileStore.decLinkCount(*inode, &entryInfo); if (decRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr("Failed to decrease the link count!" " parentID: " + entryInfo.getParentEntryID() + " entryID: " + entryInfo.getEntryID() + " entryNameName: " + entryName); } } if (outInode && outNumHardlinks < 2) { inode->setIsInlined(false); // last dirEntry gone, so not inlined anymore outInode->reset(inode->clone()); } releaseFileUnlocked(subDir, inode); return retVal; } /** * Unlink seperated dirEntry and Inode */ FhgfsOpsErr MetaStore::unlinkDentryAndInodeUnlocked(const std::string& fileName, DirInode& subdir, DirEntry* dirEntry, unsigned unlinkTypeFlags, std::unique_ptr* outInode, unsigned& outNumHardlinks) { const char* logContext = "Unlink DirEntry with non-inlined inode"; // unlink dirEntry first FhgfsOpsErr retVal = subdir.unlinkDirEntryUnlocked(fileName, dirEntry, unlinkTypeFlags); if (retVal != FhgfsOpsErr_SUCCESS) return retVal; // directory-entry was removed => unlink inode // if we are here we know the dir-entry does not inline the inode int addionalEntryInfoFlags = 0; std::string parentEntryID = subdir.getID(); EntryInfo entryInfo; dirEntry->getEntryInfo(parentEntryID, addionalEntryInfoFlags, &entryInfo); FhgfsOpsErr isUnlinkable = isFileUnlinkable(subdir, &entryInfo); if (isUnlinkable != FhgfsOpsErr_INUSE && isUnlinkable != FhgfsOpsErr_SUCCESS) return isUnlinkable; if 
(isUnlinkable == FhgfsOpsErr_INUSE) { MetaFileHandle inode = referenceLoadedFileUnlocked(subdir, dirEntry->getEntryID()); if (unlikely(!inode)) { LogContext(logContext).logErr("Busy/Inuse file inode found but failed to reference it." "FileName: " + fileName + ", EntryID: " + dirEntry->getEntryID()); return FhgfsOpsErr_INTERNAL; } outNumHardlinks = inode->getNumHardlinks(); if (outNumHardlinks > 1) { retVal = FhgfsOpsErr_SUCCESS; } else { retVal = FhgfsOpsErr_INUSE; } FhgfsOpsErr decRes = subdir.fileStore.decLinkCount(*inode, &entryInfo); if (decRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr("Failed to decrease the link count." " parentEntryID: " + parentEntryID + ", entryID: " + entryInfo.getEntryID() + ", entryName: " + fileName); } releaseFileUnlocked(subdir, inode); return retVal; } else { auto [inode, referenceRes] = referenceFileUnlocked(subdir, &entryInfo); if (unlikely(!inode)) return referenceRes; outNumHardlinks = inode->getNumHardlinks(); if (outNumHardlinks > 1) { FhgfsOpsErr decRes = subdir.fileStore.decLinkCount(*inode, &entryInfo); if (decRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr("Failed to decrease the link count." " parentEntryID: " + parentEntryID + ", entryID: " + entryInfo.getEntryID() + ", entryName: " + fileName); } releaseFileUnlocked(subdir, inode); return FhgfsOpsErr_SUCCESS; } else { releaseFileUnlocked(subdir, inode); return unlinkInodeUnlocked(&entryInfo, &subdir, outInode); } } } /** * Adds the entry to the disposal directory for later (asynchronous) disposal. */ FhgfsOpsErr MetaStore::unlinkInodeLater(EntryInfo* entryInfo, bool wasInlined) { UniqueRWLock lock(rwlock, SafeRWLock_WRITE); return unlinkInodeLaterUnlocked(entryInfo, wasInlined); } /** * Adds the inode (with a new dirEntry) to the disposal directory for later * (on-close or asynchronous) disposal. * * Note: We are going to set outInode if we determine that the file was closed in the mean time * and all references to this inode shall be deleted. 
*/ FhgfsOpsErr MetaStore::unlinkInodeLaterUnlocked(EntryInfo* entryInfo, bool wasInlined) { // Note: We must not try to unlink the inode here immediately, because then the local versions // of the data-object (on the storage nodes) would never be deleted. App* app = Program::getApp(); FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS; const std::string& parentEntryID = entryInfo->getParentEntryID(); const std::string& entryID = entryInfo->getEntryID(); bool isBuddyMirrored = entryInfo->getIsBuddyMirrored(); DirInode* disposalDir = isBuddyMirrored ? app->getBuddyMirrorDisposalDir() : app->getDisposalDir(); const std::string& disposalID = disposalDir->getID(); /* This requires a MetaStore write lock, therefore only can be done here and not in common * unlink code, as the common unlink code only has a MetaStore read-lock. */ if (!this->fileStore.isInStore(entryID) ) { // Note: we assume that if the inode is mirrored, the parent is mirrored, too bool moveRes = moveReferenceToMetaFileStoreUnlocked(parentEntryID, isBuddyMirrored, entryID); if (!moveRes) return FhgfsOpsErr_INTERNAL; /* a critical error happened, better don't do * anything with this inode anymore */ } entryInfo->setParentEntryID(disposalID); // update the dirID to disposalDir // set inode nlink count to 0. // we assume the inode is typically already referenced by someone (otherwise we wouldn't need to // unlink later) and this allows for faster checking on-close than the disposal dir check. // do not load the inode from disk first. FileInode* inode = this->fileStore.referenceLoadedFile(entryID); if (!inode) { /* If we cannot reference the inode from memory, we raced with close(). * This is possible as we gave up all locks in unlinkFile() and it means the inode/file shall * be deleted entirely on disk now. */ entryInfo->setInodeInlinedFlag(false); return FhgfsOpsErr_AGAIN; } int linkCount = inode->getNumHardlinks();; this->fileStore.releaseFileInode(inode); /* Now link to the disposal-dir if required. 
If the inode was inlined into the dentry, * the dentry/inode unlink code already links to the disposal dir and we do not need to do * this work. */ if (!wasInlined && linkCount == 0) { const std::string& inodePath = MetaStorageTk::getMetaInodePath( isBuddyMirrored ? app->getBuddyMirrorInodesPath()->str() : app->getInodesPath()->str(), entryID); /* NOTE: If we are going to have another inode-format, than the current * inode-inlined into the dentry, we need to add code for that here. */ disposalDir->linkFileInodeToDir(inodePath, entryID); // use entryID as file name // ignore the return code here, as we cannot do anything about it anyway. } return retVal; } /** * Reads all inodes from the given inodes storage hash subdir. * * Note: This is intended for use by Fsck only. * * Note: Offset is an internal value and should not be assumed to be just 0, 1, 2, 3, ...; * so make sure you use either 0 (at the beginning) or something that has been returned by this * method as outNewOffset. * Note: You have reached the end of the directory when * "outDirInodes->size() + outFileInodes->size() != maxOutInodes". * * * @param hashDirNum number of a hash dir in the "entries" storage subdir * @param lastOffset zero-based offset; represents the native local fs offset; * @param outDirInodes the read directory inodes in the format fsck saves them * @param outFileInodes the read file inodes in the format fsck saves them * @param outNewOffset is only valid if return value indicates success. 
*/ FhgfsOpsErr MetaStore::getAllInodesIncremental(unsigned hashDirNum, int64_t lastOffset, unsigned maxOutInodes, FsckDirInodeList* outDirInodes, FsckFileInodeList* outFileInodes, int64_t* outNewOffset, bool isBuddyMirrored) { const char* logContext = "MetaStore (get all inodes inc)"; App* app = Program::getApp(); MirrorBuddyGroupMapper* bgm = app->getMetaBuddyGroupMapper(); if (isBuddyMirrored && (bgm->getLocalBuddyGroup().secondTargetID == app->getLocalNode().getNumID().val() || bgm->getLocalGroupID() == 0)) return FhgfsOpsErr_SUCCESS; NumNodeID rootNodeNumID = app->getMetaRoot().getID(); NumNodeID localNodeNumID = isBuddyMirrored ? NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID()) : app->getLocalNode().getNumID(); StringList entryIDs; unsigned firstLevelHashDir; unsigned secondLevelHashDir; StorageTk::splitHashDirs(hashDirNum, &firstLevelHashDir, &secondLevelHashDir); FhgfsOpsErr readRes = getAllEntryIDFilesIncremental(firstLevelHashDir, secondLevelHashDir, lastOffset, maxOutInodes, &entryIDs, outNewOffset, isBuddyMirrored); if (readRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).logErr( "Failed to read inodes from hash dirs; " "HashDir Level 1: " + StringTk::uintToStr(firstLevelHashDir) + "; " "HashDir Level 2: " + StringTk::uintToStr(firstLevelHashDir) ); return readRes; } // the actual entry processing for ( StringListIter entryIDIter = entryIDs.begin(); entryIDIter != entryIDs.end(); entryIDIter++ ) { const std::string& entryID = *entryIDIter; // now try to reference the file and see what we got MetaFileHandle fileInode; DirInode* dirInode = NULL; referenceInode(entryID, isBuddyMirrored, fileInode, dirInode); if (dirInode) { // entry is a directory std::string parentDirID; NumNodeID parentNodeID; dirInode->getParentInfo(&parentDirID, &parentNodeID); NumNodeID ownerNodeID = dirInode->getOwnerNodeID(); // in the unlikely case, that this is the root directory and this MDS is not the owner of // root ignore the entry if (unlikely( 
(entryID.compare(META_ROOTDIR_ID_STR) == 0) && rootNodeNumID != localNodeNumID) ) continue; // not root => get stat data and create a FsckDirInode with data StatData statData; dirInode->getStatData(statData); UInt16Vector stripeTargets; FsckStripePatternType stripePatternType = FsckTk::stripePatternToFsckStripePattern( dirInode->getStripePattern(), NULL, &stripeTargets); FsckDirInode fsckDirInode(entryID, parentDirID, parentNodeID, ownerNodeID, statData.getFileSize(), statData.getNumHardlinks(), stripeTargets, stripePatternType, localNodeNumID, isBuddyMirrored, true, dirInode->getIsBuddyMirrored() != isBuddyMirrored); outDirInodes->push_back(fsckDirInode); this->releaseDir(entryID); } else if (fileInode) { // directory not successful => must be a file-like object // create a FsckFileInode with data std::string parentDirID; NumNodeID parentNodeID; UInt16Vector stripeTargets; PathInfo pathInfo; unsigned userID; unsigned groupID; int64_t fileSize; unsigned numHardLinks; uint64_t numBlocks; StatData statData; fileInode->getPathInfo(&pathInfo); FhgfsOpsErr statRes = fileInode->getStatData(statData); if ( statRes == FhgfsOpsErr_SUCCESS ) { userID = statData.getUserID(); groupID = statData.getGroupID(); fileSize = statData.getFileSize(); numHardLinks = statData.getNumHardlinks(); numBlocks = statData.getNumBlocks(); } else { // couldn't get the stat data LogContext(logContext).logErr(std::string("Unable to stat file inode: ") + entryID + std::string(". 
SysErr: ") + boost::lexical_cast(statRes)); userID = 0; groupID = 0; fileSize = 0; numHardLinks = 0; numBlocks = 0; } StripePattern* stripePattern = fileInode->getStripePattern(); unsigned chunkSize; FsckStripePatternType stripePatternType = FsckTk::stripePatternToFsckStripePattern( stripePattern, &chunkSize, &stripeTargets); FsckFileInode fsckFileInode(entryID, parentDirID, parentNodeID, pathInfo, userID, groupID, fileSize, numHardLinks, numBlocks, stripeTargets, stripePatternType, chunkSize, localNodeNumID, 0, 0, false, isBuddyMirrored, true, fileInode->getIsBuddyMirrored() != isBuddyMirrored); outFileInodes->push_back(fsckFileInode); // parentID is absolutely irrelevant here, because we know that this inode is not inlined this->releaseFile("", fileInode); } else { // something went wrong with inode loading // create a dir inode as dummy const UInt16Vector stripeTargets; const FsckDirInode fsckDirInode(entryID, "", NumNodeID(), NumNodeID(), 0, 0, stripeTargets, FsckStripePatternType_INVALID, localNodeNumID, isBuddyMirrored, false, false); outDirInodes->push_back(fsckDirInode); } } // end of for loop return FhgfsOpsErr_SUCCESS; } /** * Reads all raw entryID filenames from the given "inodes" storage hash subdirs. * * Note: This is intended for use by Fsck. * * Note: Offset is an internal value and should not be assumed to be just 0, 1, 2, 3, ...; * so make sure you use either 0 (at the beginning) or something that has been returned by this * method as outNewOffset. * * @param hashDirNum number of a hash dir in the "entries" storage subdir * @param lastOffset zero-based offset; represents the native local fs offset; * @param outEntryIDFiles the raw filenames of the entries in the given hash dir (so you will need * to remove filename suffixes to use these as entryIDs). * @param outNewOffset is only valid if return value indicates success. 
* */ FhgfsOpsErr MetaStore::getAllEntryIDFilesIncremental(unsigned firstLevelhashDirNum, unsigned secondLevelhashDirNum, int64_t lastOffset, unsigned maxOutEntries, StringList* outEntryIDFiles, int64_t* outNewOffset, bool buddyMirrored) { const char* logContext = "Inode (get entry files inc)"; App* app = Program::getApp(); FhgfsOpsErr retVal = FhgfsOpsErr_INTERNAL; uint64_t numEntries = 0; struct dirent* dirEntry = NULL; const std::string inodesPath = buddyMirrored ? app->getBuddyMirrorInodesPath()->str() : app->getInodesPath()->str(); const std::string path = StorageTkEx::getMetaInodeHashDir( inodesPath, firstLevelhashDirNum, secondLevelhashDirNum); UniqueRWLock lock(rwlock, SafeRWLock_READ); DIR* dirHandle = opendir(path.c_str() ); if(!dirHandle) { LogContext(logContext).logErr(std::string("Unable to open entries directory: ") + path + ". SysErr: " + System::getErrString() ); return FhgfsOpsErr_INTERNAL; } errno = 0; // recommended by posix (readdir(3p) ) // seek to offset if(lastOffset) seekdir(dirHandle, lastOffset); // (seekdir has no return value) // the actual entry reading for( ; (numEntries < maxOutEntries) && (dirEntry = StorageTk::readdirFiltered(dirHandle) ); numEntries++) { outEntryIDFiles->push_back(dirEntry->d_name); *outNewOffset = dirEntry->d_off; } if(!dirEntry && errno) { LogContext(logContext).logErr(std::string("Unable to fetch entries directory entry from: ") + path + ". SysErr: " + System::getErrString() ); } else { // all entries read retVal = FhgfsOpsErr_SUCCESS; } closedir(dirHandle); return retVal; } void MetaStore::getReferenceStats(size_t* numReferencedDirs, size_t* numReferencedFiles) { UniqueRWLock lock(rwlock, SafeRWLock_READ); *numReferencedDirs = dirStore.getSize(); *numReferencedFiles = fileStore.getSize(); } void MetaStore::getCacheStats(size_t* numCachedDirs) { UniqueRWLock lock(rwlock, SafeRWLock_READ); *numCachedDirs = dirStore.getCacheSize(); } /** * Asynchronous cache sweep. 
* * @return true if a cache flush was triggered, false otherwise */ bool MetaStore::cacheSweepAsync() { UniqueRWLock lock(rwlock, SafeRWLock_READ); return dirStore.cacheSweepAsync(); } /** * So we failed to delete chunk files and need to create a new disposal file for later cleanup. * * @param inode will be deleted (or owned by another object) no matter whether this succeeds or not * * Note: No MetaStore lock required, as the disposal dir cannot be removed. */ FhgfsOpsErr MetaStore::insertDisposableFile(FileInode* inode) { LogContext log("MetaStore (insert disposable file)"); App* app = Program::getApp(); DirInode* disposalDir = inode->getIsBuddyMirrored() ? Program::getApp()->getBuddyMirrorDisposalDir() : Program::getApp()->getDisposalDir(); UniqueRWLock metaLock(this->rwlock, SafeRWLock_READ); UniqueRWLock dirLock(disposalDir->rwlock, SafeRWLock_WRITE); const std::string& fileName = inode->getEntryID(); // ID is also the file name DirEntryType entryType = MetadataTk::posixFileTypeToDirEntryType(inode->getMode() ); EntryInfo entryInfo; NumNodeID ownerNodeID = inode->getIsBuddyMirrored() ? NumNodeID(app->getMetaBuddyGroupMapper()->getLocalGroupID() ) : app->getLocalNode().getNumID(); entryInfo.set(ownerNodeID, "", inode->getEntryID(), "", entryType, 0); FhgfsOpsErr retVal = mkMetaFileUnlocked(*disposalDir, fileName, &entryInfo, inode); if (retVal != FhgfsOpsErr_SUCCESS) { log.log(Log_WARNING, std::string("Failed to create disposal file for id: ") + fileName + "; " "Storage chunks will not be entirely deleted!"); } return retVal; } /** * Retrieves entry data for a given file or directory. * * @param dirInode Pointer to the parent directory inode. * @param entryName Name of the entry to retrieve data for. * @param outInfo Pointer to store the retrieved EntryInfo. * @param outInodeMetaData Pointer to store inode metadata (may be NULL). 
* * @return A pair of: * - FhgfsOpsErr * SUCCESS if the entry was found and is not referenced * DYNAMICATTRIBSOUTDATED if the entry was found but might have outdated attributes * PATHNOTEXISTS if the entry does not exist * - bool: true if entry is a regular file and is currently open, false otherwise * * Locking: No lock must be taken already. */ std::pair MetaStore::getEntryData(DirInode *dirInode, const std::string& entryName, EntryInfo* outInfo, FileInodeStoreData* outInodeMetaData) { FhgfsOpsErr retVal = dirInode->getEntryData(entryName, outInfo, outInodeMetaData); if (retVal == FhgfsOpsErr_SUCCESS && DirEntryType_ISREGULARFILE(outInfo->getEntryType())) { /* Hint for the caller not to rely on outInodeMetaData, properly handling close-races * is too difficult and in the end probably slower than to just re-get the EAs from the * inode (fsIDs/entryID). */ return {FhgfsOpsErr_DYNAMICATTRIBSOUTDATED, false}; } if (retVal == FhgfsOpsErr_DYNAMICATTRIBSOUTDATED) return {retVal, true}; // entry is a regular file and currently open return {retVal, false}; } /** * Get inode disk data for non-inlined inode * * @param outInodeMetaData might be NULL * @return FhgfsOpsErr_SUCCESS if inode exists * FhgfsOpsErr_PATHNOTEXISTS if the inode does not exist * * Locking: No lock must be taken already. */ FhgfsOpsErr MetaStore::getEntryData(EntryInfo* inEntryInfo, FileInodeStoreData* outInodeMetaData) { std::unique_ptr inode(FileInode::createFromEntryInfo(inEntryInfo)); if (unlikely(!inode)) return FhgfsOpsErr_PATHNOTEXISTS; *outInodeMetaData = *(inode->getInodeDiskData()); inode->getInodeDiskData()->setPattern(NULL); return FhgfsOpsErr_SUCCESS; } /** * Create a hard-link within a directory. 
*/ FhgfsOpsErr MetaStore::linkInSameDir(DirInode& parentDir, EntryInfo* fromFileInfo, const std::string& fromName, const std::string& toName) { const char* logContext = "link in same dir"; UniqueRWLock metaLock(rwlock, SafeRWLock_READ); UniqueRWLock parentDirLock(parentDir.rwlock, SafeRWLock_WRITE); auto [fromFileInode, retVal] = referenceFileUnlocked(parentDir, fromFileInfo); if (!fromFileInode) { goto outUnlock; } if (!fromFileInode->getIsInlined() ) { // not supported retVal = FhgfsOpsErr_INTERNAL; goto outReleaseInode; } if (this->fileStore.isInStore(fromFileInfo->getEntryID() ) ) { // not supported retVal = FhgfsOpsErr_INTERNAL; goto outReleaseInode; } else { // not in global /store/map, now per directory FhgfsOpsErr incRes = parentDir.fileStore.incLinkCount(*fromFileInode, fromFileInfo); if (incRes != FhgfsOpsErr_SUCCESS) { retVal = FhgfsOpsErr_INTERNAL; goto outReleaseInode; } retVal = parentDir.linkFilesInDirUnlocked(fromName, *fromFileInode, toName); if (retVal != FhgfsOpsErr_SUCCESS) { FhgfsOpsErr decRes = parentDir.fileStore.decLinkCount(*fromFileInode, fromFileInfo); if (decRes != FhgfsOpsErr_SUCCESS) LogContext(logContext).logErr("Warning: Creating the link failed and decreasing " "the inode link count again now also failed!" " parentDir : " + parentDir.getID() + " entryID: " + fromFileInfo->getEntryID() + " fileName: " + fromName); } } outReleaseInode: releaseFileUnlocked(parentDir, fromFileInode); outUnlock: parentDirLock.unlock(); return retVal; } /** * Create a new hardlink. * * 1. De-inline inode if it's inlined. * 2. Increment link count. * * @param fromFileInfo Pointer to EntryInfo of the target file. * @return Pair of: * - FhgfsOpsErr: Operation result (SUCCESS or error). * - unsigned: Updated hardlink count on success, 0 on failure. 
*/ std::pair MetaStore::makeNewHardlink(EntryInfo* fromFileInfo) { UniqueRWLock metaLock(rwlock, SafeRWLock_WRITE); FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS; unsigned updatedLinkCount = 0; // try to load an unreferenced inode from disk std::unique_ptr fInode(FileInode::createFromEntryInfo(fromFileInfo)); if (unlikely(!fInode)) return {FhgfsOpsErr_PATHNOTEXISTS, 0}; // get inlined flag from on-disk metadata and perform inode de-inline if required bool isInlined = fInode->getIsInlined(); if (isInlined) { DirInode* dir = referenceDirUnlocked(fromFileInfo->getParentEntryID(), fromFileInfo->getIsBuddyMirrored(), true); if (unlikely(!dir)) return {FhgfsOpsErr_PATHNOTEXISTS, 0}; retVal = verifyAndMoveFileInodeUnlocked(*dir, fromFileInfo, MODE_DEINLINE); releaseDirUnlocked(dir->getID()); } if (retVal == FhgfsOpsErr_SUCCESS) { // move existing inode references from dir specific store to global store // to make sure we always use global store for non-inlined file inode(s) if (!this->fileStore.isInStore(fromFileInfo->getEntryID())) { this->moveReferenceToMetaFileStoreUnlocked(fromFileInfo->getParentEntryID(), fromFileInfo->getIsBuddyMirrored(), fromFileInfo->getEntryID()); } auto [inode, referenceRes] = referenceFileUnlocked(fromFileInfo); if (unlikely(!inode)) return {referenceRes, 0}; // ensure inode is not marked for disposal (link count must be >= 1) if (inode->getNumHardlinks() >= 1) { if (!inode->incDecNumHardLinks(fromFileInfo, 1)) retVal = FhgfsOpsErr_INTERNAL; else { retVal = FhgfsOpsErr_SUCCESS; updatedLinkCount = inode->getNumHardlinks(); } } else { // can't create a hardlink for disposal inode (link count = 0) retVal = FhgfsOpsErr_PATHNOTEXISTS; } releaseFileUnlocked(fromFileInfo->getParentEntryID(), inode); } // return operation result and updated hardlink count return {retVal, updatedLinkCount}; } /** * Deinline or reinline file's Inode on current meta-data server. 
It first verifies Inode * location (inlined or deinlined) and then performes requested inode movement if needed * * @returns FhgfsOpsErr_SUCCESS on success. FhgfsOpsErr_PATHNOTEXISTS if file does not exists * FhgfsOpsErr_INTERNAL on any other error * * @param parentDir parent directory's inode object * @param fileInfo entryInfo of file for which inode movement is requested * @param moveMode requested fileInode mode (i.e. deinline or reinline) * */ FhgfsOpsErr MetaStore::verifyAndMoveFileInode(DirInode& parentDir, EntryInfo* fileInfo, FileInodeMode moveMode) { UniqueRWLock metaLock(rwlock, SafeRWLock_WRITE); return verifyAndMoveFileInodeUnlocked(parentDir, fileInfo, moveMode); } FhgfsOpsErr MetaStore::verifyAndMoveFileInodeUnlocked(DirInode& parentDir, EntryInfo* fileInfo, FileInodeMode moveMode) { App* app = Program::getApp(); FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS; if (!parentDir.loadIfNotLoadedUnlocked()) { return FhgfsOpsErr_PATHNOTEXISTS; } DirEntry fileDentry(fileInfo->getFileName()); if (!parentDir.getDentry(fileInfo->getFileName(), fileDentry)) { return FhgfsOpsErr_PATHNOTEXISTS; } if (moveMode == MODE_INVALID) return FhgfsOpsErr_INTERNAL; bool isInodeMoveRequired = ((moveMode == MODE_DEINLINE) && fileDentry.getIsInodeInlined()) || ((moveMode == MODE_REINLINE) && !fileDentry.getIsInodeInlined()); if (isInodeMoveRequired) { // prepare dentry path const Path* dentriesPath = fileInfo->getIsBuddyMirrored() ? 
app->getBuddyMirrorDentriesPath() : app->getDentriesPath(); std::string dirEntryPath = MetaStorageTk::getMetaDirEntryPath(dentriesPath->str(), parentDir.getID()); switch (moveMode) { case MODE_DEINLINE: { retVal = deinlineFileInode(parentDir, fileInfo, fileDentry, dirEntryPath); break; } case MODE_REINLINE: { retVal = reinlineFileInode(parentDir, fileInfo, fileDentry, dirEntryPath); break; } case MODE_INVALID: break; // added just to please compiler (already handled above) } } else { // due to unexpected failures (like node crash) there may some inconsistencies present in // filesystem like duplicate inode(s) or dentry-by-entryID file missing for an inlined inode // following code takes care to recover from such situations if user re-runs previous failed // operation again (i.e. run beegfs-ctl command again in case of error) switch (moveMode) { case MODE_DEINLINE: { // remove dentry-by-entryID file parentDir.unlinkDirEntry(fileInfo->getFileName(), &fileDentry, DirEntry_UNLINK_ID); break; } case MODE_REINLINE: { // check if duplicate inode exists in inode Tree and remove associated meta file EntryInfo fileInfoCopy(*fileInfo); fileInfoCopy.setInodeInlinedFlag(false); std::unique_ptr inode(FileInode::createFromEntryInfo(&fileInfoCopy)); if (likely(inode)) { FileInode::unlinkStoredInodeUnlocked(fileInfo->getEntryID(), fileInfo->getIsBuddyMirrored()); } break; } case MODE_INVALID: break; // nothing to do } } return retVal; } /** * Helper function of verifyAndMoveFileInode() * * Updates the inode metadata to transform it into a non-inlined inode, copies remote storage * targets and all user-defined extended attributes (XAttrs) from the inlined to the non-inlined * inode, and at last updates the file dentry to reflect the non-inlined state of file inode. * In case of an error, it attempts to roll-back partial changes (i.e. unlink non-inlined inode) * made so far to avoid any filesystem inconsistencies (e.g., duplicate inodes). 
*/ FhgfsOpsErr MetaStore::deinlineFileInode(DirInode& parentDir, EntryInfo* entryInfo, DirEntry& dentry, std::string const& dirEntryPath) { UniqueRWLock dirLock(parentDir.rwlock, SafeRWLock_WRITE); auto [fileInode, referenceRes] = referenceFileUnlocked(parentDir, entryInfo); if (!fileInode) return referenceRes; // 1. Set the inode as non-inlined and save this change to disk fileInode->setIsInlined(false); if (!fileInode->updateInodeOnDisk(entryInfo)) { releaseFileUnlocked(parentDir, fileInode); return FhgfsOpsErr_INTERNAL; } // Lambda function to perform cleanup in case of errors: // a) Unlink the non-inlined inode to maintain filesystem consistency // b) Release the file inode reference auto cleanupOnError = [this, &fileInode = fileInode, &entryInfo = entryInfo, &parentDir= parentDir](FhgfsOpsErr errCode) { FileInode::unlinkStoredInodeUnlocked(entryInfo->getEntryID(), entryInfo->getIsBuddyMirrored()); releaseFileUnlocked(parentDir, fileInode); return errCode; }; // 2. Copy remote storage targets (if present) to non-inlined inode if (fileInode->getIsRstAvailable()) { FhgfsOpsErr retVal = fileInode->setRemoteStorageTarget(entryInfo, *(fileInode->getRemoteStorageTargetInfo())); if (retVal != FhgfsOpsErr_SUCCESS) return cleanupOnError(retVal); } // 3. 
Copy all user-defined extended attributes (if present) to non-inlined inode StringVector xAttrNames; FhgfsOpsErr listXAttrRes; std::tie(listXAttrRes, xAttrNames) = parentDir.listXAttr(entryInfo); if (listXAttrRes == FhgfsOpsErr_SUCCESS) { for (const auto& xAttrName : xAttrNames) { CharVector xAttrValue; FhgfsOpsErr getXAttrRes; // Retrieve the value of the current xattr from the inlined inode std::tie(getXAttrRes, xAttrValue, std::ignore) = parentDir.getXAttr(entryInfo, xAttrName, XATTR_SIZE_MAX); if (getXAttrRes == FhgfsOpsErr_SUCCESS) { // Set the retrieved xattr value on the non-inlined inode FhgfsOpsErr setRes = fileInode->setXAttr(entryInfo, xAttrName, xAttrValue, 0); if (setRes != FhgfsOpsErr_SUCCESS) return cleanupOnError(setRes); } else return cleanupOnError(getXAttrRes); } } // 4. Update the file dentry // // Modify/update dentry feature flags to indicate that the inode is no // longer inlined into dentry. Save the updated dentry data to disk, // ensuring it gets written in VER-3 format dentry.unsetInodeInlined(); unsigned flags = dentry.getDentryFeatureFlags(); flags &= ~(DENTRY_FEATURE_IS_FILEINODE); dentry.setDentryFeatureFlags(flags); dentry.getInodeStoreData()->setOrigFeature(FileInodeOrigFeature_UNSET); // Save updated dentry object to disk and unlink corresponding // dentry-by-entryID file from '#fSiDs#' directory if successful bool saveRes = dentry.storeUpdatedDirEntry(dirEntryPath); if (saveRes) parentDir.unlinkDirEntryUnlocked(entryInfo->getFileName(), &dentry, DirEntry_UNLINK_ID); else return cleanupOnError(FhgfsOpsErr_INTERNAL); releaseFileUnlocked(parentDir, fileInode); return FhgfsOpsErr_SUCCESS; } /** * Helper function of verifyAndMoveFileInode() * Makes a file Inode inlined. 
Caller should check if inode is already not inlined */ FhgfsOpsErr MetaStore::reinlineFileInode(DirInode& parentDir, EntryInfo* entryInfo, DirEntry& dentry, std::string const& dirEntryPath) { const char* logContext = "make fileInode Inlined"; auto [fileInode, referenceRes] = referenceFileUnlocked(entryInfo); if (!fileInode) return referenceRes; UniqueRWLock dirLock(parentDir.rwlock, SafeRWLock_WRITE); // 1. set inode specific data in dentry object after updating inode feature flags fileInode->setIsInlined(true); dentry.setFileInodeData(*(fileInode->getInodeDiskData())); fileInode->getInodeDiskData()->setPattern(NULL); // 2. create link in '#fSiDs#' directory for dentry-by-entryID file // needed because an inlined inode always have this link present std::string idPath = MetaStorageTk::getMetaDirEntryIDPath(dirEntryPath) + entryInfo->getEntryID(); std::string namePath = dirEntryPath + "/" + entryInfo->getFileName(); int linkRes = link(namePath.c_str(), idPath.c_str()); if (linkRes) { if (errno != EEXIST) { LogContext(logContext).logErr("Creating dentry-by-entryid file failed: Path: " + idPath + " SysErr: " + System::getErrString()); releaseFileUnlocked(parentDir, fileInode); return FhgfsOpsErr_INTERNAL; } } // 3. now save in-memory dentry data (having inlined inode) onto disk bool saveRes = dentry.storeUpdatedDirEntry(dirEntryPath); // 4. delete non-inlined inode meta file from disk if (saveRes) { if (!FileInode::unlinkStoredInodeUnlocked(entryInfo->getEntryID(), entryInfo->getIsBuddyMirrored())) { releaseFileUnlocked(parentDir, fileInode); return FhgfsOpsErr_INTERNAL; } } releaseFileUnlocked(parentDir, fileInode); return FhgfsOpsErr_SUCCESS; } /** * Check if duplicate inodes exists or not (i.e. 
both inlined + nonInlined inode) * If yes, then remove nonInlined inode to fix duplicacy * * @param parentDir parant directory of file for which duplicacy of inode needs to be checked * @param entryInfo entry info of file */ FhgfsOpsErr MetaStore::checkAndRepairDupFileInode(DirInode& parentDir, EntryInfo* entryInfo) { UniqueRWLock metaLock(rwlock, SafeRWLock_WRITE); FhgfsOpsErr retVal = FhgfsOpsErr_SUCCESS; if (!parentDir.loadIfNotLoadedUnlocked()) { return FhgfsOpsErr_PATHNOTEXISTS; } // first try to load inlined inode entryInfo->setInodeInlinedFlag(true); auto [inlinedInode, referenceRes] = referenceFileUnlocked(entryInfo); if (!(inlinedInode && inlinedInode->getIsInlined())) { return referenceRes; } UniqueRWLock dirLock(parentDir.rwlock, SafeRWLock_WRITE); // now try to load nonInlined inode entryInfo->setInodeInlinedFlag(false); FileInode* nonInlinedInode = FileInode::createFromEntryInfo(entryInfo); if (!nonInlinedInode) return retVal; // if we are here then we know that both inlined + nonInlined inodes exists and // we can safely remove nonInlined inode to fix duplicacy if (!FileInode::unlinkStoredInodeUnlocked(entryInfo->getEntryID(), entryInfo->getIsBuddyMirrored())) { retVal = FhgfsOpsErr_INTERNAL; } releaseFileUnlocked(parentDir, inlinedInode); SAFE_DELETE(nonInlinedInode); return retVal; } /** * Get the raw contents of the metadata file specified by path. * Note: This is intended to be used by the buddy resyncer only. * * Behavior: * - If extended attributes are enabled (via config): * - Reads the specified attribute using getxattr. * - Supports reading any attribute name. * - If extended attributes are disabled: * - Only allows reading the default attribute (META_XATTR_NAME). * - Reads from file contents for the default attribute. * - Returns an error for any other attribute name. * * @param path The path to the metadata file. * @param contents The attribute contents will be stored in this vector. 
The vector will be assigned * the new contents; any old contents will be lost. * @param attrName The name of the attribute to read. Must be META_XATTR_NAME if extended attributes * are disabled. * * @returns FhgfsOpsErr_SUCCESS on success. FhgfsOpsErr_PATHNOTEXISTS if file does not exist. * FhgfsOpsErr_INTERNAL on any other error. */ FhgfsOpsErr MetaStore::getRawMetadata(const Path& path, const char* attrName, CharVector& contents) { App* app = Program::getApp(); const bool useXAttrs = app->getConfig()->getStoreUseExtendedAttribs(); const std::string metaPath = app->getMetaPath(); char buf[META_SERBUF_SIZE]; ssize_t readRes; if (useXAttrs) { // Load from Xattr readRes = ::getxattr(path.str().c_str(), attrName, buf, META_SERBUF_SIZE); if (readRes <= 0) { if (readRes == -1 && errno == ENOENT) { LOG(GENERAL, WARNING, "Metadata file does not exist", path); return FhgfsOpsErr_PATHNOTEXISTS; } LOG(GENERAL, WARNING, "Unable to read metadata file", path, sysErr); return FhgfsOpsErr_INTERNAL; } } else { // If xattrs are disabled via config, only allow reading the default xattr (META_XATTR_NAME) if (strcmp(attrName, META_XATTR_NAME) != 0) { LOG(GENERAL, ERR, "Reading a non-default attribute as file contents is not supported.", path, attrName); return FhgfsOpsErr_INVAL; } // Load from file contents. int fd = open(path.str().c_str(), O_NOATIME | O_RDONLY, 0); if (fd == -1) { if (errno != ENOENT) { LOG(GENERAL, WARNING, "Unable to read metadata file", path, sysErr); return FhgfsOpsErr_INTERNAL; } LOG(GENERAL, WARNING, "Metadata file does not exist", path); return FhgfsOpsErr_PATHNOTEXISTS; } readRes = ::read(fd, buf, META_SERBUF_SIZE); if (readRes <= 0) { LOG(GENERAL, ERR, "Unable to read metadata file", path, sysErr); close(fd); return FhgfsOpsErr_INTERNAL; } close(fd); } contents.assign(buf, buf + readRes); return FhgfsOpsErr_SUCCESS; } /** * Create a file or directory for metadata resync. 
The caller is responsible for filling the * resulting object with metadata content or xattrs. * Note: This is intended to be used by the buddy resynver only. * * A metatada inode which does not exist yet is created. */ std::pair MetaStore::beginResyncFor(const Path& path, bool isDirectory) { // first try to create the path directly, and if that fails with ENOENT (ie a directory in the // path does not exist), create all parent directories and try again. for (int round = 0; round < 2; round++) { int mkRes; if (isDirectory) { mkRes = ::mkdir(path.str().c_str(), 0755); if (mkRes == 0 || errno == EEXIST) mkRes = ::open(path.str().c_str(), O_DIRECTORY); } else mkRes = ::open(path.str().c_str(), O_CREAT | O_TRUNC | O_RDWR, 0644); if (mkRes < 0 && errno == ENOENT && round == 0) { if (!StorageTk::createPathOnDisk(path, true)) { LOG(GENERAL, ERR, "Could not create metadata path", path, isDirectory); return {FhgfsOpsErr_INTERNAL, IncompleteInode{}}; } continue; } if (mkRes < 0) break; return {FhgfsOpsErr_SUCCESS, IncompleteInode(mkRes)}; } LOG(GENERAL, ERR, "Could not create metadata file/directory", path, isDirectory, sysErr); return {FhgfsOpsErrTk::fromSysErr(errno), IncompleteInode{}}; } /** * Deletes a raw metadata file specified by path. * Note: This is intended to be used by the buddy resyncer only. */ FhgfsOpsErr MetaStore::unlinkRawMetadata(const Path& path) { App* app = Program::getApp(); const std::string metaPath = app->getMetaPath(); int unlinkRes = ::unlink(path.str().c_str()); if (!unlinkRes) return FhgfsOpsErr_SUCCESS; if (errno == ENOENT) { LOG(GENERAL, DEBUG, "Metadata file does not exist", path); return FhgfsOpsErr_PATHNOTEXISTS; } LOG(GENERAL, DEBUG, "Error unlinking metadata file", path, sysErr); return FhgfsOpsErr_INTERNAL; } /** * Sets the state of a file and persists it to disk. * This method locks inode in GlobalInodeLockStore to prevent concurrent operations, * verifies the state transition is allowed based on active sessions, and updates its state. 
* * @param entryInfo The entry information of the file to modify * @param state The new state to set for the file * @return FhgfsOpsErr_SUCCESS if state was successfully updated to disk * FhgfsOpsErr_INODELOCKED if file is locked in global lock store * FhgfsOpsErr_INUSE if the requested state transition would affect active sessions * FhgfsOpsErr_PATHNOTEXISTS if the file or parent directory doesn't exist * FhgfsOpsErr_INTERNAL for other errors */ FhgfsOpsErr MetaStore::setFileState(EntryInfo* entryInfo, const FileState& state) { const char* logContext = "MetaStore (set file state)"; UniqueRWLock metaLock(rwlock, SafeRWLock_READ); // Add inode to global lock store for exclusive access GlobalInodeLockStore* lockStore = this->getInodeLockStore(); if (!lockStore->insertFileInode(entryInfo)) { LogContext(logContext).log(Log_DEBUG, "Inode is locked in global lock store; " "state update rejected. EntryID: " + entryInfo->getEntryID()); return FhgfsOpsErr_INODELOCKED; } DirInode* parentDir = referenceDirUnlocked(entryInfo->getParentEntryID(), entryInfo->getIsBuddyMirrored(), false); if (unlikely(!parentDir)) { lockStore->releaseFileInode(entryInfo->getEntryID()); return FhgfsOpsErr_PATHNOTEXISTS; } // Bypass lock store checks since GlobalInodeLockStore ensures exclusivity. // A narrow race window exists where another thread may reference the inode // between our call to insertFileInode() and referenceFileUnlocked() // (via concurrent operations like open(), stat(), etc.). Such cases are now // handled directly by FileInode::checkAccessFlagTransition() logic, which // verifies if the state change is allowed based on active sessions. // Read-only operations may transiently reference the inode but don't increment // session counters and therefore don't block state transitions. 
auto [inode, refRes] = referenceFileUnlocked(*parentDir, entryInfo, /* checkLockStore */ false); if (unlikely(!inode)) { releaseDirUnlocked(parentDir->getID()); lockStore->releaseFileInode(entryInfo->getEntryID()); return refRes; } FhgfsOpsErr setRes = inode->setFileState(entryInfo, state); if (setRes != FhgfsOpsErr_SUCCESS) { LogContext(logContext).log(Log_DEBUG, "Failed to set file state. EntryID: " + entryInfo->getEntryID() + ", FileName: " + entryInfo->getFileName()); } // Clean up resources in reverse order of acquisition releaseFileUnlocked(*parentDir, inode); releaseDirUnlocked(parentDir->getID()); lockStore->releaseFileInode(entryInfo->getEntryID()); return setRes; } void MetaStore::invalidateMirroredDirInodes() { dirStore.invalidateMirroredDirInodes(); }