New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,85 @@
#include <common/toolkit/Serialization.h>
#include <common/storage/EntryInfo.h>
#include <os/OsDeps.h>
#define MIN_ENTRY_ID_LEN 1 // usually A-B-C, but also "root" and "disposal"
/**
 * Append the wire representation of an EntryInfo to the serialization context.
 *
 * Field order: entryType, featureFlags, parentEntryID, entryID, fileName (each string
 * 4-byte aligned), owner ID, and finally a zero unsigned short as alignment padding.
 *
 * @param ctx serialization context to write to
 * @param this entry to serialize; all three strings are passed to strlen()
 */
void EntryInfo_serialize(SerializeCtx* ctx, const EntryInfo* this)
{
   size_t entryIDLen;

   // the owner is always written through owner.node, even if it actually holds a group ID;
   // that only works while both union members have the same size and underlying type
   BUILD_BUG_ON(
      sizeof(this->owner.node) != sizeof(this->owner.group)
         || !__builtin_types_compatible_p(
               __typeof(this->owner.node.value), __typeof(this->owner.group)));

   Serialization_serializeUInt(ctx, this->entryType); // DirEntryType
   Serialization_serializeInt(ctx, this->featureFlags);
   Serialization_serializeStrAlign4(ctx, strlen(this->parentEntryID), this->parentEntryID);

   entryIDLen = strlen(this->entryID);
   if (unlikely(entryIDLen < MIN_ENTRY_ID_LEN) )
   { // only warn here; the ID is serialized anyway
      printk_fhgfs(KERN_WARNING, "EntryID too small!\n"); // server side deserialization will fail
      dump_stack();
   }

   Serialization_serializeStrAlign4(ctx, entryIDLen, this->entryID);
   Serialization_serializeStrAlign4(ctx, strlen(this->fileName), this->fileName);

   // owner node ID (or group ID, if buddy mirrored)
   NumNodeID_serialize(ctx, &this->owner.node);

   // padding for 4-byte alignment
   Serialization_serializeUShort(ctx, 0);
}
/**
 * Read an EntryInfo from the deserialization context.
 *
 * Fields are consumed in the order written by EntryInfo_serialize(). On failure outThis may
 * already be partially filled.
 *
 * @param ctx deserialization context to read from
 * @param outThis receives the decoded values
 * @return false if any field could not be read or an ID length check fails
 */
bool EntryInfo_deserialize(DeserializeCtx* ctx, EntryInfo* outThis)
{
   unsigned rawEntryType;
   unsigned parentIDLen;
   unsigned idLen;
   unsigned nameLen;
   unsigned short alignPad;

   if (!Serialization_deserializeUInt(ctx, &rawEntryType))
      return false;

   outThis->entryType = (DirEntryType) rawEntryType;

   if (!Serialization_deserializeInt(ctx, &outThis->featureFlags))
      return false;

   if (!Serialization_deserializeStrAlign4(ctx, &parentIDLen, &outThis->parentEntryID))
      return false;

   if (!Serialization_deserializeStrAlign4(ctx, &idLen, &outThis->entryID))
      return false;

   if (!Serialization_deserializeStrAlign4(ctx, &nameLen, &outThis->fileName))
      return false;

   outThis->owner.isGroup = outThis->featureFlags & ENTRYINFO_FEATURE_BUDDYMIRRORED;

   // reads owner.group as well if the entry is buddy mirrored (shared union storage)
   if (!NumNodeID_deserialize(ctx, &outThis->owner.node))
      return false;

   // padding for 4-byte alignment
   if (!Serialization_deserializeUShort(ctx, &alignPad))
      return false;

   // Note: the root ("/") has parentEntryID = "", so an empty parent ID is accepted
   if (unlikely((parentIDLen > 0) && (parentIDLen < MIN_ENTRY_ID_LEN)))
      return false;

   if (unlikely(idLen < MIN_ENTRY_ID_LEN))
      return false;

   return true;
}

View File

@@ -0,0 +1,208 @@
/*
* class EntryInfo - required information to find an inode or chunk files
*
* NOTE: If you change this file, do not forget to adjust commons EntryInfo.h
*/
#ifndef ENTRYINFO_H_
#define ENTRYINFO_H_
#include <common/nodes/NumNodeID.h>
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/SerializationTypes.h>
#define ENTRYINFO_FEATURE_INLINED 1 // indicate inlined inode, might be outdated
#define ENTRYINFO_FEATURE_BUDDYMIRRORED 2 // entry is buddy mirrored
#define EntryInfo_PARENT_ID_UNKNOWN "unknown"
struct EntryInfo;
typedef struct EntryInfo EntryInfo;
static inline void EntryInfo_uninit(EntryInfo* this);
static inline void EntryInfo_dup(const EntryInfo *inEntryInfo, EntryInfo *outEntryInfo);
static inline void EntryInfo_update(EntryInfo* this, const EntryInfo* newEntryInfo);
static inline void EntryInfo_updateParentEntryID(EntryInfo *this, const char *newParentID);
static inline void EntryInfo_updateSetParentEntryID(EntryInfo *this, const char *newParentID);
static inline char const* EntryInfo_getParentEntryID(const EntryInfo* this);
static inline char const* EntryInfo_getEntryID(const EntryInfo* this);
static inline void EntryInfo_updateFileName(EntryInfo *this, const char* newFileName);
static inline bool EntryInfo_getIsBuddyMirrored(const EntryInfo *this);
extern void EntryInfo_serialize(SerializeCtx* ctx, const EntryInfo* this);
extern bool EntryInfo_deserialize(DeserializeCtx* ctx, EntryInfo* outThis);
/**
 * Owner of an entry: either a single node ID or a group ID.
 *
 * node and group share the same storage (anonymous union); isGroup tells which of the two
 * was stored (see NodeOrGroup_fromNode() / NodeOrGroup_fromGroup() ).
 */
typedef struct NodeOrGroup
{
union
{
NumNodeID node; // valid if !isGroup
uint32_t group; // valid if isGroup
};
bool isGroup;
} NodeOrGroup;
/**
 * Wrap a node ID into a NodeOrGroup (isGroup is false).
 */
static inline NodeOrGroup NodeOrGroup_fromNode(NumNodeID node)
{
   NodeOrGroup owner = { .isGroup = false };

   owner.node = node;

   return owner;
}
/**
 * Wrap a group ID into a NodeOrGroup (isGroup is true).
 */
static inline NodeOrGroup NodeOrGroup_fromGroup(uint32_t group)
{
   NodeOrGroup owner = { .isGroup = true };

   owner.group = group;

   return owner;
}
/**
 * @return true if the stored ID is non-zero. The check reads the group member, which shares
 *    its storage with the node member.
 */
static inline bool NodeOrGroup_valid(NodeOrGroup id)
{
   const uint32_t rawID = id.group;

   return rawID != 0;
}
/**
 * Minimal information about an entry (file/directory/...).
 *
 * The three strings are released with kfree() by EntryInfo_uninit(), so they are expected to
 * be heap-allocated and owned by this struct.
 */
struct EntryInfo
{
// serialization will work as long as the elements in here have the same size and
// underlying type(!)
NodeOrGroup owner; // owner node ID, or owner group ID if buddy mirrored
const char* parentEntryID; // may be the empty string (root has parentEntryID = "")
const char* entryID;
const char* fileName;
DirEntryType entryType;
int featureFlags; // bit mask of ENTRYINFO_FEATURE_...
};
/**
 * Main initialization function for EntryInfo, should typically be used.
 *
 * The strings are not copied; the object takes over the given pointers.
 *
 * @param owner owner node or group; a group owner also sets the BUDDYMIRRORED feature flag
 * @param parentEntryID will be free'd on uninit
 * @param entryID will be free'd on uninit
 * @param fileName will be free'd on uninit
 */
static inline void EntryInfo_init(EntryInfo* this, NodeOrGroup owner,
   const char* parentEntryID, const char* entryID, const char* fileName, DirEntryType entryType,
   int featureFlags)
{
   int effectiveFlags = featureFlags;

   if (owner.isGroup)
      effectiveFlags |= ENTRYINFO_FEATURE_BUDDYMIRRORED;

   this->entryType = entryType;
   this->featureFlags = effectiveFlags;
   this->owner = owner;

   this->parentEntryID = parentEntryID;
   this->entryID = entryID;
   this->fileName = fileName;
}
/**
 * Uninitialize the object: releases the three owned strings.
 */
void EntryInfo_uninit(EntryInfo* this)
{
   kfree(this->fileName);
   kfree(this->entryID);
   kfree(this->parentEntryID);
}
/**
 * Duplicate inEntryInfo to outEntryInfo; the strings are copied with StringTk_strDup(), so
 * the copy owns its own string memory.
 */
void EntryInfo_dup(const EntryInfo *inEntryInfo, EntryInfo *outEntryInfo)
{
   // plain members first (this also copies the string pointers, which are replaced below)
   *outEntryInfo = *inEntryInfo;

   outEntryInfo->parentEntryID = StringTk_strDup(inEntryInfo->parentEntryID);
   outEntryInfo->entryID = StringTk_strDup(inEntryInfo->entryID);
   outEntryInfo->fileName = StringTk_strDup(inEntryInfo->fileName);
}
/**
 * Update our EntryInfo (this) with the values of a new EntryInfo.
 *
 * Takes over parentEntryID and fileName of newEntryInfo and frees its entryID.
 *
 * Note: newEntryInfo must not be used any more by the caller
 * Note: This does not update entryID and entryType as these values cannot change. If we would
 *    update entryID we would also increase locking overhead.
 */
void EntryInfo_update(EntryInfo* this, const EntryInfo* newEntryInfo)
{
   const bool isDir = DirEntryType_ISDIR(this->entryType);

   // swap in the new parent ID
   kfree(this->parentEntryID);
   this->parentEntryID = newEntryInfo->parentEntryID;

   // swap in the new file name
   kfree(this->fileName);
   this->fileName = newEntryInfo->fileName;

   this->featureFlags = newEntryInfo->featureFlags;

   if (!isDir)
      this->owner = newEntryInfo->owner; // only update the owner if it is not a directory

   // the new entryID is not taken over, so it has to be released here
   kfree(newEntryInfo->entryID);
}
/**
 * Replace the parent entry ID with a copy of newParentID; the old string is freed.
 */
void EntryInfo_updateParentEntryID(EntryInfo *this, const char *newParentID)
{
   const char* oldParentID = this->parentEntryID;

   kfree(oldParentID);

   this->parentEntryID = StringTk_strDup(newParentID);
}
/**
 * Replace the parent entry ID without copying; the old string is freed.
 *
 * Note: This takes over newParentID, so the caller must not use newParentID afterwards
 */
void EntryInfo_updateSetParentEntryID(EntryInfo *this, const char *newParentID)
{
   const char* oldParentID = this->parentEntryID;

   kfree(oldParentID);

   this->parentEntryID = newParentID;
}
/**
 * @return the stored parent entry ID (not a copy)
 */
char const* EntryInfo_getParentEntryID(const EntryInfo* this)
{
   char const* parentID = this->parentEntryID;

   return parentID;
}
/**
 * @return the stored entry ID (not a copy)
 */
char const* EntryInfo_getEntryID(const EntryInfo* this)
{
   char const* id = this->entryID;

   return id;
}
/**
 * Replace the file name with a copy of newFileName; the old string is freed.
 */
void EntryInfo_updateFileName(EntryInfo *this, const char* newFileName)
{
   const char* oldFileName = this->fileName;

   kfree(oldFileName);

   this->fileName = StringTk_strDup(newFileName);
}
/**
 * @return true if the BUDDYMIRRORED feature flag is set
 */
bool EntryInfo_getIsBuddyMirrored(const EntryInfo *this)
{
   const int mirrorBit = this->featureFlags & ENTRYINFO_FEATURE_BUDDYMIRRORED;

   return mirrorBit;
}
/* these two are mainly for logging. */
static inline uint32_t EntryInfo_getOwner(const EntryInfo* this)
{
/* owner.node and owner.group are required to be the same type and size */
return this->owner.group;
}
/**
 * @return "g" if the BUDDYMIRRORED feature flag is set, the empty string otherwise
 */
static inline const char* EntryInfo_getOwnerFlag(const EntryInfo* this)
{
   return (this->featureFlags & ENTRYINFO_FEATURE_BUDDYMIRRORED) ? "g" : "";
}
#endif /* ENTRYINFO_H_ */

View File

@@ -0,0 +1,59 @@
#include "FileEvent.h"
#include <linux/fs.h>
/**
 * Initialize an event and, if a dentry is given, resolve its path.
 *
 * The path is rendered into a buffer owned by the event (address kept in pathPagePFN) and
 * released by FileEvent_uninit(). If the buffer cannot be allocated or the path cannot be
 * determined, event->path stays NULL.
 *
 * @param event event to initialize; previous contents are overwritten without being freed
 * @param eventType type to store in the event
 * @param dentry may be NULL, in which case no path is set
 */
void FileEvent_init(struct FileEvent* event, enum FileEventType eventType, struct dentry* dentry)
{
   memset(event, 0, sizeof(*event));
   event->eventType = eventType;

   if (!dentry)
      return;

   // the allocation size must match the length handed to dentry_path_raw() below; a fixed
   // 4096 byte buffer would be overrun on kernels with PAGE_SIZE > 4096
   event->pathPagePFN = (unsigned long) kmalloc(PAGE_SIZE, GFP_NOFS);
   if (!event->pathPagePFN)
      return;

   event->path = dentry_path_raw(dentry, (char*) event->pathPagePFN, PAGE_SIZE);
   if (IS_ERR(event->path))
      event->path = NULL; // buffer stays allocated and is released on uninit
}
/**
 * Release the path buffer and the target of an event.
 */
void FileEvent_uninit(struct FileEvent* event)
{
   // kfree() accepts NULL, so no check for an unset buffer is needed
   kfree((void *)event->pathPagePFN);

   // setting a NULL target releases whatever target is currently held
   FileEvent_setTargetStr(event, NULL);
}
/**
 * Set the event target to the path of the given dentry.
 *
 * Any previous target is released first. The path is rendered into a buffer owned by the
 * event (address kept in targetPagePFN). If the buffer cannot be allocated or the path
 * cannot be determined, event->target stays NULL.
 *
 * @param dentry may be NULL, in which case the target is only cleared
 */
void FileEvent_setTargetDentry(struct FileEvent* event, struct dentry* dentry)
{
   FileEvent_setTargetStr(event, NULL);

   if (!dentry)
      return;

   // the allocation size must match the length handed to dentry_path_raw() below; a fixed
   // 4096 byte buffer would be overrun on kernels with PAGE_SIZE > 4096
   event->targetPagePFN = (unsigned long) kmalloc(PAGE_SIZE, GFP_NOFS);
   if (!event->targetPagePFN)
      return;

   event->target = dentry_path_raw(dentry, (char*) event->targetPagePFN, PAGE_SIZE);
   if (IS_ERR(event->target))
      event->target = NULL; // buffer stays allocated and is released with the next target change
}
/**
 * Serialize an event: event type, path (empty string if unset), a bool telling whether a
 * target follows, and the target string if there is one.
 */
void FileEvent_serialize(SerializeCtx* ctx, const struct FileEvent* event)
{
   const char* path = event->path ? event->path : "";
   const bool hasTarget = (event->target != NULL);

   Serialization_serializeUInt(ctx, event->eventType);
   Serialization_serializeStr(ctx, strlen(path), path);
   Serialization_serializeBool(ctx, hasTarget);

   if (!hasTarget)
      return;

   Serialization_serializeStr(ctx, strlen(event->target), event->target);
}

View File

@@ -0,0 +1,61 @@
#ifndef FILEEVENT_H_
#define FILEEVENT_H_
#include <common/toolkit/Serialization.h>
struct dentry;
/* Event type codes; the numeric value is what FileEvent_serialize() writes as eventType,
 * so existing values must not change. */
enum FileEventType
{
   FileEventType_FLUSH              =  1,
   FileEventType_TRUNCATE           =  2,
   FileEventType_SETATTR            =  3,
   FileEventType_CLOSE_WRITE        =  4,
   FileEventType_CREATE             =  5,
   FileEventType_MKDIR              =  6,
   FileEventType_MKNOD              =  7,
   FileEventType_SYMLINK            =  8,
   FileEventType_RMDIR              =  9,
   FileEventType_UNLINK             = 10,
   FileEventType_HARDLINK           = 11,
   FileEventType_RENAME             = 12,
   FileEventType_OPEN_READ          = 13,
   FileEventType_OPEN_WRITE         = 14,
   FileEventType_OPEN_READ_WRITE    = 15,
   FileEventType_LAST_WRITER_CLOSED = 16,
   FileEventType_OPEN_BLOCKED       = 17,
};
/* A file event with an optional path and an optional target.
 *
 * Despite their names, pathPagePFN and targetPagePFN hold the addresses of kmalloc'ed
 * buffers (cast to unsigned long), not page frame numbers. path/target point into these
 * buffers when they were produced from a dentry. */
struct FileEvent
{
uint32_t eventType; /* enum FileEventType */
const char* path; /* NULL if invalid/could not be determined (empty is also allowed) */
const char* target; /* link target for link, new name for rename */
unsigned long pathPagePFN; /* buffer backing path, 0 if none */
unsigned long targetPagePFN; /* buffer backing target; 0 if target is an own string copy */
};
void FileEvent_init(struct FileEvent* event, enum FileEventType eventType, struct dentry* dentry);
void FileEvent_uninit(struct FileEvent* event);
/* Replace the event target with a copy of the given string (NULL clears the target).
 *
 * The previous target is released first: if it lives in the target buffer, the buffer is
 * freed, otherwise the target string itself is freed. */
static inline void FileEvent_setTargetStr(struct FileEvent* event, const char* target)
{
   const void* stale = event->targetPagePFN
      ? (const void*) event->targetPagePFN
      : (const void*) event->target;

   kfree(stale);

   event->targetPagePFN = 0;
   event->target = kstrdup(target, GFP_NOFS);
}
void FileEvent_setTargetDentry(struct FileEvent* event, struct dentry* dentry);
void FileEvent_serialize(SerializeCtx* ctx, const struct FileEvent* event);
/* the empty file event object is a valid event that may be destroyed, but that holds no
* state itself. thus, it may also be reinitialized without being destroyed first. */
#define FILEEVENT_EMPTY {0, NULL, NULL, 0, 0}
#endif

View File

@@ -0,0 +1,9 @@
#ifndef METADATA_H_
#define METADATA_H_
#include <common/Common.h>
#define META_ROOTDIR_ID_STR "root"
#endif /*METADATA_H_*/

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c)2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifdef BEEGFS_NVFS
#include <app/log/Logger.h>
#include "Nvfs.h"
struct nvfs_dma_rw_ops *nvfs_ops = NULL;
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
DEFINE_PER_CPU(long, nvfs_n_ops);
/*
 * Registration entry point for the nvfs DMA ops table.
 *
 * The ops are only accepted if all three feature checks pass; accepting them also clears the
 * shutdown flag.
 *
 * @returns 0 on success, -ENOTSUPP if a required feature is missing
 */
int REGISTER_FUNC (struct nvfs_dma_rw_ops *ops)
{
   printk_fhgfs_debug(KERN_INFO, "%s:%d: register nvfs ops\n",
      __func__, __LINE__);

   if (!(NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops) &&
         NVIDIA_FS_CHECK_FT_GPU_PAGE(ops) &&
         NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY(ops)))
      return -ENOTSUPP;

   nvfs_ops = ops;
   atomic_set(&nvfs_shutdown, 0);

   return 0;
}
EXPORT_SYMBOL(REGISTER_FUNC);
/*
 * Unregistration entry point for the nvfs DMA ops table.
 *
 * Sets the shutdown flag so that nvfs_get_ops() hands out no new references, then sleeps in
 * NVFS_HOLD_TIME_MS steps until the in-flight operation count is zero (at least one sleep
 * always happens), and finally clears nvfs_ops.
 */
void UNREGISTER_FUNC (void)
{
   long ops; // same type as the nvfs_count_ops() result, to avoid truncating the count

   printk_fhgfs_debug(KERN_INFO, "%s:%d: begin unregister nvfs\n",
      __func__, __LINE__);

   (void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);

   do
   {
      msleep(NVFS_HOLD_TIME_MS);
      ops = nvfs_count_ops();
      printk_fhgfs_debug(KERN_INFO, "%s:%d: nvfs_count_ops=%ld\n",
         __func__, __LINE__, ops);
   }
   while (ops);

   nvfs_ops = NULL;

   printk_fhgfs_debug(KERN_INFO, "%s:%d: unregister nvfs complete\n",
      __func__, __LINE__);
}
EXPORT_SYMBOL(UNREGISTER_FUNC);
#endif /* BEEGFS_NVFS */

View File

@@ -0,0 +1,81 @@
#ifndef BEEGFS_NVFS_H
#define BEEGFS_NVFS_H
#ifdef BEEGFS_NVFS
/**
 * BeeGFS and NVIDIA have name collisions on MIN and MAX. Briefly undefine MIN and MAX
 * to allow NVIDIA headers to be included without warnings.
 *
 * This has to happen inside the BEEGFS_NVFS section, because the matching pop_macro
 * directives are inside it as well. Otherwise builds without BEEGFS_NVFS would lose their
 * MIN and MAX definitions by including this header.
 */
#ifdef MIN
#pragma push_macro("MIN")
#undef MIN
#define BEEGFS_POP_MIN
#endif
#ifdef MAX
#pragma push_macro("MAX")
#undef MAX
#define BEEGFS_POP_MAX
#endif
#define MODULE_PREFIX beegfs_v1
#include <linux/delay.h>
#include <nvfs-dma.h>
#define REGSTR2(x) x##_register_nvfs_dma_ops
#define REGSTR(x) REGSTR2(x)
#define UNREGSTR2(x) x##_unregister_nvfs_dma_ops
#define UNREGSTR(x) UNREGSTR2(x)
#define REGISTER_FUNC REGSTR(MODULE_PREFIX)
#define UNREGISTER_FUNC UNREGSTR(MODULE_PREFIX)
#define NVFS_IO_ERR -1
#define NVFS_CPU_REQ -2
#define NVFS_HOLD_TIME_MS 1000
extern struct nvfs_dma_rw_ops *nvfs_ops;
extern atomic_t nvfs_shutdown;
DECLARE_PER_CPU(long, nvfs_n_ops);
static inline long nvfs_count_ops(void)
{
int i;
long sum = 0;
for_each_possible_cpu(i)
sum += per_cpu(nvfs_n_ops, i);
return sum;
}
/**
 * Account for a new nvfs operation on this CPU.
 *
 * @return false (without counting) if no ops are registered or the shutdown flag is set;
 *    on true the caller has to call nvfs_put_ops() when done
 */
static inline bool nvfs_get_ops(void)
{
   if (!nvfs_ops || atomic_read(&nvfs_shutdown))
      return false;

   this_cpu_inc(nvfs_n_ops);

   return true;
}
/**
 * Counterpart of a successful nvfs_get_ops(): decrements this CPU's operation counter.
 */
static inline void nvfs_put_ops(void)
{
   this_cpu_dec(nvfs_n_ops);
}
/**
* Restore BeeGFS definitions of MIN and MAX.
*/
#ifdef BEEGFS_POP_MIN
#pragma pop_macro("MIN")
#undef BEEGFS_POP_MIN
#endif
#ifdef BEEGFS_POP_MAX
#pragma pop_macro("MAX")
#undef BEEGFS_POP_MAX
#endif
#endif /* BEEGFS_NVFS */
#endif /* BEEGFS_NVFS_H */

View File

@@ -0,0 +1,142 @@
#ifndef PATH_H_
#define PATH_H_
#include <common/Common.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/list/StrCpyList.h>
#include <common/toolkit/list/StrCpyListIter.h>
struct Path;
typedef struct Path Path;
static inline void Path_init(Path* this);
static inline void Path_initFromString(Path* this, const char* pathStr);
static inline void Path_uninit(Path* this);
static inline void Path_parseStr(Path* this, const char* pathStr);
static inline bool Path_isPathStrAbsolute(const char* pathStr);
// getters & setters
static inline StrCpyList* Path_getPathElems(Path* this);
static inline char* Path_getPathAsStrCopy(Path* this);
static inline bool Path_isAbsolute(Path* this);
static inline void Path_setAbsolute(Path* this, bool absolute);
/* A path split into its elements, plus a flag telling whether it is absolute. */
struct Path
{
StrCpyList pathElems; // path elements as produced by splitting at '/'
bool isPathAbsolute; // true if the path string started with '/'
};
/**
 * Initialize an empty, relative path.
 */
void Path_init(Path* this)
{
   this->isPathAbsolute = false;

   StrCpyList_init(&this->pathElems);
}
/**
 * Initialize a path from its string representation.
 *
 * @param pathStr '/'-separated path; a leading slash makes the path absolute
 */
void Path_initFromString(Path* this, const char* pathStr)
{
   const bool absolute = Path_isPathStrAbsolute(pathStr);

   Path_init(this);
   this->isPathAbsolute = absolute;

   Path_parseStr(this, pathStr);
}
/**
 * Release the element list of the path.
 */
void Path_uninit(Path* this)
{
   StrCpyList* elems = &this->pathElems;

   StrCpyList_uninit(elems);
}
/**
 * Split pathStr at '/' and store the resulting pieces in the element list.
 */
void Path_parseStr(Path* this, const char* pathStr)
{
   StrCpyList* elems = &this->pathElems;

   StringTk_explode(pathStr, '/', elems);
}
/**
 * @return true if pathStr starts with a slash (false for the empty string)
 */
bool Path_isPathStrAbsolute(const char* pathStr)
{
   // an empty string has '\0' as first character, so it needs no separate length check
   return pathStr[0] == '/';
}
/**
 * @return the internal element list (not a copy)
 */
StrCpyList* Path_getPathElems(Path* this)
{
   StrCpyList* elems = &this->pathElems;

   return elems;
}
/**
 * Build the string representation of the path: the elements joined by slashes, with a
 * leading slash if the path is absolute.
 *
 * Note: a path without elements yields the empty string, even if it is absolute.
 *
 * @return string does not end with a slash; string is kmalloc'ed and needs to be kfree'd by
 *    the caller; NULL if the buffer could not be allocated
 */
char* Path_getPathAsStrCopy(Path* this)
{
   char* pathStr;
   StrCpyListIter iter;
   size_t currentPathPos;
   size_t totalPathLen;

   // count total path length
   totalPathLen = Path_isAbsolute(this) ? 1 : 0;

   if(!StrCpyList_length(&this->pathElems) )
   { // (very unlikely)
      totalPathLen = 1; // for terminating zero
   }

   StrCpyListIter_init(&iter, &this->pathElems);

   for( ; !StrCpyListIter_end(&iter); StrCpyListIter_next(&iter) )
   {
      char* currentPathElem = StrCpyListIter_value(&iter);

      totalPathLen += strlen(currentPathElem) + 1; // +1 for slash or terminating zero
   }

   // alloc path buffer
   pathStr = os_kmalloc(totalPathLen);
   if(!pathStr)
      return NULL; // NOTE(review): assumes os_kmalloc() may fail with NULL - confirm

   // copy elems to path
   if(Path_isAbsolute(this) )
   {
      pathStr[0] = '/';
      currentPathPos = 1;
   }
   else
      currentPathPos = 0;

   StrCpyListIter_init(&iter, &this->pathElems);

   for( ; !StrCpyListIter_end(&iter); StrCpyListIter_next(&iter) )
   {
      char* currentPathElem = StrCpyListIter_value(&iter);
      size_t currentPathElemLen = strlen(currentPathElem);

      memcpy(&pathStr[currentPathPos], currentPathElem, currentPathElemLen);
      currentPathPos += currentPathElemLen;

      // separator; the one written after the last element is replaced by the terminator
      pathStr[currentPathPos] = '/';
      currentPathPos++;
   }

   // zero-terminate the pathStr
   pathStr[totalPathLen-1] = 0;

   return pathStr;
}
/**
 * @return the stored absolute flag
 */
bool Path_isAbsolute(Path* this)
{
   const bool absolute = this->isPathAbsolute;

   return absolute;
}
/**
 * Overwrite the absolute flag; the path elements are left untouched.
 */
void Path_setAbsolute(Path* this, bool absolute)
{
   this->isPathAbsolute = absolute;
}
#endif /*PATH_H_*/

View File

@@ -0,0 +1,58 @@
#include <common/toolkit/Serialization.h>
#include <common/storage/PathInfo.h>
#include <os/OsDeps.h>
/**
 * Append the wire representation of a PathInfo to the serialization context.
 *
 * The flags are always written; origParentUID and origParentEntryID (4-byte aligned) follow
 * only if PATHINFO_FEATURE_ORIG is set.
 */
void PathInfo_serialize(SerializeCtx* ctx, const PathInfo* this)
{
   const unsigned flags = this->flags;

   Serialization_serializeUInt(ctx, flags);

   if (!(flags & PATHINFO_FEATURE_ORIG) )
      return; // nothing else to send

   Serialization_serializeUInt(ctx, this->origParentUID);

   Serialization_serializeStrAlign4(ctx, strlen(this->origParentEntryID),
      this->origParentEntryID);
}
/**
 * Read a PathInfo from the deserialization context and initialize outThis with it.
 *
 * outThis is only written if all fields could be read.
 *
 * @return false if a field could not be read
 */
bool PathInfo_deserialize(DeserializeCtx* ctx, PathInfo* outThis)
{
   unsigned flags;
   unsigned origParentEntryIDLen;

   // defaults for a directory or a file stored in old format
   unsigned origParentUID = 0;
   const char* origParentEntryID = NULL;

   if(!Serialization_deserializeUInt(ctx, &flags) )
      return false;

   if (flags & PATHINFO_FEATURE_ORIG)
   { // file has origParentUID and origParentEntryID
      if(!Serialization_deserializeUInt(ctx, &origParentUID) )
         return false;

      if(!Serialization_deserializeStrAlign4(ctx, &origParentEntryIDLen, &origParentEntryID) )
         return false;
   }

   PathInfo_init(outThis, origParentUID, origParentEntryID, flags);

   return true;
}

View File

@@ -0,0 +1,154 @@
/*
* class PathInfo - extra information how to find chunk files (or later on inode files)
*
* NOTE: If you change this file, do not forget to adjust commons PathInfo.h
*/
#ifndef PATHINFO_H_
#define PATHINFO_H_
#include <common/storage/PathInfo.h>
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/SerializationTypes.h>
#include <common/toolkit/StringTk.h>
#define PATHINFO_FEATURE_ORIG 1 /* indicates chunks are stored with origParentUID
* and origParentEntryID */
#define PATHINFO_FEATURE_ORIG_UNKNOWN 2 /* indicates FEATURE_ORIG is unknown and needs to be
* requested from the meta-inode */
#define PATHINFO_FEATURE_IS_STUB 4 /* indicates entry is a stub file.
* This flag is not used in the client module, but is added
* here for consistency with the C++ PathInfo implementation */
struct PathInfo;
typedef struct PathInfo PathInfo;
static inline void PathInfo_init(PathInfo *this,
unsigned origParentUID, const char* origParentEntryID, unsigned flags);
static inline void PathInfo_uninit(PathInfo* this);
static inline void PathInfo_dup(const PathInfo *inPathInfo, PathInfo *outPathInfo);
static inline void PathInfo_update(PathInfo* this, const PathInfo* newPathInfo);
static inline void PathInfo_setOrigUID(PathInfo* this, unsigned origParentUID);
static inline void PathInfo_setOrigParentEntryID(PathInfo* this, const char* origParentEntryID);
static inline void PathInfo_setFlags(PathInfo *this, unsigned flags);
extern void PathInfo_serialize(SerializeCtx* ctx, const PathInfo* this);
extern bool PathInfo_deserialize(DeserializeCtx* ctx, PathInfo* outThis);
/**
 * Extra information about an entry, needed to find its chunk files.
 *
 * Each value is stored in an anonymous union of a const member and a writable member with a
 * leading underscore, so both names refer to the same storage.
 *
 * note: In order to properly initialize this struct, PathInfo_init() has to be called. Only
 * the PathInfo_set...() functions (used by PathInfo_init() ) are supposed to write to the
 * write-unprotected '_' members. Other code is supposed to use the members without the
 * underscore.
 */
struct PathInfo
{
union
{
const unsigned flags; // additional flags (e.g. PATHINFO_FEATURE_ORIG)
unsigned _flags;
};
union
{
const unsigned origParentUID; // UID who created the file, only set for FileInodes
unsigned _origParentUID;
};
union
{
// ID of the dir in which the file was created in. Only set for FileInodes
char const* const origParentEntryID;
char* _origParentEntryID;
};
};
/**
 * Main initialization function for PathInfo, should typically be used.
 *
 * The string is not copied; the object takes over the given pointer.
 *
 * @param origParentEntryID will be free'd on uninit (if flags contain PATHINFO_FEATURE_ORIG)
 */
void PathInfo_init(PathInfo* this, unsigned origParentUID, const char* origParentEntryID, unsigned flags)
{
   // write through the unprotected '_' aliases of the const members
   this->_origParentUID = origParentUID;
   this->_origParentEntryID = (char *) origParentEntryID;
   this->_flags = flags;
}
/**
 * Uninitialize the object: origParentEntryID is freed only if PATHINFO_FEATURE_ORIG is set.
 */
void PathInfo_uninit(PathInfo* this)
{
   if (!(this->flags & PATHINFO_FEATURE_ORIG) )
      return;

   kfree(this->origParentEntryID);
}
/**
 * Duplicate inPathInfo to outPathInfo, also allocate memory for strings.
 *
 * origParentEntryID is only copied if PATHINFO_FEATURE_ORIG is set; otherwise the copy gets
 * a NULL string.
 *
 * @param outPathInfo is initialized from scratch; previous contents are not freed
 */
void PathInfo_dup(const PathInfo* inPathInfo, PathInfo* outPathInfo)
{
   // flags are unsigned in the struct and in PathInfo_init(), so keep them unsigned here too
   unsigned outFlags = inPathInfo->flags;
   unsigned outOrigUID = inPathInfo->origParentUID;
   const char* outOrigParentEntryID = NULL;

   if (outFlags & PATHINFO_FEATURE_ORIG)
      outOrigParentEntryID = StringTk_strDup(inPathInfo->origParentEntryID);

   PathInfo_init(outPathInfo, outOrigUID, outOrigParentEntryID, outFlags);
}
/**
 * Update an existing PathInfo with the values of newPathInfo.
 *
 * Nothing happens if flags, origParentUID and (when both are set) origParentEntryID are
 * equal. Otherwise this object is uninitialized and re-created as a copy of newPathInfo.
 */
void PathInfo_update(PathInfo* this, const PathInfo* newPathInfo)
{
   // note: no need to compare a set with an unset origParentEntryID, as the flags would be
   // different then.
   if ( (this->flags == newPathInfo->flags) &&
        (this->origParentUID == newPathInfo->origParentUID) &&
        !( (this->origParentEntryID && newPathInfo->origParentEntryID) &&
           (strcmp(this->origParentEntryID, newPathInfo->origParentEntryID) != 0) ) )
      return; // unchanged

   PathInfo_uninit(this);
   PathInfo_dup(newPathInfo, this);
}
/**
 * Store the flags through the writable alias of the const flags member.
 */
void PathInfo_setFlags(PathInfo* this, unsigned flags)
{
   unsigned* writableFlags = &this->_flags;

   *writableFlags = flags;
}
/**
 * Store the UID through the writable alias of the const origParentUID member.
 */
void PathInfo_setOrigUID(PathInfo* this, unsigned origParentUID)
{
   unsigned* writableUID = &this->_origParentUID;

   *writableUID = origParentUID;
}
/**
 * Store the pointer through the writable alias of the const origParentEntryID member.
 * The string is not copied and the previous value is not freed.
 */
void PathInfo_setOrigParentEntryID(PathInfo* this, const char* origParentEntryID)
{
   char* writableID = (char *) origParentEntryID;

   this->_origParentEntryID = writableID;
}
#endif /* PATHINFO_H_ */

View File

@@ -0,0 +1,399 @@
#ifdef BEEGFS_NVFS
#include "linux/uio.h"
#include "linux/pagemap.h"
#include "linux/kernel.h"
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>
#include <common/net/sock/RDMASocket.h>
#include "Nvfs.h"
#include "RdmaInfo.h"
//
// These macros convert a scatterlist entry into a base address (ba) and limit address (la)
// and vice-versa. From this information, we can combine scatterlist entries which are DMA
// contiguous.
//
// All arguments are parenthesized, so that expressions (e.g. "list + 1" or "base + len") can
// be passed safely. Note that BA and LA are still evaluated more than once.
//
#define sg_to_ba_la(SG, BA, LA) \
   do \
   { \
      (BA) = sg_dma_address(SG); \
      (LA) = (BA) + sg_dma_len(SG); \
   } while (0)
#define ba_la_to_sg(SG, BA, LA) \
   do \
   { \
      sg_dma_address(SG) = (BA); \
      sg_dma_len(SG) = (LA) - (BA); \
      (SG)->length = (LA) - (BA); \
   } while (0)
/*
 * RdmaInfo_acquireNVFS - Take a reference on the nvfs ops (see nvfs_get_ops).
 * @returns false if no reference could be taken
 */
bool RdmaInfo_acquireNVFS(void)
{
   bool acquired;

#ifdef BEEGFS_DEBUG
   printk_fhgfs(KERN_INFO, "%s\n", __func__);
#endif // BEEGFS_DEBUG

   acquired = nvfs_get_ops();

   return acquired;
}
/*
 * RdmaInfo_releaseNVFS - Drop a reference taken by RdmaInfo_acquireNVFS.
 */
void RdmaInfo_releaseNVFS(void)
{
#ifdef BEEGFS_DEBUG
   printk_fhgfs(KERN_INFO, "%s\n", __func__);
#endif // BEEGFS_DEBUG

   nvfs_put_ops();
}
/*
 * RdmaInfo_detectNVFSRequest - Check whether an iov_iter refers to GPU memory.
 * @dpctx: receives the GPU index if the first page is a GPU page; may be NULL, in which
 *    case the index is not stored
 * @iter: iov_iter to inspect (not modified, a copy is used)
 * @returns 1 if the first page is a GPU page, 0 if it is not (or the iter is empty),
 *    negative error code if the first page could not be retrieved
 */
int RdmaInfo_detectNVFSRequest(DevicePriorityContext* dpctx,
   const struct iov_iter *iter)
{
   struct page *page = NULL;
   struct iov_iter iter_copy = *iter;
   size_t page_offset = 0;
   int status = 0;
   bool is_gpu = false;

   // Test the first page of the request to determine the memory type.
   status = iov_iter_get_pages(&iter_copy, &page, PAGE_SIZE, 1, &page_offset);
   if (unlikely(status <= 0))
   {
      // 0 means the iter is empty, so just indicate that it's not an NVFS call.
      // Otherwise, indicate an error condition.
      if (status < 0)
         printk_fhgfs(KERN_WARNING, "%s: can't retrieve page from iov_iter, status=%d\n",
            __func__, status);
      return status;
   }

   // nvfs_is_gpu_page() will return false if RDMA write support
   // is disabled in user space.
   // TODO: if a GPU page, keep the retrieved page for a future
   // RDMA map operation instead of calling put_page()
   if (nvfs_ops->nvfs_is_gpu_page(page))
   {
      is_gpu = true;

      // the debug output below treats dpctx as optional, so it must not be dereferenced
      // unconditionally here
      if (dpctx)
         dpctx->gpuIndex = nvfs_ops->nvfs_gpu_index(page);
   }

   put_page(page);

#ifdef BEEGFS_DEBUG
   printk_fhgfs(KERN_INFO, "%s:%d: page=%p is_gpu=%d gpu_index=%d\n",
      __func__, __LINE__,
      page, is_gpu, dpctx? dpctx->gpuIndex : -2);
#endif

   return is_gpu;
}
/*
* RdmaInfo_putPpages - Put the pages back into free list.
* @sglist: The array of scatter/gather entries
* @count: The count of entries to process
*/
static inline void RdmaInfo_putPages(struct scatterlist *sglist, int count)
{
int i = 0;
struct scatterlist *sgp = NULL;
if ((sglist != NULL) && (count > 0))
{
for (i = 0, sgp = sglist; i < count; i++, sgp++)
{
put_page(sg_page(sgp));
}
}
}
/*
* RdmaInfo_iovToSglist - Map an iov_iter to scatter/gather list
* @iter: iov_iter
* @sglist: The array of scatter/gather entries (needs to be big enough for all pages)
* @returns number of sg entries set up for the iov_iter
*/
static int RdmaInfo_iovToSglist(const struct iov_iter *iter,
struct scatterlist *sglist)
{
struct page **pages = NULL;
struct scatterlist *sg = NULL;
struct scatterlist *sg_prev = NULL;
struct iov_iter iter_copy = *iter;
int sg_count = 0;
size_t page_length = 0;
size_t page_offset = 0;
size_t bytes = 0;
ssize_t result = 0;
unsigned i = 0;
unsigned npages = 0;
while (iov_iter_count(&iter_copy) > 0)
{
result = iov_iter_get_pages_alloc(&iter_copy, &pages, iov_iter_count(&iter_copy), &page_offset);
if (result < 0)
{
printk_fhgfs(KERN_ERR, "RdmaInfo_iovToSglist: no memory pages\n");
RdmaInfo_putPages(sglist, sg_count);
return -ENOMEM;
}
bytes = result;
npages = (bytes + page_offset + PAGE_SIZE - 1) / PAGE_SIZE;
sg_count += npages;
for (i = 0, sg = sglist; i < npages; i++, sg = sg_next(sg))
{
page_length = min(bytes, PAGE_SIZE - page_offset);
sg_set_page(sg, pages[i], page_length, page_offset);
bytes -= page_length;
page_offset = 0;
sg_prev = sg;
}
kvfree(pages);
iov_iter_advance(&iter_copy, result);
}
if (sg_prev)
{
sg_mark_end(sg_prev);
}
return sg_count;
}
/*
 * RdmaInfo_coalesceSglist - Coalesce scatterlist entries for optimal RDMA operations.
 *
 * Neighboring entries are merged into one output entry when the DMA limit address of the
 * current range equals the DMA base address of the next entry.
 *
 * @sglist: input list (not necessarily coalesced); the first entry is read unconditionally,
 *    so the list must contain at least one DMA mapped entry
 * @dmalist: output list (coalesced); needs room for up to count entries
 * @count: Number of scatterlist entries
 * @returns count of coalesced list
 */
static int RdmaInfo_coalesceSglist(struct scatterlist *sglist,
struct scatterlist *dmalist, int count)
{
struct scatterlist *sgp = sglist;
struct scatterlist *dmap = dmalist;
dma_addr_t dma_ba = 0, dma_la = 0; // range that is currently being built
dma_addr_t sg_ba = 0, sg_la = 0; // range of the input entry under inspection
int i = 0;
#ifdef BEEGFS_DEBUG
size_t len = sg_dma_len(sgp); // total DMA length, only for the debug message
#endif
//
// Load the first range.
//
sg_to_ba_la(sgp, dma_ba, dma_la);
if (count > 1)
{
// walk the remaining count-1 entries, starting at the second one
for_each_sg(&sglist[1], sgp, count-1, i)
{
#ifdef BEEGFS_DEBUG
len += sg_dma_len(sgp);
#endif
sg_to_ba_la(sgp, sg_ba, sg_la);
//
// If the regions aren't contiguous, then set the current
// range and start a new range. Otherwise, add on to the
// current range.
//
if (dma_la != sg_ba)
{
ba_la_to_sg(dmap, dma_ba, dma_la);
sg_unmark_end(dmap); // more output entries follow
dmap = sg_next(dmap);
dma_ba = sg_ba;
dma_la = sg_la;
}
else
{
dma_la = sg_la;
}
}
}
//
// Set the last range.
//
ba_la_to_sg(dmap, dma_ba, dma_la);
sg_mark_end(dmap);
#ifdef BEEGFS_DEBUG
printk_fhgfs(KERN_INFO, "%s len=%zu count=%d return=%d\n", __func__, len, count, (int)(1 + dmap - dmalist));
#endif
// dmap points to the last output entry that was written
return 1 + dmap - dmalist;
}
/*
 * RdmaInfo_map - Map GPU buffers for RDMA operations.
 * @iter: iov_iter
 * @socket: RDMA capable socket struct.
 * @dma_dir: read (DMA_FROM_DEVICE) or write (DMA_TO_DEVICE)
 * @returns RdmaInfo struct on success (to be released with RdmaInfo_unmap), NULL if NVFS
 *    reported NVFS_CPU_REQ for the pages, or an ERR_PTR on failure
 */
static RdmaInfo * RdmaInfo_map(const struct iov_iter *iter, Socket *socket,
   enum dma_data_direction dma_dir)
{
   RdmaInfo *rdmap;
   RDMASocket *rs;
   struct ib_device *device;
   struct scatterlist *sglist;
   struct scatterlist *dmalist;
   int status = 0;
   int sg_count = 0; // must be valid for the cleanup code, even if no page was retrieved yet
   int dma_count;
   int count;
   unsigned npages;
   unsigned key;

   if (Socket_getSockType(socket) != NICADDRTYPE_RDMA)
      return ERR_PTR(-EINVAL);

   rs = (RDMASocket*) socket;
   if (!RDMASocket_isRkeyGlobal(rs))
   {
      printk_fhgfs(KERN_ERR, "ERROR: rkey type is not compatible with GDS\n");
      return ERR_PTR(-EINVAL);
   }

   npages = 1 + iov_iter_npages(iter, INT_MAX);

   //
   // Allocate the scatterlist.
   //
   rdmap = kzalloc(sizeof(RdmaInfo), GFP_ATOMIC);
   sglist = kzalloc(npages * sizeof(struct scatterlist), GFP_ATOMIC);
   dmalist = kzalloc(npages * sizeof(struct scatterlist), GFP_ATOMIC);
   if (unlikely(!rdmap || !sglist || !dmalist))
   {
      printk_fhgfs(KERN_ERR, "%s: no memory for scatterlist\n", __func__);
      status = -ENOMEM;
      goto error_return;
   }

   //
   // Populate the scatterlist from the iov_iter.
   //
   sg_count = RdmaInfo_iovToSglist(iter, sglist);
   if (unlikely(sg_count < 0))
   { // (page references were already dropped by RdmaInfo_iovToSglist)
      printk_fhgfs(KERN_ERR, "%s: can't convert iov_iter to scatterlist\n", __func__);
      status = -EIO;
      goto error_return;
   }

   //
   // DMA map all of the pages.
   //
   device = RDMASocket_getDevice(rs);
   key = RDMASocket_getRkey(rs);
   count = nvfs_ops->nvfs_dma_map_sg_attrs(device->dma_device, sglist, sg_count,
      dma_dir, DMA_ATTR_NO_WARN);
   if (unlikely(count != sg_count))
   {
      if (count == NVFS_CPU_REQ)
      {
         printk_fhgfs(KERN_ERR, "%s: NVFS_CPU_REQ\n", __func__);
         status = 0; // not an error: results in a NULL return value
      }
      else if (count == NVFS_IO_ERR)
      {
         printk_fhgfs(KERN_ERR, "%s: can't DMA map mixed CPU/GPU pages\n", __func__);
         status = -EINVAL;
      }
      else
      {
         printk_fhgfs(KERN_ERR, "%s: unknown error returned from NVFS (%d)\n", __func__, count);
         status = -EIO;
      }
      goto error_return;
   }

   //
   // Coalesce the scatterlist.
   //
   dma_count = RdmaInfo_coalesceSglist(sglist, dmalist, count);
   if (unlikely(dma_count > RDMA_MAX_DMA_COUNT))
   {
      printk_fhgfs(KERN_ERR, "%s: too many DMA elements count=%d max=%d\n", __func__,
         dma_count, RDMA_MAX_DMA_COUNT);

      // the pages are DMA mapped at this point, so the mapping has to be undone before the
      // pages are released (same call as in RdmaInfo_unmap)
      nvfs_ops->nvfs_dma_unmap_sg(device->dma_device, sglist, sg_count, dma_dir);

      status = -EIO;
      goto error_return;
   }

   //
   // Fill in the rdma info.
   //
   rdmap->dma_count = dma_count;
   rdmap->sg_count = sg_count;
   rdmap->tag = 0x00000000;
   rdmap->device = device;
   rdmap->key = key;
   rdmap->sglist = sglist;
   rdmap->dmalist = dmalist;

#ifdef BEEGFS_DEBUG_RDMA
   RdmaInfo_dumpIovIter(iter);
   RdmaInfo_dumpSgtable("MAP", rdmap->dmalist, rdmap->dma_count);
   RdmaInfo_dumpRdmaInfo(rdmap);
#endif /* BEEGFS_DEBUG_RDMA */

   return rdmap;

error_return:
   if (sglist)
   {
      // no-op for sg_count <= 0
      RdmaInfo_putPages(sglist, sg_count);
      kfree(sglist);
   }

   kfree(dmalist);
   kfree(rdmap);

   return (status == 0) ? NULL : ERR_PTR(status);
}
/*
 * RdmaInfo_mapRead - Map buffers for a read (DMA_FROM_DEVICE); see RdmaInfo_map.
 */
RdmaInfo* RdmaInfo_mapRead(const struct iov_iter *iter, Socket *socket)
{
   const enum dma_data_direction dir = DMA_FROM_DEVICE;

   return RdmaInfo_map(iter, socket, dir);
}
/*
 * RdmaInfo_mapWrite - Map buffers for a write (DMA_TO_DEVICE); see RdmaInfo_map.
 */
RdmaInfo* RdmaInfo_mapWrite(const struct iov_iter *iter, Socket *socket)
{
   const enum dma_data_direction dir = DMA_TO_DEVICE;

   return RdmaInfo_map(iter, socket, dir);
}
/*
 * RdmaInfo_unmap - Unmap GPU buffers for RDMA operations and free the RdmaInfo.
 * @rdmap: RdmaInfo created by RdmaInfo_map (see above); must be a valid pointer
 * @dma_dir: read (DMA_FROM_DEVICE) or write (DMA_TO_DEVICE)
 */
static inline void RdmaInfo_unmap(RdmaInfo *rdmap, enum dma_data_direction dma_dir)
{
   // DMA unmapping and page release only happen if both lists are present
   if (rdmap->sglist && rdmap->dmalist)
   {
      nvfs_ops->nvfs_dma_unmap_sg(rdmap->device->dma_device, rdmap->sglist, rdmap->sg_count, dma_dir);
      RdmaInfo_putPages(rdmap->sglist, rdmap->sg_count);
      kfree(rdmap->dmalist);
   }

   // kfree() accepts NULL
   kfree(rdmap->sglist);
   kfree(rdmap);
}
/*
 * RdmaInfo_unmapRead - Counterpart of RdmaInfo_mapRead().
 * Calls RdmaInfo_unmap() with DMA_FROM_DEVICE, which also frees @rdmap.
 * @rdmap is dereferenced by RdmaInfo_unmap() without a check, so it must be a valid pointer.
 */
void RdmaInfo_unmapRead(RdmaInfo *rdmap)
{
RdmaInfo_unmap(rdmap, DMA_FROM_DEVICE);
}
/*
 * RdmaInfo_unmapWrite - Counterpart of RdmaInfo_mapWrite().
 * Calls RdmaInfo_unmap() with DMA_TO_DEVICE, which also frees @rdmap.
 * @rdmap is dereferenced by RdmaInfo_unmap() without a check, so it must be a valid pointer.
 */
void RdmaInfo_unmapWrite(RdmaInfo *rdmap)
{
RdmaInfo_unmap(rdmap, DMA_TO_DEVICE);
}
#endif /* BEEGFS_NVFS */

View File

@@ -0,0 +1,117 @@
#ifndef _RDMAINFO_H_
#define _RDMAINFO_H_
#ifdef BEEGFS_NVFS
#include "linux/uio.h"
#include <common/net/sock/RDMASocket.h>
#include <common/toolkit/Serialization.h>
#include <common/storage/Nvfs.h>
// Upper bound for RdmaInfo.dma_count: RdmaInfo_map() fails with -EIO when coalescing the
// scatterlist yields more DMA elements than this.
#define RDMA_MAX_DMA_COUNT 64
/*
 * Describes one DMA mapping of an iov_iter, as filled in by RdmaInfo_map() and torn down
 * (including the free of this struct) by RdmaInfo_unmap().
 */
struct RdmaInfo
{
size_t sg_count; // number of entries in sglist
size_t dma_count; // number of entries in dmalist (result of coalescing sglist)
uint32_t tag; // serialized together with key; RdmaInfo_map() sets it to 0
uint64_t key; // value of RDMASocket_getRkey() for the socket used for mapping
struct ib_device *device; // value of RDMASocket_getDevice(); its dma_device is used to (un)map
void *pages; // NOTE(review): not referenced by the map/unmap/serialize code seen here - confirm use
struct scatterlist *sglist; // the list that was DMA-mapped; freed on unmap
struct scatterlist *dmalist; // coalesced list whose entries get serialized; freed on unmap
};
typedef struct RdmaInfo RdmaInfo;
/**
 * Serialize the DMA list of an RdmaInfo.
 *
 * Layout: dma_count (uint64); if dma_count is non-zero it is followed by tag (uint), key (uint64)
 * and then, per dmalist entry, DMA address, length and offset (each as uint64).
 *
 * @param rdmap may be NULL, in which case only a count of 0 is serialized
 */
static inline void RdmaInfo_serialize(SerializeCtx* ctx, RdmaInfo *rdmap)
{
   struct scatterlist *entry;
   int idx;

   if (rdmap == NULL)
   { // no mapping => only the (zero) element count goes out
      Serialization_serializeUInt64(ctx, 0ull);
      return;
   }

   Serialization_serializeUInt64(ctx, rdmap->dma_count);
   if (rdmap->dma_count == 0)
      return;

   Serialization_serializeUInt(ctx, rdmap->tag);
   Serialization_serializeUInt64(ctx, rdmap->key);

   entry = rdmap->dmalist;
   for (idx = 0; idx < rdmap->dma_count; idx++)
   {
      Serialization_serializeUInt64(ctx, sg_dma_address(entry));
      Serialization_serializeUInt64(ctx, (uint64_t)entry->length);
      Serialization_serializeUInt64(ctx, (uint64_t)entry->offset);
      entry++;
   }
}
// NOTE(review): presumably acquire/release manage access to the NVFS driver ops and detect checks
// whether an iov_iter needs NVFS handling - their definitions are not part of this header, confirm
// against the .c file.
bool RdmaInfo_acquireNVFS(void);
void RdmaInfo_releaseNVFS(void);
int RdmaInfo_detectNVFSRequest(DevicePriorityContext* dpctx,
const struct iov_iter *iter);
// defined at the end of this header
static inline int RdmaInfo_nvfsDevicePriority(struct ib_device* dev,
int gpuIndex);
// Map/unmap the buffers of an iov_iter: the "Read" variants use DMA_FROM_DEVICE, the "Write"
// variants DMA_TO_DEVICE. The map functions can return NULL or an ERR_PTR() value instead of a
// valid RdmaInfo; the unmap functions also free the RdmaInfo.
RdmaInfo* RdmaInfo_mapRead(const struct iov_iter *iter, Socket *socket);
RdmaInfo* RdmaInfo_mapWrite(const struct iov_iter *iter, Socket *socket);
void RdmaInfo_unmapRead(RdmaInfo *rdmap);
void RdmaInfo_unmapWrite(RdmaInfo *rdmap);
#ifdef BEEGFS_DEBUG_RDMA
static inline void RdmaInfo_dumpIovIter(const struct iov_iter *iter)
{
int i = 0;
struct iov_iter iter_copy = *iter;
printk(KERN_ALERT "IOV_ITER : count=%ld", iov_iter_count(&iter_copy));
while (iov_iter_count(&iter_copy) > 0)
{
printk(KERN_INFO " %3d: %px %ld", i, iter_iov_addr(&iter_copy), iter_iov_len(&iter_copy));
iov_iter_advance(&iter_copy, iter_iov_len(&iter_copy));
}
}
/**
 * Debug helper: log all entries of a scatterlist array.
 *
 * Per entry: index, DMA address, DMA length, length and offset.
 *
 * @param header label printed in front of the entry count
 * @param sglist first element; the entries are walked as a plain array (sgp++)
 * @param sg_count number of entries to print
 */
static inline void RdmaInfo_dumpSgtable(const char *header,
   struct scatterlist *sglist, size_t sg_count)
{
   size_t i; // same type as sg_count (no signed/unsigned comparison)
   struct scatterlist *sgp;

   printk_fhgfs(KERN_INFO, "%-12s : (%zu entries)", header, sg_count);
   for (i = 0, sgp = sglist; i < sg_count; i++, sgp++)
   {
      // dma_addr_t can be 32 or 64 bit wide depending on the kernel config, hence the cast
      // for %llx; the length/offset fields are unsigned int
      printk_fhgfs(KERN_INFO, " [%3zu] = %016llx %u %u %u",
         i,
         (unsigned long long)sg_dma_address(sgp),
         sg_dma_len(sgp),
         sgp->length,
         sgp->offset);
   }
}
/**
 * Debug helper: log the element count, tag and key of an RdmaInfo, followed by its sglist
 * and dmalist.
 *
 * @param rdmap may be NULL (nothing is logged then)
 */
static inline void RdmaInfo_dumpRdmaInfo(RdmaInfo *rdmap)
{
   if (rdmap == NULL)
      return;

   // dma_count is a size_t => %zu
   printk_fhgfs(KERN_INFO, "RDMA :\n"
      " NENTS = %zu\n"
      " TAG = %x\n"
      " KEY = %llx\n",
      rdmap->dma_count, rdmap->tag, rdmap->key);

   RdmaInfo_dumpSgtable("SGLIST", rdmap->sglist, rdmap->sg_count);
   RdmaInfo_dumpSgtable("DMALIST", rdmap->dmalist, rdmap->dma_count);
}
#endif /* BEEGFS_DEBUG_RDMA */
/**
 * Forward a device priority query to nvfs_ops->nvfs_device_priority().
 * (Declared as "static inline" further up in this header.)
 *
 * @param dev IB device whose dma_device is handed to NVFS
 * @param gpuIndex passed through unchanged
 * @return the value returned by nvfs_device_priority()
 */
int RdmaInfo_nvfsDevicePriority(struct ib_device* dev, int gpuIndex)
{
return nvfs_ops->nvfs_device_priority(dev->dma_device, gpuIndex);
}
#endif /* BEEGFS_NVFS */
#endif /* _RDMAINFO_H_ */

View File

@@ -0,0 +1,62 @@
/*
* Information provided by stat()
*/
#include <common/toolkit/Serialization.h>
#include "StatData.h"
/**
 * Deserialize a StatData from the given context.
 *
 * The fields are read in their wire order (which differs from the member order of the struct);
 * reading stops at the first field that fails.
 *
 * @return false if any field could not be deserialized
 */
bool StatData_deserialize(DeserializeCtx* ctx, StatData* outThis)
{
   SettableFileAttribs* attribs = &outThis->settableFileAttribs;

   // note: "&&" short-circuits, so the calls below run strictly top to bottom
   return Serialization_deserializeUInt(ctx, &outThis->flags)                  // flags
      && Serialization_deserializeInt(ctx, &attribs->mode)                     // mode
      && Serialization_deserializeUInt64(ctx, &outThis->numBlocks)             // sumChunkBlocks
      && Serialization_deserializeInt64(ctx, &outThis->creationTimeSecs)       // creationTime
      && Serialization_deserializeInt64(ctx, &attribs->lastAccessTimeSecs)     // aTime
      && Serialization_deserializeInt64(ctx, &attribs->modificationTimeSecs)   // mtime
      && Serialization_deserializeInt64(ctx, &outThis->attribChangeTimeSecs)   // ctime
      && Serialization_deserializeInt64(ctx, &outThis->fileSize)               // fileSize
      && Serialization_deserializeUInt(ctx, &outThis->nlink)                   // nlink
      && Serialization_deserializeUInt(ctx, &outThis->metaVersion)             // metaVersion
      && Serialization_deserializeUInt(ctx, &attribs->userID)                  // uid
      && Serialization_deserializeUInt(ctx, &attribs->groupID);                // gid
}

View File

@@ -0,0 +1,61 @@
/*
* Information provided by stat()
*/
#ifndef STATDATA_H_
#define STATDATA_H_
#include <common/storage/StorageDefinitions.h>
#include <common/FhgfsTypes.h>
// NOTE(review): presumably a feature bit for StatData.flags - not used in this header, confirm
#define STATDATA_FEATURE_SPARSE_FILE 1
// stat blocks are 512 bytes, ">> 9" is then the same as "/ 512", but faster
#define STATDATA_SIZETOBLOCKSBIT_SHIFT 9
struct StatData;
typedef struct StatData StatData;
extern bool StatData_deserialize(DeserializeCtx* ctx, StatData* outThis);
static inline void StatData_getOsStat(StatData* this, fhgfs_stat* outOsStat);
/*
 * Stat information of an entry as filled in by StatData_deserialize().
 * Note: the member order below is not the wire order; see StatData_deserialize() for that.
 */
struct StatData
{
unsigned flags;
int64_t fileSize;
uint64_t numBlocks; // copied to fhgfs_stat.blocks by StatData_getOsStat()
int64_t creationTimeSecs; // real creation time
int64_t attribChangeTimeSecs; // this corresponds to unix ctime
unsigned nlink;
SettableFileAttribs settableFileAttribs; // mode, uid, gid, mtime and atime
unsigned metaVersion; // inc'ed when internal metadata is modified (for cache invalidation)
};
/**
 * Copy the stat values into the OS-level stat structure.
 *
 * Only whole seconds are available for the timestamps, so all tv_nsec fields are set to 0.
 * creationTimeSecs and flags are not transferred.
 */
void StatData_getOsStat(StatData* this, fhgfs_stat* outOsStat)
{
   const SettableFileAttribs* attribs = &this->settableFileAttribs;

   // ownership and permissions
   outOsStat->mode = attribs->mode;
   outOsStat->uid = attribs->userID;
   outOsStat->gid = attribs->groupID;

   // size and link information
   outOsStat->size = this->fileSize;
   outOsStat->blocks = this->numBlocks;
   outOsStat->nlink = this->nlink;

   // timestamps (seconds resolution only)
   outOsStat->atime.tv_sec = attribs->lastAccessTimeSecs;
   outOsStat->atime.tv_nsec = 0;
   outOsStat->mtime.tv_sec = attribs->modificationTimeSecs;
   outOsStat->mtime.tv_nsec = 0;
   outOsStat->ctime.tv_sec = this->attribChangeTimeSecs;
   outOsStat->ctime.tv_nsec = 0;

   outOsStat->metaVersion = this->metaVersion;
}
#endif /* STATDATA_H_ */

View File

@@ -0,0 +1,98 @@
#ifndef STORAGEDEFINITIONS_H_
#define STORAGEDEFINITIONS_H_
/*
* Remember to keep these definitions in sync with StorageDefinitions.h of fhgfs_common!!!
*/
#include <common/Common.h>
// open rw flags
// (note: READWRITE is a bit of its own, not the combination of READ and WRITE)
#define OPENFILE_ACCESS_READ 1
#define OPENFILE_ACCESS_WRITE 2
#define OPENFILE_ACCESS_READWRITE 4
// open extra flags (distinct bits that do not overlap with the rw flags above)
#define OPENFILE_ACCESS_APPEND 8
#define OPENFILE_ACCESS_TRUNC 16
#define OPENFILE_ACCESS_DIRECT 32 /* for direct IO */
#define OPENFILE_ACCESS_SYNC 64 /* for sync'ed IO */
// open masks
// (MASK_EXTRA is the bitwise complement of MASK_RW, i.e. every bit that is not a rw flag)
#define OPENFILE_ACCESS_MASK_RW \
(OPENFILE_ACCESS_READ | OPENFILE_ACCESS_WRITE | OPENFILE_ACCESS_READWRITE)
#define OPENFILE_ACCESS_MASK_EXTRA (~OPENFILE_ACCESS_MASK_RW)
// set attribs flags (one bit per field of struct SettableFileAttribs)
#define SETATTR_CHANGE_MODE 1
#define SETATTR_CHANGE_USERID 2
#define SETATTR_CHANGE_GROUPID 4
#define SETATTR_CHANGE_MODIFICATIONTIME 8
#define SETATTR_CHANGE_LASTACCESSTIME 16
// entry and append lock type flags
#define ENTRYLOCKTYPE_UNLOCK 1
#define ENTRYLOCKTYPE_EXCLUSIVE 2
#define ENTRYLOCKTYPE_SHARED 4
#define ENTRYLOCKTYPE_NOWAIT 8 /* operation may not block if lock not available */
#define ENTRYLOCKTYPE_CANCEL 16 /* cancel all granted and pending locks for given handle
(normally on client file close) */
// entry lock mask
// (LOCKOPS_ADD covers the flags that take a lock, LOCKOPS_REMOVE the flags that give locks up)
#define ENTRYLOCKTYPE_LOCKOPS_ADD (ENTRYLOCKTYPE_EXCLUSIVE | ENTRYLOCKTYPE_SHARED)
#define ENTRYLOCKTYPE_LOCKOPS_REMOVE (ENTRYLOCKTYPE_UNLOCK | ENTRYLOCKTYPE_CANCEL)
struct SettableFileAttribs;
typedef struct SettableFileAttribs SettableFileAttribs;
enum DirEntryType
{ // don't use these directly, use the DirEntryType_IS...() macros below to check entry types
   DirEntryType_INVALID = 0, DirEntryType_DIRECTORY = 1, DirEntryType_REGULARFILE = 2,
   DirEntryType_SYMLINK = 3, DirEntryType_BLOCKDEV = 4, DirEntryType_CHARDEV = 5,
   DirEntryType_FIFO = 6, DirEntryType_SOCKET = 7
};
typedef enum DirEntryType DirEntryType;

/* Note: the macro argument is parenthesized in every expansion, so that expressions with
   lower-precedence operators (e.g. "flags & 7" or "a ? b : c") are compared as a whole. */
#define DirEntryType_ISVALID(dirEntryType) ( (dirEntryType) != DirEntryType_INVALID)
#define DirEntryType_ISDIR(dirEntryType) ( (dirEntryType) == DirEntryType_DIRECTORY)
#define DirEntryType_ISREGULARFILE(dirEntryType) ( (dirEntryType) == DirEntryType_REGULARFILE)
#define DirEntryType_ISSYMLINK(dirEntryType) ( (dirEntryType) == DirEntryType_SYMLINK)
#define DirEntryType_ISBLOCKDEV(dirEntryType) ( (dirEntryType) == DirEntryType_BLOCKDEV)
#define DirEntryType_ISCHARDEV(dirEntryType) ( (dirEntryType) == DirEntryType_CHARDEV)
#define DirEntryType_ISFIFO(dirEntryType) ( (dirEntryType) == DirEntryType_FIFO)
#define DirEntryType_ISSOCKET(dirEntryType) ( (dirEntryType) == DirEntryType_SOCKET)
/* @return true for any kind of file, including symlinks and special files
   (relies on the file types forming the contiguous range REGULARFILE..SOCKET; the argument is
   evaluated twice, so don't pass expressions with side effects) */
#define DirEntryType_ISFILE(dirEntryType) \
   ( ( (dirEntryType) >= DirEntryType_REGULARFILE) && ( (dirEntryType) <= DirEntryType_SOCKET) )
// hint (but not strict enforcement) that the inode is inlined into the dentry
#define STATFLAG_HINT_INLINE 1
// Kind of lock request: whole-entry vs. range, each as "flock" and as "coherence" variant.
// NOTE(review): presumably the numeric values are shared with the servers, like the other
// definitions in this header - confirm before renumbering.
enum EntryLockRequestType
{EntryLockRequestType_ENTRYFLOCK=0, EntryLockRequestType_RANGEFLOCK=1,
EntryLockRequestType_ENTRYCOHERENCE=2, EntryLockRequestType_RANGECOHERENCE=3};
typedef enum EntryLockRequestType EntryLockRequestType;
/**
 * The file attributes that can be modified by a set-attributes operation.
 *
 * Note: Typically used in combination with a value of SETATTR_CHANGE_...-Flags to determine which
 * of the fields are actually used
 */
struct SettableFileAttribs
{
int mode; // see SETATTR_CHANGE_MODE
unsigned userID; // see SETATTR_CHANGE_USERID
unsigned groupID; // see SETATTR_CHANGE_GROUPID
int64_t modificationTimeSecs; // unix mtime
int64_t lastAccessTimeSecs; // unix atime
};
#endif /*STORAGEDEFINITIONS_H_*/

View File

@@ -0,0 +1,93 @@
#include <common/storage/StorageErrors.h>
#include <common/Common.h>
// Number of real entries in __FHGFSOPS_ERRLIST. (The macro is only expanded where it is used,
// i.e. in the functions below the table, so the table being defined after it is fine.)
#define __FHGFSOPS_ERRLIST_SIZE \
( (sizeof(__FHGFSOPS_ERRLIST) ) / (sizeof(struct FhgfsOpsErrListEntry) ) - 1)
/* -1 because last elem is NULL */
// Note: This is based on the FhgfsOpsErr entries
// Note: We use EREMOTEIO as a generic error here
// Note: The table is indexed by the numeric FhgfsOpsErr value, so row N must describe the enum
// entry with value N.
struct FhgfsOpsErrListEntry const __FHGFSOPS_ERRLIST[] =
{
{"Success", 0}, // FhgfsOpsErr_SUCCESS
{"Internal error", EREMOTEIO}, // FhgfsOpsErr_INTERNAL
{"Interrupted system call", EINTR}, // FhgfsOpsErr_INTERRUPTED
{"Communication error", ECOMM}, // FhgfsOpsErr_COMMUNICATION
{"Communication timeout", ETIMEDOUT}, // FhgfsOpsErr_COMMTIMEDOUT
{"Unknown node", EREMOTEIO}, // FhgfsOpsErr_UNKNOWNNODE
{"Node is not owner of entry", EREMOTEIO}, // FhgfsOpsErr_NOTOWNER
{"Entry exists already", EEXIST}, // FhgfsOpsErr_EXISTS
{"Path does not exist", ENOENT}, // FhgfsOpsErr_PATHNOTEXISTS
{"Entry is in use", EBUSY}, // FhgfsOpsErr_INUSE
{"Dynamic attributes of entry are outdated", EREMOTEIO}, // FhgfsOpsErr_DYNAMICATTRIBSOUTDATED
{"Removed", 999}, // former FhgfsOpsErr_PARENTTOSUBDIR, not used (placeholder keeps the indices aligned)
{"Entry is not a directory", ENOTDIR}, // FhgfsOpsErr_NOTADIR
{"Directory is not empty", ENOTEMPTY}, // FhgfsOpsErr_NOTEMPTY
{"No space left", ENOSPC}, // FhgfsOpsErr_NOSPACE
{"Unknown storage target", EREMOTEIO}, // FhgfsOpsErr_UNKNOWNTARGET
{"Operation would block", EWOULDBLOCK}, // FhgfsOpsErr_WOULDBLOCK
{"Inode not inlined", EREMOTEIO}, // FhgfsOpsErr_INODENOTINLINED
{"Underlying file system error", EREMOTEIO}, // FhgfsOpsErr_SAVEERROR
{"Argument too large", EFBIG}, // FhgfsOpsErr_TOOBIG
{"Invalid argument", EINVAL}, // FhgfsOpsErr_INVAL
{"Bad memory address", EFAULT}, // FhgfsOpsErr_ADDRESSFAULT
{"Try again", EAGAIN}, // FhgfsOpsErr_AGAIN
{"Potential cache loss for open file handle. (Server crash detected.)" , EREMOTEIO}, /*
FhgfsOpsErr_STORAGE_SRV_CRASHED*/
{"Permission denied", EPERM}, // FhgfsOpsErr_PERM
{"Quota exceeded", EDQUOT}, // FhgfsOpsErr_DQUOT
{"Out of memory", ENOMEM}, // FhgfsOpsErr_OUTOFMEM
{"Numerical result out of range", ERANGE}, // FhgfsOpsErr_RANGE
{"No data available", ENODATA}, // FhgfsOpsErr_NODATA
{"Operation not supported", EOPNOTSUPP}, // FhgfsOpsErr_NOTSUPP
{"Argument list too long", E2BIG}, // FhgfsOpsErr_TOOLONG
{"Metadata version mismatch", ESTALE}, // FhgfsOpsErr_METAVERSIONMISMATCH
{"Inode is locked", EBUSY}, // FhgfsOpsErr_INODELOCKED
{"File access denied by state restrictions", EWOULDBLOCK}, // FhgfsOpsErr_FILEACCESS_DENIED
{NULL, 0}
};
/**
 * Look up the description of an error code in __FHGFSOPS_ERRLIST.
 *
 * Codes outside of the table (including negative ones, which turn into huge values by the cast
 * to size_t) yield a generic string; debug builds additionally log a warning and dump the stack.
 *
 * @return static human-readable error string
 */
const char* FhgfsOpsErr_toErrString(FhgfsOpsErr errCode)
{
   const size_t listIndex = (size_t)errCode;

   if(unlikely(listIndex >= __FHGFSOPS_ERRLIST_SIZE) )
   {
#ifdef BEEGFS_DEBUG
      printk_fhgfs(KERN_WARNING, "Unknown errCode given to FhgfsOpsErr_toErrString(): %d/%u "
         "(dumping stack...)\n", (int)errCode, (unsigned)errCode);
      dump_stack();
#endif

      return "Unknown error code";
   }

   return __FHGFSOPS_ERRLIST[listIndex].errString;
}
/**
 * Translate an error code into the linux error code stored in __FHGFSOPS_ERRLIST.
 *
 * Codes outside of the table (including negative ones, which turn into huge values by the cast
 * to size_t) are mapped to -EPERM; debug builds additionally log a warning and dump the stack.
 *
 * @return negative linux error code
 */
int FhgfsOpsErr_toSysErr(FhgfsOpsErr errCode)
{
   const size_t listIndex = (size_t)errCode;

   if(unlikely(listIndex >= __FHGFSOPS_ERRLIST_SIZE) )
   {
#ifdef BEEGFS_DEBUG
      printk_fhgfs(KERN_WARNING, "Unknown errCode given to FhgfsOpsErr_toSysErr(): %d/%u "
         "(dumping stack...)\n", (int)errCode, (unsigned)errCode);
      dump_stack();
#endif

      return -EPERM;
   }

   // the table stores positive values
   return -(__FHGFSOPS_ERRLIST[listIndex].sysErr);
}

View File

@@ -0,0 +1,79 @@
#ifndef STORAGEERRORS_H_
#define STORAGEERRORS_H_
/*
* Remember to keep these definitions in sync with StorageErrors.h of fhgfs_common!
*/
#include <common/Common.h>
// Note: keep this above __FHGFSOPS_ERRLIST declaration
// One row of the error table: description plus the matching linux errno value.
struct FhgfsOpsErrListEntry
{
const char* errString; // human-readable error string
int sysErr; // positive linux system error code
};
// The table itself (defined in the corresponding .c file); indexed by FhgfsOpsErr value.
extern struct FhgfsOpsErrListEntry const __FHGFSOPS_ERRLIST[];
/**
 * Note: Remember to keep this in sync with FHGFSOPS_ERRLIST (the numeric value of each entry is
 * used as index into that table).
 *
 * Note: We need the negative dummy (-1) because some return values (like CommKit) cast this enum
 * to a negative int64_t and this leads to bad (positive) values when the enum isn't signed. So the
 * dummy forces the compiler to make the enum a signed type.
 */
enum FhgfsOpsErr
{
FhgfsOpsErr_DUMMY_DONTUSEME = -1, /* see comment above */
FhgfsOpsErr_SUCCESS = 0,
FhgfsOpsErr_INTERNAL = 1,
FhgfsOpsErr_INTERRUPTED = 2,
FhgfsOpsErr_COMMUNICATION = 3,
FhgfsOpsErr_COMMTIMEDOUT = 4,
FhgfsOpsErr_UNKNOWNNODE = 5,
FhgfsOpsErr_NOTOWNER = 6,
FhgfsOpsErr_EXISTS = 7,
FhgfsOpsErr_PATHNOTEXISTS = 8,
FhgfsOpsErr_INUSE = 9,
FhgfsOpsErr_DYNAMICATTRIBSOUTDATED = 10,
FhgfsOpsErr_PARENTTOSUBDIR = 11, // the error table marks this one as removed / not used
FhgfsOpsErr_NOTADIR = 12,
FhgfsOpsErr_NOTEMPTY = 13,
FhgfsOpsErr_NOSPACE = 14,
FhgfsOpsErr_UNKNOWNTARGET = 15,
FhgfsOpsErr_WOULDBLOCK = 16,
FhgfsOpsErr_INODENOTINLINED = 17, // inode is not inlined into the dentry
FhgfsOpsErr_SAVEERROR = 18, // saving to the underlying file system failed
FhgfsOpsErr_TOOBIG = 19, // corresponds to EFBIG
FhgfsOpsErr_INVAL = 20, // corresponds to EINVAL
FhgfsOpsErr_ADDRESSFAULT = 21, // corresponds to EFAULT
FhgfsOpsErr_AGAIN = 22, // corresponds to EAGAIN
FhgfsOpsErr_STORAGE_SRV_CRASHED = 23, /* Potential cache loss for open file handle.
(Server crash detected.)*/
FhgfsOpsErr_PERM = 24, // corresponds to EPERM
FhgfsOpsErr_DQUOT = 25, // corresponds to EDQUOT (quota exceeded)
FhgfsOpsErr_OUTOFMEM = 26, // corresponds to ENOMEM (mem allocation failed)
FhgfsOpsErr_RANGE = 27, // corresponds to ERANGE (needed for xattrs)
FhgfsOpsErr_NODATA = 28, // corresponds to ENODATA==ENOATTR (xattr not found)
FhgfsOpsErr_NOTSUPP = 29, // corresponds to EOPNOTSUPP
FhgfsOpsErr_TOOLONG = 30, // corresponds to E2BIG (needed for xattrs)
FhgfsOpsErr_METAVERSIONMISMATCH = 31, // metadata versions do not match, needed for cache invalidation
FhgfsOpsErr_INODELOCKED = 32, // inode is locked, needed for GlobalInodeLock store
FhgfsOpsErr_FILEACCESS_DENIED = 33, // file access denied due to current file state restrictions
};
typedef enum FhgfsOpsErr FhgfsOpsErr;
extern const char* FhgfsOpsErr_toErrString(FhgfsOpsErr fhgfsOpsErr);
extern int FhgfsOpsErr_toSysErr(FhgfsOpsErr fhgfsOpsErr);
#endif /*STORAGEERRORS_H_*/

View File

@@ -0,0 +1,16 @@
#include "StoragePoolId.h"
#include <common/toolkit/Serialization.h>
/**
 * Serialize the pool ID as a single unsigned short.
 */
void StoragePoolId_serialize(SerializeCtx* ctx, const StoragePoolId* this)
{
Serialization_serializeUShort(ctx, this->value);
}
/**
 * Deserialize the pool ID (a single unsigned short) into outThis->value.
 *
 * @return false if the value could not be read from the context
 */
bool StoragePoolId_deserialize(DeserializeCtx* ctx, StoragePoolId* outThis)
{
   return Serialization_deserializeUShort(ctx, &outThis->value) ? true : false;
}

View File

@@ -0,0 +1,37 @@
#ifndef CLIENT_STORAGEPOOLID_H
#define CLIENT_STORAGEPOOLID_H
#include <common/toolkit/SerializationTypes.h>
#include <common/toolkit/StringTk.h>
// keep in sync with values from server's StoragePoolStore
#define STORAGEPOOLID_INVALIDPOOLID 0
// Note: this must always be in sync with server's StoragePoolId!
struct StoragePoolId;
typedef struct StoragePoolId StoragePoolId;

/** Thin wrapper type around the 16 bit numeric ID of a storage pool. */
struct StoragePoolId
{
   uint16_t value;
};

/** Assign a new numeric ID. */
static inline void StoragePoolId_set(StoragePoolId* this, uint16_t value)
{
   *this = (StoragePoolId){ .value = value };
}

/**
 * @return true if both IDs carry the same numeric value
 */
static inline bool StoragePoolId_compare(const StoragePoolId* this, const StoragePoolId* other)
{
   const uint16_t lhs = this->value;
   const uint16_t rhs = other->value;

   return lhs == rhs;
}
/**
 * @return string representation of the numeric ID as produced by StringTk_uintToStr()
 *    NOTE(review): presumably a newly allocated string that the caller has to free - confirm
 *    against StringTk_uintToStr().
 */
static inline char* StoragePoolId_str(const StoragePoolId* this)
{
return StringTk_uintToStr(this->value);
}
extern void StoragePoolId_serialize(SerializeCtx* ctx, const StoragePoolId* this);
extern bool StoragePoolId_deserialize(DeserializeCtx* ctx, StoragePoolId* outThis);
#endif /* CLIENT_STORAGEPOOLID_H */

View File

@@ -0,0 +1,70 @@
#include <common/toolkit/MathTk.h>
#include "BuddyMirrorPattern.h"
/**
 * Deserialize the pattern-specific part: defaultNumTargets followed by the vector of mirror
 * buddy group IDs.
 *
 * @return false on a deserialization error or if the group ID vector is empty
 */
bool BuddyMirrorPattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx)
{
   BuddyMirrorPattern* pattern = (BuddyMirrorPattern*)this;
   RawList rawGroupIDs;

   // defaultNumTargets
   if(!Serialization_deserializeUInt(ctx, &pattern->defaultNumTargets) )
      return false;

   // mirrorBuddyGroupIDs (preprocess first, then fill the vector)
   if(!Serialization_deserializeUInt16VecPreprocess(ctx, &rawGroupIDs)
      || !Serialization_deserializeUInt16Vec(&rawGroupIDs, &pattern->mirrorBuddyGroupIDs) )
      return false;

   // check mirrorBuddyGroupIDs: a pattern without any group is rejected
   return UInt16Vec_length(&pattern->mirrorBuddyGroupIDs) != 0;
}
/**
 * Map a file offset to the index of the responsible entry in mirrorBuddyGroupIDs: the chunk
 * number (pos / chunkSize) modulo the number of buddy groups.
 *
 * Note: relies on a non-zero chunkSize and a non-empty group list (the latter is enforced by
 * BuddyMirrorPattern_deserializePattern() ).
 * NOTE(review): this is a plain 64bit division; confirm whether 32bit archs need do_div() here.
 */
size_t BuddyMirrorPattern_getStripeTargetIndex(StripePattern* this, int64_t pos)
{
struct BuddyMirrorPattern* p = container_of(this, struct BuddyMirrorPattern, stripePattern);
return (pos / this->chunkSize) % UInt16Vec_length(&p->mirrorBuddyGroupIDs);
}
/**
 * @return the mirror buddy group ID that is responsible for the given file offset
 */
uint16_t BuddyMirrorPattern_getStripeTargetID(StripePattern* this, int64_t pos)
{
   UInt16Vec* groupIDs = &( (BuddyMirrorPattern*)this)->mirrorBuddyGroupIDs;

   return UInt16Vec_at(groupIDs, BuddyMirrorPattern_getStripeTargetIndex(this, pos) );
}
/**
 * Copy the mirror buddy group IDs of this pattern into outTargetIDs
 * (via ListTk_copyUInt16ListToVec(), with the vector passed as a UInt16List).
 */
void BuddyMirrorPattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs)
{
BuddyMirrorPattern* thisCast = (BuddyMirrorPattern*)this;
ListTk_copyUInt16ListToVec( (UInt16List*)&thisCast->mirrorBuddyGroupIDs, outTargetIDs);
}
/**
 * @return pointer to the internal vector of mirror buddy group IDs (not a copy)
 */
UInt16Vec* BuddyMirrorPattern_getStripeTargetIDs(StripePattern* this)
{
   return &( (BuddyMirrorPattern*)this)->mirrorBuddyGroupIDs;
}
/**
 * @return the minimum number of targets for this pattern type, which is the constant 1
 *    (the pattern instance itself is not consulted)
 */
unsigned BuddyMirrorPattern_getMinNumTargets(StripePattern* this)
{
return 1;
}
/**
 * @return the defaultNumTargets value stored in this pattern
 */
unsigned BuddyMirrorPattern_getDefaultNumTargets(StripePattern* this)
{
   return ( (BuddyMirrorPattern*)this)->defaultNumTargets;
}

View File

@@ -0,0 +1,124 @@
#ifndef BUDDYMIRRORPATTERN_H_
#define BUDDYMIRRORPATTERN_H_
#include <common/toolkit/Serialization.h>
#include "StripePattern.h"
struct BuddyMirrorPattern;
typedef struct BuddyMirrorPattern BuddyMirrorPattern;
// init / construct / uninit (defined inline further down in this header)
static inline void BuddyMirrorPattern_init(BuddyMirrorPattern* this,
unsigned chunkSize, UInt16Vec* mirrorBuddyGroupIDs, unsigned defaultNumTargets);
static inline void BuddyMirrorPattern_initFromChunkSize(BuddyMirrorPattern* this,
unsigned chunkSize);
static inline BuddyMirrorPattern* BuddyMirrorPattern_construct(
unsigned chunkSize, UInt16Vec* mirrorBuddyGroupIDs, unsigned defaultNumTargets);
static inline BuddyMirrorPattern* BuddyMirrorPattern_constructFromChunkSize(unsigned chunkSize);
static inline void BuddyMirrorPattern_uninit(StripePattern* this);
static inline void __BuddyMirrorPattern_assignVirtualFunctions(BuddyMirrorPattern* this);
// virtual functions
// (installed into the StripePattern function pointers by
// __BuddyMirrorPattern_assignVirtualFunctions() )
extern bool BuddyMirrorPattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx);
extern size_t BuddyMirrorPattern_getStripeTargetIndex(StripePattern* this, int64_t pos);
extern uint16_t BuddyMirrorPattern_getStripeTargetID(StripePattern* this, int64_t pos);
extern void BuddyMirrorPattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs);
extern UInt16Vec* BuddyMirrorPattern_getStripeTargetIDs(StripePattern* this);
extern unsigned BuddyMirrorPattern_getMinNumTargets(StripePattern* this);
extern unsigned BuddyMirrorPattern_getDefaultNumTargets(StripePattern* this);
/*
 * Stripe pattern whose "targets" are mirror buddy group IDs.
 */
struct BuddyMirrorPattern
{
StripePattern stripePattern; // base part; must stay the first member, because the code casts
// between StripePattern* and BuddyMirrorPattern*
UInt16Vec mirrorBuddyGroupIDs; // the buddy group IDs that the chunks are distributed over
unsigned defaultNumTargets; // set by _init() or by deserialization
};
/**
 * Initialize a pattern from explicit values.
 *
 * @param mirrorBuddyGroupIDs will be copied
 * @param defaultNumTargets default number of targets (0 for app-level default, which is 4)
 */
void BuddyMirrorPattern_init(BuddyMirrorPattern* this,
   unsigned chunkSize, UInt16Vec* mirrorBuddyGroupIDs, unsigned defaultNumTargets)
{
   StripePattern* base = (StripePattern*)this;

   StripePattern_initFromPatternType(base, STRIPEPATTERN_BuddyMirror, chunkSize);

   // assign virtual functions
   __BuddyMirrorPattern_assignVirtualFunctions(this);

   // init attribs
   UInt16Vec_init(&this->mirrorBuddyGroupIDs);
   ListTk_copyUInt16ListToVec( (UInt16List*)mirrorBuddyGroupIDs, &this->mirrorBuddyGroupIDs);

   if(defaultNumTargets)
      this->defaultNumTargets = defaultNumTargets;
   else
      this->defaultNumTargets = 4;
}
/**
 * Initialize the base part, the virtual functions and an empty mirrorBuddyGroupIDs vector.
 * defaultNumTargets is not set here; BuddyMirrorPattern_deserializePattern() fills it in.
 *
 * Note: for deserialization only
 */
void BuddyMirrorPattern_initFromChunkSize(BuddyMirrorPattern* this, unsigned chunkSize)
{
StripePattern_initFromPatternType( (StripePattern*)this, STRIPEPATTERN_BuddyMirror, chunkSize);
// assign virtual functions
__BuddyMirrorPattern_assignVirtualFunctions(this);
// init attribs
UInt16Vec_init(&this->mirrorBuddyGroupIDs);
}
/**
 * Allocate and initialize a new pattern.
 *
 * @param mirrorBuddyGroupIDs will be copied
 * @param defaultNumTargets default number of targets (0 for app-level default)
 * @return the new pattern or NULL if os_kmalloc() returned NULL
 */
BuddyMirrorPattern* BuddyMirrorPattern_construct(
   unsigned chunkSize, UInt16Vec* mirrorBuddyGroupIDs, unsigned defaultNumTargets)
{
   struct BuddyMirrorPattern* this = os_kmalloc(sizeof(*this) );

   // don't let _init() write through a NULL pointer when the allocation failed
   if(this)
      BuddyMirrorPattern_init(this, chunkSize, mirrorBuddyGroupIDs, defaultNumTargets);

   return this;
}
/**
 * Allocate a new pattern and prepare it for deserialization.
 *
 * Note: for deserialization only
 *
 * @return the new pattern or NULL if os_kmalloc() returned NULL
 */
BuddyMirrorPattern* BuddyMirrorPattern_constructFromChunkSize(unsigned chunkSize)
{
   struct BuddyMirrorPattern* this = os_kmalloc(sizeof(*this) );

   // don't let _initFromChunkSize() write through a NULL pointer when the allocation failed
   if(this)
      BuddyMirrorPattern_initFromChunkSize(this, chunkSize);

   return this;
}
/**
 * Release the resources of the pattern-specific part (the group ID vector). The pattern
 * object itself is not freed here.
 */
void BuddyMirrorPattern_uninit(StripePattern* this)
{
   UInt16Vec_uninit(&( (BuddyMirrorPattern*)this)->mirrorBuddyGroupIDs);
}
/**
 * Point the function pointers of the StripePattern base part to the BuddyMirrorPattern
 * implementations.
 */
void __BuddyMirrorPattern_assignVirtualFunctions(BuddyMirrorPattern* this)
{
   StripePattern* base = (StripePattern*)this;

   base->uninit = BuddyMirrorPattern_uninit;
   base->deserializePattern = BuddyMirrorPattern_deserializePattern;

   base->getStripeTargetIndex = BuddyMirrorPattern_getStripeTargetIndex;
   base->getStripeTargetID = BuddyMirrorPattern_getStripeTargetID;
   base->getStripeTargetIDsCopy = BuddyMirrorPattern_getStripeTargetIDsCopy;
   base->getStripeTargetIDs = BuddyMirrorPattern_getStripeTargetIDs;

   base->getMinNumTargets = BuddyMirrorPattern_getMinNumTargets;
   base->getDefaultNumTargets = BuddyMirrorPattern_getDefaultNumTargets;
}
#endif /*BUDDYMIRRORPATTERN_H_*/

View File

@@ -0,0 +1,70 @@
#include <common/toolkit/MathTk.h>
#include "Raid0Pattern.h"
/**
 * Deserialize the pattern-specific part: defaultNumTargets followed by the vector of stripe
 * target IDs.
 *
 * @return false on a deserialization error or if the target ID vector is empty
 */
bool Raid0Pattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx)
{
   Raid0Pattern* pattern = (Raid0Pattern*)this;
   RawList rawTargetIDs;

   // defaultNumTargets
   if(!Serialization_deserializeUInt(ctx, &pattern->defaultNumTargets) )
      return false;

   // targetIDs (preprocess first, then fill the vector)
   if(!Serialization_deserializeUInt16VecPreprocess(ctx, &rawTargetIDs)
      || !Serialization_deserializeUInt16Vec(&rawTargetIDs, &pattern->stripeTargetIDs) )
      return false;

   // check targetIDs: a pattern without any target is rejected
   return UInt16Vec_length(&pattern->stripeTargetIDs) != 0;
}
/**
 * Map a file offset to the index of the responsible entry in stripeTargetIDs: the chunk number
 * (pos / chunkSize) modulo the number of stripe targets.
 *
 * Note: relies on a non-zero chunkSize and a non-empty target list (the latter is enforced by
 * Raid0Pattern_deserializePattern() ).
 * NOTE(review): this is a plain 64bit division; confirm whether 32bit archs need do_div() here.
 */
size_t Raid0Pattern_getStripeTargetIndex(StripePattern* this, int64_t pos)
{
struct Raid0Pattern* p = container_of(this, struct Raid0Pattern, stripePattern);
return (pos / this->chunkSize) % UInt16Vec_length(&p->stripeTargetIDs);
}
/**
 * @return the ID of the stripe target that is responsible for the given file offset
 */
uint16_t Raid0Pattern_getStripeTargetID(StripePattern* this, int64_t pos)
{
   UInt16Vec* targetIDs = &( (Raid0Pattern*)this)->stripeTargetIDs;

   return UInt16Vec_at(targetIDs, Raid0Pattern_getStripeTargetIndex(this, pos) );
}
/**
 * Copy the stripe target IDs of this pattern into outTargetIDs
 * (via ListTk_copyUInt16ListToVec(), with the vector passed as a UInt16List).
 */
void Raid0Pattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs)
{
Raid0Pattern* thisCast = (Raid0Pattern*)this;
ListTk_copyUInt16ListToVec( (UInt16List*)&thisCast->stripeTargetIDs, outTargetIDs);
}
/**
 * @return pointer to the internal vector of stripe target IDs (not a copy)
 */
UInt16Vec* Raid0Pattern_getStripeTargetIDs(StripePattern* this)
{
   return &( (Raid0Pattern*)this)->stripeTargetIDs;
}
/**
 * @return the minimum number of targets for this pattern type, which is the constant 1
 *    (the pattern instance itself is not consulted)
 */
unsigned Raid0Pattern_getMinNumTargets(StripePattern* this)
{
return 1;
}
/**
 * @return the defaultNumTargets value stored in this pattern
 */
unsigned Raid0Pattern_getDefaultNumTargets(StripePattern* this)
{
   return ( (Raid0Pattern*)this)->defaultNumTargets;
}

View File

@@ -0,0 +1,124 @@
#ifndef RAID0PATTERN_H_
#define RAID0PATTERN_H_
#include <common/toolkit/Serialization.h>
#include "StripePattern.h"
struct Raid0Pattern;
typedef struct Raid0Pattern Raid0Pattern;
// init / construct / uninit (defined inline further down in this header)
static inline void Raid0Pattern_init(Raid0Pattern* this,
unsigned chunkSize, UInt16Vec* stripeTargetIDs, unsigned defaultNumTargets);
static inline void Raid0Pattern_initFromChunkSize(Raid0Pattern* this, unsigned chunkSize);
static inline Raid0Pattern* Raid0Pattern_construct(
unsigned chunkSize, UInt16Vec* stripeTargetIDs, unsigned defaultNumTargets);
static inline Raid0Pattern* Raid0Pattern_constructFromChunkSize(unsigned chunkSize);
static inline void Raid0Pattern_uninit(StripePattern* this);
static inline void __Raid0Pattern_assignVirtualFunctions(Raid0Pattern* this);
// virtual functions
// (installed into the StripePattern function pointers by __Raid0Pattern_assignVirtualFunctions() )
extern bool Raid0Pattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx);
extern size_t Raid0Pattern_getStripeTargetIndex(StripePattern* this, int64_t pos);
extern uint16_t Raid0Pattern_getStripeTargetID(StripePattern* this, int64_t pos);
extern void Raid0Pattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs);
extern UInt16Vec* Raid0Pattern_getStripeTargetIDs(StripePattern* this);
extern unsigned Raid0Pattern_getMinNumTargets(StripePattern* this);
extern unsigned Raid0Pattern_getDefaultNumTargets(StripePattern* this);
/*
 * Stripe pattern that distributes the chunks of a file over a list of targets.
 */
struct Raid0Pattern
{
StripePattern stripePattern; // base part; must stay the first member, because the code casts
// between StripePattern* and Raid0Pattern*
UInt16Vec stripeTargetIDs; // the target IDs that the chunks are distributed over
unsigned defaultNumTargets; // set by _init() or by deserialization
};
/**
 * Initialize a pattern from explicit values.
 *
 * @param stripeTargetIDs will be copied
 * @param defaultNumTargets default number of targets (0 for app-level default, which is 4)
 */
void Raid0Pattern_init(Raid0Pattern* this,
   unsigned chunkSize, UInt16Vec* stripeTargetIDs, unsigned defaultNumTargets)
{
   StripePattern* base = (StripePattern*)this;

   StripePattern_initFromPatternType(base, STRIPEPATTERN_Raid0, chunkSize);

   // assign virtual functions
   __Raid0Pattern_assignVirtualFunctions(this);

   // init attribs
   UInt16Vec_init(&this->stripeTargetIDs);
   ListTk_copyUInt16ListToVec( (UInt16List*)stripeTargetIDs, &this->stripeTargetIDs);

   if(defaultNumTargets)
      this->defaultNumTargets = defaultNumTargets;
   else
      this->defaultNumTargets = 4;
}
/**
 * Initialize the base part, the virtual functions and an empty stripeTargetIDs vector.
 * defaultNumTargets is not set here; Raid0Pattern_deserializePattern() fills it in.
 *
 * Note: for deserialization only
 */
void Raid0Pattern_initFromChunkSize(Raid0Pattern* this, unsigned chunkSize)
{
StripePattern_initFromPatternType( (StripePattern*)this, STRIPEPATTERN_Raid0, chunkSize);
// assign virtual functions
__Raid0Pattern_assignVirtualFunctions(this);
// init attribs
UInt16Vec_init(&this->stripeTargetIDs);
}
/**
 * Allocate and initialize a new pattern.
 *
 * @param stripeTargetIDs will be copied
 * @param defaultNumTargets default number of targets (0 for app-level default)
 * @return the new pattern or NULL if os_kmalloc() returned NULL
 */
Raid0Pattern* Raid0Pattern_construct(
   unsigned chunkSize, UInt16Vec* stripeTargetIDs, unsigned defaultNumTargets)
{
   struct Raid0Pattern* this = os_kmalloc(sizeof(*this) );

   // don't let _init() write through a NULL pointer when the allocation failed
   if(this)
      Raid0Pattern_init(this, chunkSize, stripeTargetIDs, defaultNumTargets);

   return this;
}
/**
 * Allocate a new pattern and prepare it for deserialization.
 *
 * Note: for deserialization only
 *
 * @return the new pattern or NULL if os_kmalloc() returned NULL
 */
Raid0Pattern* Raid0Pattern_constructFromChunkSize(unsigned chunkSize)
{
   struct Raid0Pattern* this = os_kmalloc(sizeof(*this) );

   // don't let _initFromChunkSize() write through a NULL pointer when the allocation failed
   if(this)
      Raid0Pattern_initFromChunkSize(this, chunkSize);

   return this;
}
/**
 * Release the resources of the pattern-specific part (the target ID vector). The pattern
 * object itself is not freed here.
 */
void Raid0Pattern_uninit(StripePattern* this)
{
   UInt16Vec_uninit(&( (Raid0Pattern*)this)->stripeTargetIDs);
}
/**
 * Point the function pointers of the StripePattern base part to the Raid0Pattern
 * implementations.
 */
void __Raid0Pattern_assignVirtualFunctions(Raid0Pattern* this)
{
   StripePattern* base = (StripePattern*)this;

   base->uninit = Raid0Pattern_uninit;
   base->deserializePattern = Raid0Pattern_deserializePattern;

   base->getStripeTargetIndex = Raid0Pattern_getStripeTargetIndex;
   base->getStripeTargetID = Raid0Pattern_getStripeTargetID;
   base->getStripeTargetIDsCopy = Raid0Pattern_getStripeTargetIDsCopy;
   base->getStripeTargetIDs = Raid0Pattern_getStripeTargetIDs;

   base->getMinNumTargets = Raid0Pattern_getMinNumTargets;
   base->getDefaultNumTargets = Raid0Pattern_getDefaultNumTargets;
}
#endif /*RAID0PATTERN_H_*/

View File

@@ -0,0 +1,113 @@
#include <common/toolkit/MathTk.h>
#include "Raid10Pattern.h"
/**
 * Deserialize the pattern-specific part: defaultNumTargets, the stripe target IDs and the
 * mirror target IDs. Afterwards stripeSetSize is derived from the number of stripe targets
 * and the chunk size.
 *
 * @return false on a deserialization error or if the stripe target ID vector is empty
 */
bool Raid10Pattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx)
{
   Raid10Pattern* thisCast = (Raid10Pattern*)this;

   RawList targetIDsList;
   RawList mirrorTargetIDsList;

   // defaultNumTargets
   if(!Serialization_deserializeUInt(ctx, &thisCast->defaultNumTargets) )
      return false;

   // targetIDs
   if(!Serialization_deserializeUInt16VecPreprocess(ctx, &targetIDsList) )
      return false;

   if(!Serialization_deserializeUInt16Vec(&targetIDsList, &thisCast->stripeTargetIDs) )
      return false;

   // mirrorTargetIDs
   if(!Serialization_deserializeUInt16VecPreprocess(ctx, &mirrorTargetIDsList) )
      return false;

   if(!Serialization_deserializeUInt16Vec(&mirrorTargetIDsList, &thisCast->mirrorTargetIDs) )
      return false;

   // check targetIDs (same rule as in the other pattern types): without any stripe target the
   // stripeSetSize below would be 0, and Raid10Pattern_getStripeTargetIndex() would divide by it
   if(!UInt16Vec_length(&thisCast->stripeTargetIDs) )
      return false;

   // calc stripeSetSize
   thisCast->stripeSetSize = UInt16Vec_length(
      &thisCast->stripeTargetIDs) * StripePattern_getChunkSize(this);

   return true;
}
/**
 * Map a file offset to the index of the responsible entry in stripeTargetIDs: the offset
 * within the stripe set (pos modulo stripeSetSize) divided by the chunk size.
 *
 * Note: relies on stripeSetSize being non-zero (it is the modulo base in the slow path).
 * NOTE(review): do_div() is documented for an unsigned 64bit dividend, while pos is an int64_t -
 * confirm that negative offsets never get here.
 */
size_t Raid10Pattern_getStripeTargetIndex(StripePattern* this, int64_t pos)
{
Raid10Pattern* thisCast = (Raid10Pattern*)this;
/* the code below is an optimization (wrt division/modulo) of following the two lines:
int64_t stripeSetInnerOffset = pos % thisCast->stripeSetSize;
int64_t targetIndex = stripeSetInnerOffset / StripePattern_getChunkSize(this); */
// note: do_div(n64, base32) assigns the result to n64 and returns the remainder!
// (do_div is needed for 64bit division on 32bit archs)
unsigned stripeSetSize = thisCast->stripeSetSize;
int64_t stripeSetInnerOffset;
unsigned chunkSize;
size_t targetIndex;
if(MathTk_isPowerOfTwo(stripeSetSize) )
{ // quick path => no modulo needed
// (for a power of two, masking with "size - 1" keeps exactly the remainder)
stripeSetInnerOffset = pos & (stripeSetSize - 1);
}
else
{ // slow path => modulo
stripeSetInnerOffset = do_div(pos, thisCast->stripeSetSize);
// warning: do_div modifies pos! (so do not use it afterwards within this method)
// (pos is a by-value parameter, so the caller's value is not affected)
}
chunkSize = StripePattern_getChunkSize(this);
// this is "a=b/c" written as "a=b>>log2(c)", because chunkSize is a power of two.
targetIndex = (stripeSetInnerOffset >> MathTk_log2Int32(chunkSize) );
return targetIndex;
}
/**
 * @return the ID of the (primary) stripe target that is responsible for the given file offset
 */
uint16_t Raid10Pattern_getStripeTargetID(StripePattern* this, int64_t pos)
{
   UInt16Vec* targetIDs = &( (Raid10Pattern*)this)->stripeTargetIDs;

   return UInt16Vec_at(targetIDs, Raid10Pattern_getStripeTargetIndex(this, pos) );
}
/**
 * Copy the stripe target IDs of this pattern into outTargetIDs
 * (via ListTk_copyUInt16ListToVec(), with the vector passed as a UInt16List).
 * The mirror target IDs are not part of the copy.
 */
void Raid10Pattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs)
{
Raid10Pattern* thisCast = (Raid10Pattern*)this;
ListTk_copyUInt16ListToVec( (UInt16List*)&thisCast->stripeTargetIDs, outTargetIDs);
}
/**
 * @return pointer to the internal vector of stripe target IDs (not a copy)
 */
UInt16Vec* Raid10Pattern_getStripeTargetIDs(StripePattern* this)
{
   return &( (Raid10Pattern*)this)->stripeTargetIDs;
}
/**
 * @return pointer to the internal vector of mirror target IDs (not a copy)
 */
UInt16Vec* Raid10Pattern_getMirrorTargetIDs(StripePattern* this)
{
   return &( (Raid10Pattern*)this)->mirrorTargetIDs;
}
/**
 * @return the fixed minimum number of targets for this pattern type (always 2)
 */
unsigned Raid10Pattern_getMinNumTargets(StripePattern* this)
{
   const unsigned minNumTargets = 2;

   return minNumTargets;
}
/**
 * @return the defaultNumTargets value stored in this pattern
 */
unsigned Raid10Pattern_getDefaultNumTargets(StripePattern* this)
{
   return ( (Raid10Pattern*)this)->defaultNumTargets;
}

View File

@@ -0,0 +1,96 @@
#ifndef RAID10PATTERN_H_
#define RAID10PATTERN_H_
#include <common/toolkit/Serialization.h>
#include "StripePattern.h"
// forward declaration, so that the prototypes below can use the type before it is defined
struct Raid10Pattern;
typedef struct Raid10Pattern Raid10Pattern;
// inline functions (defined further down in this header)
static inline void Raid10Pattern_initFromChunkSize(Raid10Pattern* this, unsigned chunkSize);
static inline Raid10Pattern* Raid10Pattern_constructFromChunkSize(unsigned chunkSize);
static inline void Raid10Pattern_uninit(StripePattern* this);
static inline void __Raid10Pattern_assignVirtualFunctions(Raid10Pattern* this);
// virtual functions (installed into the StripePattern function pointers by
// __Raid10Pattern_assignVirtualFunctions)
extern bool Raid10Pattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx);
extern size_t Raid10Pattern_getStripeTargetIndex(StripePattern* this, int64_t pos);
extern uint16_t Raid10Pattern_getStripeTargetID(StripePattern* this, int64_t pos);
extern void Raid10Pattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs);
extern UInt16Vec* Raid10Pattern_getStripeTargetIDs(StripePattern* this);
extern UInt16Vec* Raid10Pattern_getMirrorTargetIDs(StripePattern* this);
extern unsigned Raid10Pattern_getMinNumTargets(StripePattern* this);
extern unsigned Raid10Pattern_getDefaultNumTargets(StripePattern* this);
/**
* Stripe pattern with a stripe targets vector and a mirror targets vector.
*
* Note: We don't have the general _construct() and _init() methods implemented, because we just
* don't need them at the moment in the client. (We only have the special _constructFrom...() and
* _initFrom...() for deserialization.)
*/
struct Raid10Pattern
{
// base "class"; must stay the first member, because the code casts between Raid10Pattern* and
// StripePattern*
StripePattern stripePattern;
UInt16Vec stripeTargetIDs; // returned by getStripeTargetIDs(); indexed by getStripeTargetID()
UInt16Vec mirrorTargetIDs; // returned by getMirrorTargetIDs()
unsigned stripeSetSize; // = numStripeTargets * chunkSize
unsigned defaultNumTargets; // returned by getDefaultNumTargets()
};
/**
 * Initialize a Raid10Pattern in place: sets up the StripePattern base with type
 * STRIPEPATTERN_Raid10, installs the Raid10 virtual functions and initializes the members.
 *
 * Note: for deserialization only
 *
 * @param chunkSize chunk size to store in the base pattern
 */
void Raid10Pattern_initFromChunkSize(Raid10Pattern* this, unsigned chunkSize)
{
   StripePattern_initFromPatternType( (StripePattern*)this, STRIPEPATTERN_Raid10, chunkSize);

   // assign virtual functions
   __Raid10Pattern_assignVirtualFunctions(this);

   // init attribs
   UInt16Vec_init(&this->stripeTargetIDs);
   UInt16Vec_init(&this->mirrorTargetIDs);

   /* give the scalar members a defined value: they are otherwise only assigned by pattern
      deserialization, so they would stay indeterminate if that never runs or fails early
      (NOTE(review): assumes the backing memory is not zeroed by the allocator - confirm) */
   this->stripeSetSize = 0;
   this->defaultNumTargets = 0;
}
/**
* Note: for deserialization only
*/
Raid10Pattern* Raid10Pattern_constructFromChunkSize(unsigned chunkSize)
{
struct Raid10Pattern* this = os_kmalloc(sizeof(*this) );
Raid10Pattern_initFromChunkSize(this, chunkSize);
return this;
}
/**
 * Virtual uninit: releases both target ID vectors. Does not free the pattern object itself.
 */
void Raid10Pattern_uninit(StripePattern* this)
{
   Raid10Pattern* raid10 = (Raid10Pattern*)this;
   UInt16Vec* stripeTargets = &raid10->stripeTargetIDs;
   UInt16Vec* mirrorTargets = &raid10->mirrorTargetIDs;

   UInt16Vec_uninit(stripeTargets);
   UInt16Vec_uninit(mirrorTargets);
}
/**
 * Point the function pointers of the StripePattern base at the Raid10 implementations.
 */
void __Raid10Pattern_assignVirtualFunctions(Raid10Pattern* this)
{
   StripePattern* base = (StripePattern*)this;

   // lifecycle and (de)serialization
   base->uninit = Raid10Pattern_uninit;
   base->deserializePattern = Raid10Pattern_deserializePattern;

   // target lookup
   base->getStripeTargetIndex = Raid10Pattern_getStripeTargetIndex;
   base->getStripeTargetID = Raid10Pattern_getStripeTargetID;
   base->getStripeTargetIDsCopy = Raid10Pattern_getStripeTargetIDsCopy;
   base->getStripeTargetIDs = Raid10Pattern_getStripeTargetIDs;
   base->getMirrorTargetIDs = Raid10Pattern_getMirrorTargetIDs;

   // target counts
   base->getMinNumTargets = Raid10Pattern_getMinNumTargets;
   base->getDefaultNumTargets = Raid10Pattern_getDefaultNumTargets;
}
#endif /* RAID10PATTERN_H_ */

View File

@@ -0,0 +1,37 @@
#include "SimplePattern.h"
/**
 * Stub implementation: reads nothing from ctx.
 *
 * @return always true
 */
bool SimplePattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx)
{
   const bool nothingToParse = true;

   return nothingToParse;
}
/**
 * Stub implementation: ignores pos.
 *
 * @return always 0
 */
size_t SimplePattern_getStripeTargetIndex(StripePattern* this, int64_t pos)
{
   const size_t fixedIndex = 0;

   return fixedIndex;
}
/**
 * Stub implementation: ignores pos.
 *
 * @return always 0
 */
uint16_t SimplePattern_getStripeTargetID(StripePattern* this, int64_t pos)
{
   const uint16_t fixedTargetID = 0;

   return fixedTargetID;
}
/**
 * Stub implementation.
 *
 * @return always 0
 */
unsigned SimplePattern_getMinNumTargets(StripePattern* this)
{
   const unsigned minNumTargets = 0;

   return minNumTargets;
}
/**
 * Stub implementation.
 *
 * @return always 0
 */
unsigned SimplePattern_getDefaultNumTargets(StripePattern* this)
{
   const unsigned defaultNumTargets = 0;

   return defaultNumTargets;
}
/**
 * Stub implementation: leaves outTargetIDs untouched.
 */
void SimplePattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs)
{
// nothing to be done here
}
/**
 * Stub implementation: struct SimplePattern has no target IDs vector.
 *
 * @return always NULL
 */
UInt16Vec* SimplePattern_getStripeTargetIDs(StripePattern* this)
{
   UInt16Vec* noTargets = NULL;

   return noTargets;
}

View File

@@ -0,0 +1,65 @@
#ifndef SIMPLEPATTERN_H_
#define SIMPLEPATTERN_H_
#include "StripePattern.h"
// forward declaration, so that the prototypes below can use the type before it is defined
struct SimplePattern;
typedef struct SimplePattern SimplePattern;
// inline functions (defined further down in this header)
static inline void SimplePattern_init(SimplePattern* this,
unsigned patternType, unsigned chunkSize);
static inline SimplePattern* SimplePattern_construct(
unsigned patternType, unsigned chunkSize);
static inline void SimplePattern_uninit(StripePattern* this);
// virtual functions (installed into the StripePattern function pointers by SimplePattern_init)
extern bool SimplePattern_deserializePattern(StripePattern* this, DeserializeCtx* ctx);
extern size_t SimplePattern_getStripeTargetIndex(StripePattern* this, int64_t pos);
extern uint16_t SimplePattern_getStripeTargetID(StripePattern* this, int64_t pos);
extern unsigned SimplePattern_getMinNumTargets(StripePattern* this);
extern unsigned SimplePattern_getDefaultNumTargets(StripePattern* this);
extern void SimplePattern_getStripeTargetIDsCopy(StripePattern* this, UInt16Vec* outTargetIDs);
extern UInt16Vec* SimplePattern_getStripeTargetIDs(StripePattern* this);
/**
 * Pattern without any members of its own besides the StripePattern base.
 */
struct SimplePattern
{
// base "class"; the code casts between SimplePattern* and StripePattern*
StripePattern stripePattern;
};
/**
 * Initialize a SimplePattern in place: sets up the StripePattern base with the given type and
 * chunk size and installs the SimplePattern virtual functions.
 *
 * @param patternType STRIPEPATTERN_... value to store in the base pattern
 * @param chunkSize chunk size to store in the base pattern
 */
void SimplePattern_init(SimplePattern* this,
   unsigned patternType, unsigned chunkSize)
{
   StripePattern* base = (StripePattern*)this;

   StripePattern_initFromPatternType(base, patternType, chunkSize);

   // lifecycle and (de)serialization
   base->uninit = SimplePattern_uninit;
   base->deserializePattern = SimplePattern_deserializePattern;

   // target lookup
   base->getStripeTargetIndex = SimplePattern_getStripeTargetIndex;
   base->getStripeTargetID = SimplePattern_getStripeTargetID;
   base->getStripeTargetIDsCopy = SimplePattern_getStripeTargetIDsCopy;
   base->getStripeTargetIDs = SimplePattern_getStripeTargetIDs;

   // target counts
   base->getMinNumTargets = SimplePattern_getMinNumTargets;
   base->getDefaultNumTargets = SimplePattern_getDefaultNumTargets;
}
/**
 * Allocate a SimplePattern via os_kmalloc() and initialize it with SimplePattern_init().
 *
 * @param patternType STRIPEPATTERN_... value to store in the new pattern
 * @param chunkSize chunk size to store in the new pattern
 * @return the newly allocated and initialized pattern
 */
SimplePattern* SimplePattern_construct(unsigned patternType, unsigned chunkSize)
{
   SimplePattern* newPattern = os_kmalloc(sizeof(*newPattern) );

   SimplePattern_init(newPattern, patternType, chunkSize);

   return newPattern;
}
/**
 * Virtual uninit: intentionally empty, because struct SimplePattern has no members of its own
 * that would need to be released.
 */
void SimplePattern_uninit(StripePattern* this)
{
}
#endif /*SIMPLEPATTERN_H_*/

View File

@@ -0,0 +1,139 @@
#include <common/toolkit/Serialization.h>
#include "BuddyMirrorPattern.h"
#include "Raid0Pattern.h"
#include "Raid10Pattern.h"
#include "SimplePattern.h"
#include "StripePattern.h"
// bit in the serialized patternType field: if set, the header carries no storagePoolId;
// __StripePattern_deserializeHeader masks it out of patternType after evaluating it
#define HAS_NO_POOL_FLAG (1 << 24)
/**
 * Destroy a pattern of any concrete type: runs the pattern's own uninit function through the
 * function pointer and then kfrees the object.
 */
void StripePattern_virtualDestruct(StripePattern* this)
{
   void (*uninitFn)(StripePattern*) = this->uninit;

   uninitFn(this);

   kfree(this);
}
/**
 * Locate a serialized pattern inside ctx without parsing it and skip over it.
 *
 * The length field is read from a copy of ctx, so the reported pattern start still points at
 * the length field itself; ctx is then advanced by the full pattern length.
 *
 * @param outPatternStart set to the current position of ctx (start of the serialized pattern)
 * @param outPatternLength set to the length value read from the buffer
 * @return false if the length field could not be read or exceeds the remaining buffer length
 *    (ctx is not advanced in that case)
 */
bool StripePattern_deserializePatternPreprocess(DeserializeCtx* ctx,
   const char** outPatternStart, uint32_t* outPatternLength)
{
   DeserializeCtx peekCtx = *ctx; // copy, so that reading the length does not consume it

   if(!Serialization_deserializeUInt(&peekCtx, outPatternLength))
      return false;

   *outPatternStart = ctx->data;

   if (ctx->length < *outPatternLength)
      return false;

   // skip the whole pattern
   ctx->length -= *outPatternLength;
   ctx->data += *outPatternLength;

   return true;
}
/**
* @return outPattern; outPattern->patternType is STRIPEPATTERN_Invalid on error
*/
StripePattern* StripePattern_createFromBuf(const char* patternStart,
uint32_t patternLength)
{
struct StripePatternHeader patternHeader;
StripePattern* pattern;
DeserializeCtx ctx = {
.data = patternStart,
.length = patternLength,
};
bool deserRes;
if (!__StripePattern_deserializeHeader(&ctx, &patternHeader))
return (StripePattern*)SimplePattern_construct(STRIPEPATTERN_Invalid, 0);
switch (patternHeader.patternType)
{
case STRIPEPATTERN_Raid0:
{
pattern = (StripePattern*)Raid0Pattern_constructFromChunkSize(patternHeader.chunkSize);
} break;
case STRIPEPATTERN_Raid10:
{
pattern = (StripePattern*)Raid10Pattern_constructFromChunkSize(patternHeader.chunkSize);
} break;
case STRIPEPATTERN_BuddyMirror:
{
pattern = (StripePattern*)BuddyMirrorPattern_constructFromChunkSize(
patternHeader.chunkSize);
} break;
default:
{
pattern = (StripePattern*)SimplePattern_construct(STRIPEPATTERN_Invalid, 0);
return pattern;
} break;
}
deserRes = pattern->deserializePattern(pattern, &ctx);
if(unlikely(!deserRes) )
{ // deserialization failed => discard half-initialized pattern and create new invalid pattern
StripePattern_virtualDestruct(pattern);
pattern = (StripePattern*)SimplePattern_construct(STRIPEPATTERN_Invalid, 0);
return pattern;
}
return pattern;
}
/**
 * Read and validate the pattern header at the current position of ctx.
 *
 * Fields are read in this order: pattern length, pattern type, chunkSize and - unless
 * HAS_NO_POOL_FLAG is set in the type field - the storagePoolId. The flag is masked out of
 * patternType before returning.
 *
 * @param outPatternHeader filled with the header values (may be partially filled on error)
 * @return false if a field could not be read, if the length field is smaller than
 *    STRIPEPATTERN_HEADER_LENGTH or if chunkSize is zero or not a power of two
 */
bool __StripePattern_deserializeHeader(DeserializeCtx* ctx,
   struct StripePatternHeader* outPatternHeader)
{
   // pattern length
   if(!Serialization_deserializeUInt(ctx, &outPatternHeader->patternLength) )
      return false;

   // pattern type
   if(!Serialization_deserializeUInt(ctx, &outPatternHeader->patternType) )
      return false;

   // chunkSize
   if(!Serialization_deserializeUInt(ctx, &outPatternHeader->chunkSize) )
      return false;

   // storagePoolId (only present if the flag is not set in the type field)
   if (!(outPatternHeader->patternType & HAS_NO_POOL_FLAG)) {
      if(!StoragePoolId_deserialize(ctx, &outPatternHeader->storagePoolId) )
         return false;
   }

   // the flag is not part of the actual pattern type
   outPatternHeader->patternType &= ~HAS_NO_POOL_FLAG;

   // check length field
   if(outPatternHeader->patternLength < STRIPEPATTERN_HEADER_LENGTH)
      return false;

   // check chunkSize: zero is never valid
   if(!outPatternHeader->chunkSize)
      return false;

   /* check chunkSize: the chunk position helpers and the target index calculation replace
      modulo/division by mask/shift and thus silently compute wrong positions for any other
      value, so a chunkSize that is not a power of two is rejected here */
   if(outPatternHeader->chunkSize & (outPatternHeader->chunkSize - 1) )
      return false;

   return true;
}
/**
 * Default implementation of the getMirrorTargetIDs virtual function, installed by
 * StripePattern_initFromPatternType(). Patterns that actually do have mirror targets (e.g.
 * Raid10) override it.
 *
 * @return NULL for patterns that don't have mirror targets.
 */
UInt16Vec* StripePattern_getMirrorTargetIDs(StripePattern* this)
{
   UInt16Vec* noMirrorTargets = NULL;

   return noMirrorTargets;
}

View File

@@ -0,0 +1,146 @@
#ifndef STRIPEPATTERN_H_
#define STRIPEPATTERN_H_
/**
* Note: Do not instantiate this "class" directly (it contains pure virtual functions)
*/
#include <common/Common.h>
#include <common/storage/StoragePoolId.h>
#include <common/toolkit/SerializationTypes.h>
#include <common/toolkit/vector/UInt16Vec.h>
// pattern types
// (STRIPEPATTERN_Invalid is the type of the placeholder pattern that
// StripePattern_createFromBuf returns on error)
#define STRIPEPATTERN_Invalid 0
#define STRIPEPATTERN_Raid0 1
#define STRIPEPATTERN_Raid10 2
#define STRIPEPATTERN_BuddyMirror 3
// minimum allowed stripe pattern chunk size (in bytes)
#define STRIPEPATTERN_MIN_CHUNKSIZE (1024*64)
// pattern serialization defs
// (lower bound for the serialized length field, checked in __StripePattern_deserializeHeader;
// the optional storagePoolId is not counted here)
#define STRIPEPATTERN_HEADER_LENGTH \
(sizeof(unsigned) + sizeof(unsigned) + sizeof(unsigned))
/* length + type + chunkSize*/
/**
 * Deserialized form of the header that precedes every serialized stripe pattern; filled by
 * __StripePattern_deserializeHeader.
 */
struct StripePatternHeader
{
// everything in this struct is in host byte order!
unsigned patternLength; // in bytes
unsigned patternType; // the type of pattern, defined as STRIPEPATTERN_x
unsigned chunkSize; // rejected by the header deserialization if zero
// storagePoolId is unused in the client at the moment; however we deserialize it to avoid human
// errors later
StoragePoolId storagePoolId;
};
// forward declaration, so that the prototypes below can use the type before it is defined
struct StripePattern;
typedef struct StripePattern StripePattern;
// inline initializer for the base part; called by the concrete patterns' init functions
static inline void StripePattern_initFromPatternType(StripePattern* this,
unsigned patternType, unsigned chunkSize);
// calls the virtual uninit method and kfrees the object
extern void StripePattern_virtualDestruct(struct StripePattern* this);
// deserialization helpers
extern bool StripePattern_deserializePatternPreprocess(DeserializeCtx* ctx,
const char** outPatternStart, uint32_t* outPatternLength);
extern bool __StripePattern_deserializeHeader(DeserializeCtx* ctx,
struct StripePatternHeader* outPatternHeader);
// static functions
extern StripePattern* StripePattern_createFromBuf(const char* patternStart,
uint32_t patternLength);
// virtual functions
extern UInt16Vec* StripePattern_getMirrorTargetIDs(StripePattern* this);
// getters & setters
static inline int StripePattern_getPatternType(StripePattern* this);
static inline unsigned StripePattern_getChunkSize(StripePattern* this);
// chunk position helpers (rely on chunkSize being a power of two)
static inline int64_t StripePattern_getChunkStart(StripePattern* this, int64_t pos);
static inline int64_t StripePattern_getNextChunkStart(StripePattern* this, int64_t pos);
static inline int64_t StripePattern_getChunkEnd(StripePattern* this, int64_t pos);
/**
 * Base "class" of all stripe patterns: common attributes plus a table of function pointers
 * ("virtual functions") that each concrete pattern fills in from its init function.
 */
struct StripePattern
{
unsigned patternType; // STRIPEPATTERN_...
unsigned chunkSize; // must be a power of two (optimizations rely on it)
unsigned serialPatternLength; // for (de)serialization
// virtual functions
// releases pattern-specific resources; StripePattern_virtualDestruct calls this before it
// kfrees the object
void (*uninit) (StripePattern* this);
// (de)serialization
// reads the pattern-specific part that follows the header; returns false on error
bool (*deserializePattern) (StripePattern* this, DeserializeCtx* ctx);
// map a file offset to a stripe target index / stripe target ID
size_t (*getStripeTargetIndex) (StripePattern* this, int64_t pos);
uint16_t (*getStripeTargetID) (StripePattern* this, int64_t pos);
// access to the target ID vectors (copy or direct pointer)
void (*getStripeTargetIDsCopy) (StripePattern* this, UInt16Vec* outTargetIDs);
UInt16Vec* (*getStripeTargetIDs) (StripePattern* this);
// pre-set to StripePattern_getMirrorTargetIDs (returns NULL) by
// StripePattern_initFromPatternType
UInt16Vec* (*getMirrorTargetIDs) (StripePattern* this);
unsigned (*getMinNumTargets) (StripePattern* this);
unsigned (*getDefaultNumTargets) (StripePattern* this);
};
/**
 * Initialize the base part of a pattern: stores type and chunk size, resets the serial pattern
 * length and installs the pre-defined getMirrorTargetIDs implementation. All other function
 * pointers are left for the concrete pattern to assign.
 *
 * @param patternType STRIPEPATTERN_...
 * @param chunkSize chunk size to store
 */
void StripePattern_initFromPatternType(StripePattern* this, unsigned patternType,
   unsigned chunkSize)
{
   // pre-defined virtual methods
   this->getMirrorTargetIDs = StripePattern_getMirrorTargetIDs;

   // attributes
   this->serialPatternLength = 0;
   this->chunkSize = chunkSize;
   this->patternType = patternType;
}
/**
 * @return the stored pattern type (STRIPEPATTERN_...), converted from unsigned to int
 */
int StripePattern_getPatternType(StripePattern* this)
{
   int storedType = this->patternType;

   return storedType;
}
/**
 * @return the stored chunk size
 */
unsigned StripePattern_getChunkSize(StripePattern* this)
{
   unsigned storedChunkSize = this->chunkSize;

   return storedChunkSize;
}
/**
 * Get the file position where the chunk containing pos starts.
 *
 * This is "pos - (pos % chunkSize)" with the modulo replaced by a bit mask, which is only
 * correct because chunkSize is a power of two.
 *
 * @param pos file offset
 */
int64_t StripePattern_getChunkStart(StripePattern* this, int64_t pos)
{
   const unsigned chunkMask = this->chunkSize - 1;
   const unsigned offsetInChunk = pos & chunkMask; // == pos % chunkSize

   return pos - offsetInChunk;
}
/**
 * Get the exact file position where the next chunk starts, i.e. the start of the chunk that
 * contains pos plus one chunk size.
 *
 * @param pos file offset
 */
int64_t StripePattern_getNextChunkStart(StripePattern* this, int64_t pos)
{
   int64_t currentChunkStart = StripePattern_getChunkStart(this, pos);

   return currentChunkStart + this->chunkSize;
}
/**
 * Get the exact file position where the current chunk ends, i.e. the last position before the
 * start of the next chunk.
 *
 * @param pos file offset
 */
int64_t StripePattern_getChunkEnd(StripePattern* this, int64_t pos)
{
   int64_t nextChunkStart = StripePattern_getNextChunkStart(this, pos);

   return nextChunkStart - 1;
}
#endif /*STRIPEPATTERN_H_*/