New upstream version 8.1.0
527
client_module/source/os/OsCompat.c
Normal file
@@ -0,0 +1,527 @@
|
||||
/*
|
||||
* Compatibility functions for older Linux versions
|
||||
*/
|
||||
|
||||
#include <linux/mm.h> // for old sles10 kernels, which forgot to include it in backing-dev.h
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/writeback.h>
|
||||
|
||||
#include <os/OsCompat.h>
|
||||
#include <app/App.h>
|
||||
#include <app/log/Logger.h>
|
||||
#include <common/Common.h>
|
||||
#include <filesystem/FhgfsOpsSuper.h>
|
||||
|
||||
#ifndef KERNEL_HAS_MEMDUP_USER
|
||||
/**
|
||||
* memdup_user - duplicate memory region from user space
|
||||
*
|
||||
* @src: source address in user space
|
||||
* @len: number of bytes to copy
|
||||
*
|
||||
* Returns an ERR_PTR() on failure.
|
||||
*/
|
||||
void *memdup_user(const void __user *src, size_t len)
|
||||
{
|
||||
void *p;
|
||||
|
||||
/*
|
||||
* Always use GFP_KERNEL, since copy_from_user() can sleep and
|
||||
* cause pagefault, which makes it pointless to use GFP_NOFS
|
||||
* or GFP_ATOMIC.
|
||||
*/
|
||||
p = kmalloc(len, GFP_KERNEL);
|
||||
if (!p)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (copy_from_user(p, src, len)) {
|
||||
kfree(p);
|
||||
return ERR_PTR(-EFAULT);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
#endif // memdup_user, LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
|
||||
|
||||
|
||||
#if defined(KERNEL_HAS_SB_BDI) && !defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER) && \
|
||||
!defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME)
|
||||
/*
|
||||
* For use from filesystems to quickly init and register a bdi associated
|
||||
* with dirty writeback
|
||||
*/
|
||||
int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
|
||||
unsigned int cap)
|
||||
{
|
||||
static atomic_long_t fhgfs_bdiSeq = ATOMIC_LONG_INIT(0);
|
||||
char tmp[32];
|
||||
int err;
|
||||
|
||||
bdi->name = name;
|
||||
bdi->capabilities = cap;
|
||||
err = bdi_init(bdi);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
sprintf(tmp, "%.28s%s", name, "-%d");
|
||||
err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&fhgfs_bdiSeq));
|
||||
if (err) {
|
||||
bdi_destroy(bdi);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* NOTE: We can't do a feature detection for find_get_pages_tag(), as
|
||||
* this function is in all headers of all supported kernel versions.
|
||||
* However, it is only _exported_ since 2.6.22 and also only
|
||||
* exported in RHEL >=5.10. */
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
|
||||
/**
|
||||
* find_get_pages_tag - find and return pages that match @tag
|
||||
* @mapping: the address_space to search
|
||||
* @index: the starting page index
|
||||
* @tag: the tag index
|
||||
* @nr_pages: the maximum number of pages
|
||||
* @pages: where the resulting pages are placed
|
||||
*
|
||||
* Like find_get_pages, except we only return pages which are tagged with
|
||||
* @tag. We update @index to index the next page for the traversal.
|
||||
*/
|
||||
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
|
||||
int tag, unsigned int nr_pages, struct page **pages)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int ret;
|
||||
|
||||
read_lock_irq(&mapping->tree_lock);
|
||||
ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
|
||||
(void **)pages, *index, nr_pages, tag);
|
||||
for (i = 0; i < ret; i++)
|
||||
page_cache_get(pages[i]);
|
||||
if (ret)
|
||||
*index = pages[ret - 1]->index + 1;
|
||||
read_unlock_irq(&mapping->tree_lock);
|
||||
return ret;
|
||||
}
|
||||
#endif // find_get_pages_tag() for <2.6.22
|
||||
|
||||
|
||||
#ifndef KERNEL_HAS_D_MAKE_ROOT
|
||||
|
||||
/**
|
||||
* This is the former d_alloc_root with an additional iput on error.
|
||||
*/
|
||||
struct dentry *d_make_root(struct inode *root_inode)
|
||||
{
|
||||
struct dentry* allocRes = d_alloc_root(root_inode);
|
||||
if(!allocRes)
|
||||
iput(root_inode);
|
||||
|
||||
return allocRes;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_D_MATERIALISE_UNIQUE
|
||||
/**
|
||||
* d_materialise_unique() was merged into d_splice_alias() in linux-3.19
|
||||
*/
|
||||
struct dentry* d_materialise_unique(struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
#endif // KERNEL_HAS_D_MATERIALISE_UNIQUE
|
||||
|
||||
/**
|
||||
* Note: Call this once during module init (and remember to call kmem_cache_destroy() )
|
||||
*/
|
||||
#if defined(KERNEL_HAS_KMEMCACHE_CACHE_FLAGS_CTOR)
|
||||
struct kmem_cache* OsCompat_initKmemCache(const char* cacheName, size_t cacheSize,
|
||||
void initFuncPtr(void* initObj, struct kmem_cache* cache, unsigned long flags) )
|
||||
#elif defined(KERNEL_HAS_KMEMCACHE_CACHE_CTOR)
|
||||
struct kmem_cache* OsCompat_initKmemCache(const char* cacheName, size_t cacheSize,
|
||||
void initFuncPtr(struct kmem_cache* cache, void* initObj) )
|
||||
#else
|
||||
struct kmem_cache* OsCompat_initKmemCache(const char* cacheName, size_t cacheSize,
|
||||
void initFuncPtr(void* initObj) )
|
||||
#endif // LINUX_VERSION_CODE
|
||||
{
|
||||
struct kmem_cache* cache;
|
||||
|
||||
#if defined(KERNEL_HAS_SLAB_MEM_SPREAD)
|
||||
unsigned long cacheFlags = SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD;
|
||||
#else
|
||||
unsigned long cacheFlags = SLAB_RECLAIM_ACCOUNT;
|
||||
#endif
|
||||
#if defined(KERNEL_HAS_KMEMCACHE_DTOR)
|
||||
cache = kmem_cache_create(cacheName, cacheSize, 0, cacheFlags, initFuncPtr, NULL);
|
||||
#else
|
||||
cache = kmem_cache_create(cacheName, cacheSize, 0, cacheFlags, initFuncPtr);
|
||||
#endif // LINUX_VERSION_CODE
|
||||
|
||||
|
||||
return cache;
|
||||
}
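
/* Usage sketch (illustrative only, not part of the original source): 'ExampleObj' and the
 * function names are placeholders; the single-argument constructor form assumes a recent
 * kernel. The cache is created once at module init and destroyed again at module exit, as
 * noted in the comment above. */
#if 0
static struct kmem_cache* exampleCache = NULL;

static void __Example_initObj(void* initObj)
{
   memset(initObj, 0, sizeof(struct ExampleObj) );
}

static int __init Example_initModule(void)
{
   exampleCache = OsCompat_initKmemCache("beegfs_example_cache", sizeof(struct ExampleObj),
      __Example_initObj);

   return exampleCache ? 0 : -ENOMEM;
}

static void __exit Example_exitModule(void)
{
   kmem_cache_destroy(exampleCache);
}
#endif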
|
||||
|
||||
#ifndef rbtree_postorder_for_each_entry_safe
|
||||
static struct rb_node* rb_left_deepest_node(const struct rb_node* node)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (node->rb_left)
|
||||
node = node->rb_left;
|
||||
else
|
||||
if (node->rb_right)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return (struct rb_node*) node;
|
||||
}
|
||||
}
|
||||
|
||||
struct rb_node* rb_next_postorder(const struct rb_node* node)
|
||||
{
|
||||
const struct rb_node *parent;
|
||||
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
parent = rb_parent(node);
|
||||
|
||||
/* If we're sitting on node, we've already seen our children */
|
||||
if (parent && node == parent->rb_left && parent->rb_right)
|
||||
{
|
||||
/* If we are the parent's left node, go to the parent's right
|
||||
* node then all the way down to the left */
|
||||
return rb_left_deepest_node(parent->rb_right);
|
||||
}
|
||||
else
|
||||
/* Otherwise we are the parent's right node, and the parent
|
||||
* should be next */
|
||||
return (struct rb_node*) parent;
|
||||
}
|
||||
|
||||
struct rb_node* rb_first_postorder(const struct rb_root* root)
|
||||
{
|
||||
if (!root->rb_node)
|
||||
return NULL;
|
||||
|
||||
return rb_left_deepest_node(root->rb_node);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL_HAS_GENERIC_WRITE_CHECKS_ITER
|
||||
int os_generic_write_checks(struct file* filp, loff_t* offset, size_t* size, int isblk)
|
||||
{
|
||||
struct iovec iov = { 0, *size };
|
||||
struct iov_iter iter;
|
||||
ssize_t checkRes;
|
||||
struct kiocb iocb;
|
||||
|
||||
iov_iter_init(&iter, WRITE, &iov, 1, *size);
|
||||
init_sync_kiocb(&iocb, filp);
|
||||
iocb.ki_pos = *offset;
|
||||
|
||||
checkRes = generic_write_checks(&iocb, &iter);
|
||||
if(checkRes < 0)
|
||||
return checkRes;
|
||||
|
||||
*offset = iocb.ki_pos;
|
||||
*size = iter.count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_HAVE_SUBMOUNTS
|
||||
/**
|
||||
* enum d_walk_ret - action to take during tree walk
* @D_WALK_CONTINUE: continue walk
|
||||
* @D_WALK_QUIT: quit walk
|
||||
* @D_WALK_NORETRY: quit when retry is needed
|
||||
* @D_WALK_SKIP: skip this dentry and its children
|
||||
*/
|
||||
enum d_walk_ret {
|
||||
D_WALK_CONTINUE,
|
||||
D_WALK_QUIT,
|
||||
D_WALK_NORETRY,
|
||||
D_WALK_SKIP,
|
||||
};
|
||||
|
||||
/*
|
||||
* Search for at least 1 mount point in the dentry's subdirs.
|
||||
* We descend to the next level whenever the d_subdirs
|
||||
* list is non-empty and continue searching.
|
||||
*/
|
||||
|
||||
static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
|
||||
{
|
||||
int *ret = data;
|
||||
if (d_mountpoint(dentry)) {
|
||||
*ret = 1;
|
||||
return D_WALK_QUIT;
|
||||
}
|
||||
return D_WALK_CONTINUE;
|
||||
}
|
||||
|
||||
#if defined(KERNEL_HAS_DENTRY_SUBDIRS)
|
||||
/**
|
||||
* d_walk - walk the dentry tree
|
||||
* @parent: start of walk
|
||||
* @data: data passed to @enter() and @finish()
|
||||
* @enter: callback when first entering the dentry
|
||||
* @finish: callback when successfully finished the walk
|
||||
*
|
||||
* The @enter() and @finish() callbacks are called with d_lock held.
|
||||
*/
|
||||
static void d_walk(struct dentry *parent, void *data,
|
||||
enum d_walk_ret (*enter)(void *, struct dentry *),
|
||||
void (*finish)(void *))
|
||||
{
|
||||
struct dentry *this_parent;
|
||||
struct list_head *next;
|
||||
unsigned seq = 0;
|
||||
enum d_walk_ret ret;
|
||||
bool retry = true;
|
||||
|
||||
again:
|
||||
read_seqbegin_or_lock(&rename_lock, &seq);
|
||||
this_parent = parent;
|
||||
spin_lock(&this_parent->d_lock);
|
||||
|
||||
ret = enter(data, this_parent);
|
||||
switch (ret) {
|
||||
case D_WALK_CONTINUE:
|
||||
break;
|
||||
case D_WALK_QUIT:
|
||||
case D_WALK_SKIP:
|
||||
goto out_unlock;
|
||||
case D_WALK_NORETRY:
|
||||
retry = false;
|
||||
break;
|
||||
}
|
||||
repeat:
|
||||
next = this_parent->d_subdirs.next;
|
||||
resume:
|
||||
while (next != &this_parent->d_subdirs) {
|
||||
struct list_head *tmp = next;
|
||||
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
|
||||
next = tmp->next;
|
||||
|
||||
if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
|
||||
continue;
|
||||
|
||||
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
|
||||
|
||||
ret = enter(data, dentry);
|
||||
switch (ret) {
|
||||
case D_WALK_CONTINUE:
|
||||
break;
|
||||
case D_WALK_QUIT:
|
||||
spin_unlock(&dentry->d_lock);
|
||||
goto out_unlock;
|
||||
case D_WALK_NORETRY:
|
||||
retry = false;
|
||||
break;
|
||||
case D_WALK_SKIP:
|
||||
spin_unlock(&dentry->d_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!list_empty(&dentry->d_subdirs)) {
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
#if defined(KERNEL_SPIN_RELEASE_HAS_3_ARGUMENTS)
|
||||
spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
|
||||
#else
|
||||
spin_release(&dentry->d_lock.dep_map, _RET_IP_);
|
||||
#endif
|
||||
this_parent = dentry;
|
||||
spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
|
||||
goto repeat;
|
||||
}
|
||||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
/*
|
||||
* All done at this level ... ascend and resume the search.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
ascend:
|
||||
if (this_parent != parent) {
|
||||
struct dentry *child = this_parent;
|
||||
this_parent = child->d_parent;
|
||||
|
||||
spin_unlock(&child->d_lock);
|
||||
spin_lock(&this_parent->d_lock);
|
||||
|
||||
/* might go back up the wrong parent if we have had a rename. */
|
||||
if (need_seqretry(&rename_lock, seq))
|
||||
goto rename_retry;
|
||||
/* go into the first sibling still alive */
|
||||
do {
|
||||
next = child->d_child.next;
|
||||
if (next == &this_parent->d_subdirs)
|
||||
goto ascend;
|
||||
child = list_entry(next, struct dentry, d_child);
|
||||
} while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
|
||||
rcu_read_unlock();
|
||||
goto resume;
|
||||
}
|
||||
if (need_seqretry(&rename_lock, seq))
|
||||
goto rename_retry;
|
||||
rcu_read_unlock();
|
||||
if (finish)
|
||||
finish(data);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
done_seqretry(&rename_lock, seq);
|
||||
return;
|
||||
|
||||
rename_retry:
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
rcu_read_unlock();
|
||||
BUG_ON(seq & 1);
|
||||
if (!retry)
|
||||
return;
|
||||
seq = 1;
|
||||
goto again;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/**
|
||||
* d_walk - walk the dentry tree
|
||||
* @parent: start of walk
|
||||
* @data: data passed to @enter() and @finish()
|
||||
* @enter: callback when first entering the dentry
|
||||
*
|
||||
* The @enter() callbacks are called with d_lock held.
|
||||
*/
|
||||
static void d_walk(struct dentry *parent, void *data,
|
||||
enum d_walk_ret (*enter)(void *, struct dentry *))
|
||||
{
|
||||
struct dentry *this_parent, *dentry;
|
||||
unsigned seq = 0;
|
||||
enum d_walk_ret ret;
|
||||
bool retry = true;
|
||||
|
||||
again:
|
||||
read_seqbegin_or_lock(&rename_lock, &seq);
|
||||
this_parent = parent;
|
||||
spin_lock(&this_parent->d_lock);
|
||||
|
||||
ret = enter(data, this_parent);
|
||||
switch (ret) {
|
||||
case D_WALK_CONTINUE:
|
||||
break;
|
||||
case D_WALK_QUIT:
|
||||
case D_WALK_SKIP:
|
||||
goto out_unlock;
|
||||
case D_WALK_NORETRY:
|
||||
retry = false;
|
||||
break;
|
||||
}
|
||||
repeat:
|
||||
dentry = d_first_child(this_parent);
|
||||
resume:
|
||||
hlist_for_each_entry_from(dentry, d_sib) {
|
||||
if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
|
||||
continue;
|
||||
|
||||
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
|
||||
|
||||
ret = enter(data, dentry);
|
||||
switch (ret) {
|
||||
case D_WALK_CONTINUE:
|
||||
break;
|
||||
case D_WALK_QUIT:
|
||||
spin_unlock(&dentry->d_lock);
|
||||
goto out_unlock;
|
||||
case D_WALK_NORETRY:
|
||||
retry = false;
|
||||
break;
|
||||
case D_WALK_SKIP:
|
||||
spin_unlock(&dentry->d_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!hlist_empty(&dentry->d_children)) {
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
spin_release(&dentry->d_lock.dep_map, _RET_IP_);
|
||||
this_parent = dentry;
|
||||
spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
|
||||
goto repeat;
|
||||
}
|
||||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
/*
|
||||
* All done at this level ... ascend and resume the search.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
ascend:
|
||||
if (this_parent != parent) {
|
||||
dentry = this_parent;
|
||||
this_parent = dentry->d_parent;
|
||||
|
||||
spin_unlock(&dentry->d_lock);
|
||||
spin_lock(&this_parent->d_lock);
|
||||
|
||||
/* might go back up the wrong parent if we have had a rename. */
|
||||
if (need_seqretry(&rename_lock, seq))
|
||||
goto rename_retry;
|
||||
/* go into the first sibling still alive */
|
||||
hlist_for_each_entry_continue(dentry, d_sib) {
|
||||
if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) {
|
||||
rcu_read_unlock();
|
||||
goto resume;
|
||||
}
|
||||
}
|
||||
goto ascend;
|
||||
}
|
||||
if (need_seqretry(&rename_lock, seq))
|
||||
goto rename_retry;
|
||||
rcu_read_unlock();
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
done_seqretry(&rename_lock, seq);
|
||||
return;
|
||||
|
||||
rename_retry:
|
||||
spin_unlock(&this_parent->d_lock);
|
||||
rcu_read_unlock();
|
||||
BUG_ON(seq & 1);
|
||||
if (!retry)
|
||||
return;
|
||||
seq = 1;
|
||||
goto again;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* have_submounts - check for mounts over a dentry
|
||||
* @parent: dentry to check.
|
||||
*
|
||||
* Return true if the parent or its subdirectories contain
|
||||
* a mount point
|
||||
*/
|
||||
int have_submounts(struct dentry *parent)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
#if defined(KERNEL_HAS_DENTRY_SUBDIRS)
|
||||
d_walk(parent, &ret, check_mount, NULL);
|
||||
#else
|
||||
d_walk(parent, &ret, check_mount);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
396
client_module/source/os/OsCompat.h
Normal file
@@ -0,0 +1,396 @@
|
||||
/*
|
||||
* Compatibility functions for older Linux versions
|
||||
*/
|
||||
|
||||
#ifndef OSCOMPAT_H_
|
||||
#define OSCOMPAT_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/writeback.h>
|
||||
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
|
||||
#ifndef KERNEL_HAS_MEMDUP_USER
|
||||
extern void *memdup_user(const void __user *src, size_t len);
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_D_MAKE_ROOT
|
||||
extern struct dentry *d_make_root(struct inode *root_inode);
|
||||
#endif
|
||||
|
||||
#if defined(KERNEL_HAS_SB_BDI) && !defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER)
|
||||
extern int bdi_setup_and_register(struct backing_dev_info *bdi, char *name, unsigned int cap);
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_HAVE_SUBMOUNTS
|
||||
extern int have_submounts(struct dentry *parent);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PG_error and SetPageError() have been deprecated and removed in Linux 6.12.
|
||||
* We now use mapping_set_error() to record writeback errors at the address_space level.
|
||||
*
|
||||
* This ensures compatibility with kernels >= 4.19 and aligns with the new writeback
|
||||
* error tracking model using errseq_t (see LWN: https://lwn.net/Articles/724307/).
|
||||
*
|
||||
* BeeGFS compatibility:
|
||||
* - Buffered mode paths already use filemap_fdatawait(), which calls filemap_check_errors().
|
||||
* - Native mode uses file_write_and_wait_range(), which calls file_check_and_advance_wb_err().
|
||||
*/
|
||||
|
||||
/**
|
||||
* fhgfs_set_wb_error - Record a writeback error at the mapping level
|
||||
*
|
||||
* Replaces SetPageError(); safe across all supported kernels.
|
||||
*
|
||||
* @page: the page associated with the mapping
|
||||
* @err: the error code
|
||||
*/
|
||||
static inline void fhgfs_set_wb_error(struct page *page, int err)
|
||||
{
|
||||
if (page && page->mapping && err)
|
||||
mapping_set_error(page->mapping, err);
|
||||
}
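
/* Minimal sketch (illustrative only, not part of the original source) of how an error
 * recorded via mapping_set_error() is observed later, as described in the note above:
 * the buffered path waits for writeback and picks the error up from the mapping. */
#if 0
static int example_waitForWriteback(struct address_space* mapping)
{
   filemap_fdatawrite(mapping); // start writeback of dirty pages

   return filemap_fdatawait(mapping); // returns the error recorded via mapping_set_error()
}
#endif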
|
||||
|
||||
/**
|
||||
* generic_permission() compatibility function
|
||||
*
|
||||
* NOTE: Only kernels > 2.6.32 do have inode->i_op->check_acl, but as we do not
|
||||
* support it anyway for now, we do not need a complete kernel version check for it.
|
||||
* Also, in order to skip useless pointer references we just pass NULL here.
|
||||
*/
|
||||
static inline int os_generic_permission(struct inode *inode, int mask)
|
||||
{
|
||||
#ifdef KERNEL_HAS_GENERIC_PERMISSION_2
|
||||
return generic_permission(inode, mask);
|
||||
#elif defined(KERNEL_HAS_GENERIC_PERMISSION_4)
|
||||
return generic_permission(inode, mask, 0, NULL);
|
||||
#elif defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
||||
return generic_permission(&nop_mnt_idmap, inode, mask);
|
||||
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
||||
return generic_permission(&init_user_ns, inode, mask);
|
||||
#else
|
||||
return generic_permission(inode, mask, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(KERNEL_HAS_GENERIC_FILLATTR_REQUEST_MASK)
|
||||
static inline void os_generic_fillattr(struct inode *inode, struct kstat *kstat, u32 request_mask)
|
||||
#else
|
||||
static inline void os_generic_fillattr(struct inode *inode, struct kstat *kstat)
|
||||
#endif
|
||||
{
|
||||
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
||||
#if defined(KERNEL_HAS_GENERIC_FILLATTR_REQUEST_MASK)
|
||||
generic_fillattr(&nop_mnt_idmap, request_mask, inode, kstat);
|
||||
#else
|
||||
generic_fillattr(&nop_mnt_idmap, inode, kstat);
|
||||
#endif // KERNEL_HAS_GENERIC_FILLATTR_REQUEST_MASK
|
||||
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
||||
generic_fillattr(&init_user_ns, inode, kstat);
|
||||
#else
|
||||
generic_fillattr(inode, kstat);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef KERNEL_HAS_SETATTR_PREPARE
|
||||
static inline int os_setattr_prepare(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
||||
return setattr_prepare(&nop_mnt_idmap, dentry, attr);
|
||||
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
||||
return setattr_prepare(&init_user_ns, dentry, attr);
|
||||
#else
|
||||
return setattr_prepare(dentry, attr);
|
||||
#endif
|
||||
}
|
||||
#endif // KERNEL_HAS_SETATTR_PREPARE
|
||||
|
||||
static inline bool os_inode_owner_or_capable(const struct inode *inode)
|
||||
{
|
||||
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
||||
return inode_owner_or_capable(&nop_mnt_idmap, inode);
|
||||
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
||||
return inode_owner_or_capable(&init_user_ns, inode);
|
||||
#else
|
||||
return inode_owner_or_capable(inode);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef KERNEL_HAS_D_MATERIALISE_UNIQUE
|
||||
extern struct dentry* d_materialise_unique(struct dentry *dentry, struct inode *inode);
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
|
||||
/**
|
||||
* Taken from ext3 dir.c. is_compat_task() does work for older kernels too, where it was already
* available, but we are conservative and only use it for recent kernels.
|
||||
*/
|
||||
static inline int is_32bit_api(void)
|
||||
{
|
||||
#ifdef CONFIG_COMPAT
|
||||
# ifdef in_compat_syscall
|
||||
return in_compat_syscall();
|
||||
# else
|
||||
return is_compat_task();
|
||||
# endif
|
||||
#else
|
||||
return (BITS_PER_LONG == 32);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
static inline int is_32bit_api(void)
|
||||
{
|
||||
return (BITS_PER_LONG == 32);
|
||||
}
|
||||
#endif // LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
|
||||
|
||||
#ifndef KERNEL_HAS_I_UID_READ
|
||||
static inline uid_t i_uid_read(const struct inode *inode)
|
||||
{
|
||||
return inode->i_uid;
|
||||
}
|
||||
|
||||
static inline gid_t i_gid_read(const struct inode *inode)
|
||||
{
|
||||
return inode->i_gid;
|
||||
}
|
||||
|
||||
static inline void i_uid_write(struct inode *inode, uid_t uid)
|
||||
{
|
||||
inode->i_uid = uid;
|
||||
}
|
||||
|
||||
static inline void i_gid_write(struct inode *inode, gid_t gid)
|
||||
{
|
||||
inode->i_gid = gid;
|
||||
}
|
||||
|
||||
#endif // KERNEL_HAS_I_UID_READ
|
||||
|
||||
|
||||
#if defined(KERNEL_HAS_KMEMCACHE_CACHE_FLAGS_CTOR)
|
||||
struct kmem_cache* OsCompat_initKmemCache(const char* cacheName, size_t cacheSize,
|
||||
void initFuncPtr(void* initObj, struct kmem_cache* cache, unsigned long flags) );
|
||||
#elif defined(KERNEL_HAS_KMEMCACHE_CACHE_CTOR)
|
||||
struct kmem_cache* OsCompat_initKmemCache(const char* cacheName, size_t cacheSize,
|
||||
void initFuncPtr(struct kmem_cache* cache, void* initObj) );
|
||||
#else
|
||||
struct kmem_cache* OsCompat_initKmemCache(const char* cacheName, size_t cacheSize,
|
||||
void initFuncPtr(void* initObj) );
|
||||
#endif // LINUX_VERSION_CODE
|
||||
|
||||
|
||||
// added to 3.13, backported to -stable
|
||||
#ifndef list_next_entry
|
||||
/**
|
||||
* list_next_entry - get the next element in list
|
||||
* @pos: the type * to cursor
|
||||
* @member: the name of the list_struct within the struct.
|
||||
*/
|
||||
#define list_next_entry(pos, member) \
|
||||
list_entry((pos)->member.next, typeof(*(pos)), member)
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef list_first_entry
|
||||
/**
|
||||
* list_first_entry - get the first element from a list
|
||||
* @ptr: the list head to take the element from.
|
||||
* @type: the type of the struct this is embedded in.
|
||||
* @member: the name of the list_struct within the struct.
|
||||
*
|
||||
* Note, that list is expected to be not empty.
|
||||
*/
|
||||
#define list_first_entry(ptr, type, member) \
|
||||
list_entry((ptr)->next, type, member)
|
||||
#endif // list_first_entry
|
||||
|
||||
|
||||
static inline struct posix_acl* os_posix_acl_from_xattr(const void* value, size_t size)
|
||||
{
|
||||
#ifndef KERNEL_HAS_POSIX_ACL_XATTR_USERNS_ARG
|
||||
return posix_acl_from_xattr(value, size);
|
||||
#else
|
||||
return posix_acl_from_xattr(&init_user_ns, value, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int os_posix_acl_to_xattr(const struct posix_acl* acl, void* buffer, size_t size)
|
||||
{
|
||||
#ifndef KERNEL_HAS_POSIX_ACL_XATTR_USERNS_ARG
|
||||
return posix_acl_to_xattr(acl, buffer, size);
|
||||
#else
|
||||
return posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(KERNEL_HAS_SET_ACL) || defined(KERNEL_HAS_SET_ACL_DENTRY)
|
||||
static inline int os_posix_acl_chmod(struct dentry *dentry, umode_t mode)
|
||||
{
|
||||
|
||||
#if defined(KERNEL_HAS_IDMAPPED_MOUNTS)
|
||||
return posix_acl_chmod(&nop_mnt_idmap, dentry, mode);
|
||||
|
||||
#elif defined(KERNEL_HAS_POSIX_ACL_CHMOD_NS_DENTRY)
|
||||
return posix_acl_chmod(&init_user_ns, dentry, mode);
|
||||
|
||||
#elif defined(KERNEL_HAS_USER_NS_MOUNTS)
|
||||
return posix_acl_chmod(&init_user_ns, dentry->d_inode, mode);
|
||||
|
||||
#else
|
||||
return posix_acl_chmod(dentry->d_inode, mode);
|
||||
#endif
|
||||
}
|
||||
#endif // KERNEL_HAS_SET_ACL || KERNEL_HAS_SET_ACL_DENTRY
|
||||
|
||||
#ifndef KERNEL_HAS_PAGE_ENDIO
|
||||
static inline void page_endio(struct page *page, int rw, int err)
|
||||
{
|
||||
if (rw == READ)
|
||||
{
|
||||
if (!err)
|
||||
{
|
||||
SetPageUptodate(page);
|
||||
}
|
||||
else
|
||||
{
|
||||
ClearPageUptodate(page);
|
||||
fhgfs_set_wb_error(page, err);
|
||||
}
|
||||
|
||||
unlock_page(page);
|
||||
}
|
||||
else
|
||||
{ /* rw == WRITE */
|
||||
if (err)
|
||||
{
|
||||
fhgfs_set_wb_error(page, err);
|
||||
}
|
||||
|
||||
end_page_writeback(page);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_GENERIC_WRITE_CHECKS_ITER
|
||||
# define os_generic_write_checks generic_write_checks
|
||||
#else
|
||||
extern int os_generic_write_checks(struct file* filp, loff_t* offset, size_t* size, int isblk);
|
||||
#endif
|
||||
|
||||
#ifndef rb_entry_safe
|
||||
#define rb_entry_safe(ptr, type, member) \
|
||||
({ typeof(ptr) ____ptr = (ptr); \
|
||||
____ptr ? rb_entry(____ptr, type, member) : NULL; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#ifndef rbtree_postorder_for_each_entry_safe
|
||||
#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
|
||||
for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
|
||||
pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
|
||||
typeof(*pos), field); 1; }); \
|
||||
pos = n)
|
||||
|
||||
extern struct rb_node *rb_first_postorder(const struct rb_root *);
|
||||
extern struct rb_node *rb_next_postorder(const struct rb_node *);
|
||||
#endif
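
/* Usage sketch (illustrative only; 'struct ExampleNode' and 'treeNode' are placeholders):
 * the postorder traversal visits children before their parent, so each element may be
 * freed while iterating without touching memory that the iteration still needs. */
#if 0
struct ExampleNode
{
   struct rb_node treeNode;
   int key;
};

static void example_freeTree(struct rb_root* root)
{
   struct ExampleNode* pos;
   struct ExampleNode* n;

   rbtree_postorder_for_each_entry_safe(pos, n, root, treeNode)
      kfree(pos);

   *root = RB_ROOT;
}
#endif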
|
||||
|
||||
#ifndef KERNEL_HAS_CURRENT_UMASK
|
||||
#define current_umask() (current->fs->umask)
|
||||
#endif
|
||||
|
||||
#ifndef XATTR_NAME_POSIX_ACL_ACCESS
|
||||
# define XATTR_POSIX_ACL_ACCESS "posix_acl_access"
|
||||
# define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS
|
||||
# define XATTR_POSIX_ACL_DEFAULT "posix_acl_default"
|
||||
# define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_I_MMAP_LOCK
|
||||
static inline void i_mmap_lock_read(struct address_space* mapping)
|
||||
{
|
||||
#if defined(KERNEL_HAS_I_MMAP_RWSEM)
|
||||
down_read(&mapping->i_mmap_rwsem);
|
||||
#elif defined(KERNEL_HAS_I_MMAP_MUTEX)
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
#else
|
||||
spin_lock(&mapping->i_mmap_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void i_mmap_unlock_read(struct address_space* mapping)
|
||||
{
|
||||
#if defined(KERNEL_HAS_I_MMAP_RWSEM)
|
||||
up_read(&mapping->i_mmap_rwsem);
|
||||
#elif defined(KERNEL_HAS_I_MMAP_MUTEX)
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
#else
|
||||
spin_unlock(&mapping->i_mmap_lock);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool beegfs_hasMappings(struct inode* inode)
|
||||
{
|
||||
#if defined(KERNEL_HAS_I_MMAP_RBTREE)
|
||||
if (!RB_EMPTY_ROOT(&inode->i_mapping->i_mmap))
|
||||
return true;
|
||||
#elif defined(KERNEL_HAS_I_MMAP_CACHED_RBTREE)
|
||||
if (!RB_EMPTY_ROOT(&inode->i_mapping->i_mmap.rb_root))
|
||||
return true;
|
||||
#else
|
||||
if (!prio_tree_empty(&inode->i_mapping->i_mmap))
|
||||
return true;
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL_HAS_I_MMAP_NONLINEAR
|
||||
if (!list_empty(&inode->i_mapping->i_mmap_nonlinear))
|
||||
return true;
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifndef KERNEL_HAS_INODE_LOCK
|
||||
static inline void os_inode_lock(struct inode* inode)
|
||||
{
|
||||
mutex_lock(&inode->i_mutex);
|
||||
}
|
||||
|
||||
static inline void os_inode_unlock(struct inode* inode)
|
||||
{
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
#else
|
||||
static inline void os_inode_lock(struct inode* inode)
|
||||
{
|
||||
inode_lock(inode);
|
||||
}
|
||||
|
||||
static inline void os_inode_unlock(struct inode* inode)
|
||||
{
|
||||
inode_unlock(inode);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(KERNEL_ACCESS_OK_WANTS_TYPE)
|
||||
# define os_access_ok(type, addr, size) access_ok(type, addr, size)
|
||||
#else
|
||||
# define os_access_ok(type, addr, size) access_ok(addr, size)
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* OSCOMPAT_H_ */
|
||||
117
client_module/source/os/OsDeps.c
Normal file
@@ -0,0 +1,117 @@
|
||||
#include <common/Common.h>
|
||||
#include <os/OsDeps.h>
|
||||
#include <common/FhgfsTypes.h>
|
||||
#include <common/net/sock/NicAddress.h>
|
||||
#include <common/net/sock/Socket.h>
|
||||
#include <filesystem/FhgfsOps_versions.h>
|
||||
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/inetdevice.h>
|
||||
|
||||
|
||||
#ifdef CONFIG_STACKTRACE
|
||||
#include <linux/stacktrace.h>
|
||||
#endif
|
||||
|
||||
#define MAX_STACK_TRACE_CHAIN 16 // number of functions to save in a stack trace
|
||||
|
||||
|
||||
#ifdef BEEGFS_DEBUG
|
||||
|
||||
// Significant parts of the kernel code around struct stack_trace are removed
|
||||
// when CONFIG_ARCH_STACKWALK is set. Code below needs to be rewritten to work
|
||||
// with newer kernels that have CONFIG_ARCH_STACKWALK enabled.
|
||||
#if defined CONFIG_STACKTRACE && !defined CONFIG_ARCH_STACKWALK
|
||||
|
||||
/**
|
||||
* Save a given trace. NOTE: Allocated memory has to be freed later on!
|
||||
*/
|
||||
void* os_saveStackTrace(void)
|
||||
{
|
||||
struct stack_trace* trace;
|
||||
unsigned long *entries;
|
||||
|
||||
trace = kmalloc(sizeof(struct stack_trace), GFP_NOFS);
|
||||
if (!trace)
|
||||
return NULL; // out of memory?
|
||||
|
||||
entries = kmalloc(MAX_STACK_TRACE_CHAIN * sizeof(*entries), GFP_NOFS);
|
||||
if (!entries)
|
||||
{ // out of memory?
|
||||
kfree(trace);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
trace->nr_entries = 0;
|
||||
trace->max_entries = MAX_STACK_TRACE_CHAIN;
|
||||
trace->entries = entries;
|
||||
trace->skip = 1; // cut off ourself, so 1
|
||||
|
||||
save_stack_trace(trace);
|
||||
|
||||
return trace;
|
||||
}
|
||||
|
||||
void os_freeStackTrace(void *trace)
|
||||
{
|
||||
struct stack_trace* os_trace = (struct stack_trace*)trace;
|
||||
|
||||
if (!trace)
|
||||
{ // May be NULL, if kmalloc or vmalloc failed
|
||||
return;
|
||||
}
|
||||
|
||||
kfree(os_trace->entries);
|
||||
kfree(os_trace);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a stack trace
|
||||
*
|
||||
* @param trace The stack trace to print
|
||||
* @param spaces Insert 'spaces' white-spaces at the beginning of the line
|
||||
*/
|
||||
void os_printStackTrace(void* trace, int spaces)
|
||||
{
|
||||
if (!trace)
|
||||
{ // May be NULL, if kmalloc or vmalloc failed
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
struct stack_trace *stack_trace = trace;
|
||||
#if defined(KERNEL_HAS_PRINT_STACK_TRACE)
|
||||
print_stack_trace(stack_trace, spaces);
|
||||
#elif defined(KERNEL_HAS_STACK_TRACE_PRINT)
|
||||
stack_trace_print(stack_trace->entries, stack_trace->nr_entries, spaces);
|
||||
#else
|
||||
(void) stack_trace;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else // no CONFIG_STACKTRACE or CONFIG_ARCH_STACKWALK enabled => nothing to do at all
|
||||
|
||||
void* os_saveStackTrace(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void os_printStackTrace(void* trace, int spaces)
|
||||
{
|
||||
printk_fhgfs(KERN_INFO, "Kernel without stack trace support!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
void os_freeStackTrace(void* trace)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#endif // CONFIG_STACKTRACE && !CONFIG_ARCH_STACKWALK
|
||||
|
||||
#endif // BEEGFS_DEBUG
|
||||
|
||||
|
||||
71
client_module/source/os/OsDeps.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#ifndef OPEN_OSDEPS_H_
|
||||
#define OPEN_OSDEPS_H_
|
||||
|
||||
#include <filesystem/FhgfsOps_versions.h>
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
|
||||
#ifdef BEEGFS_DEBUG
|
||||
extern void* os_saveStackTrace(void);
|
||||
extern void os_printStackTrace(void * trace, int spaces);
|
||||
extern void os_freeStackTrace(void *trace);
|
||||
#endif // BEEGFS_DEBUG
|
||||
|
||||
|
||||
// inliners
|
||||
static inline void* os_kmalloc(size_t size);
|
||||
static inline void* os_kzalloc(size_t size);
|
||||
|
||||
static inline int os_strnicmp(const char* s1, const char* s2, size_t n);
|
||||
|
||||
|
||||
void* os_kmalloc(size_t size)
|
||||
{
|
||||
void* buf = kmalloc(size, GFP_NOFS);
|
||||
|
||||
if(unlikely(!buf) )
|
||||
{
|
||||
printk(KERN_WARNING BEEGFS_MODULE_NAME_STR ": kmalloc of '%d' bytes failed. Retrying...\n", (int)size);
|
||||
buf = kmalloc(size, GFP_NOFS | __GFP_NOFAIL);
|
||||
printk(KERN_WARNING BEEGFS_MODULE_NAME_STR ": kmalloc retry of '%d' bytes succeeded\n", (int)size);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
void* os_kzalloc(size_t size)
|
||||
{
|
||||
void* buf = kzalloc(size, GFP_NOFS);
|
||||
|
||||
if(unlikely(!buf) )
|
||||
{
|
||||
printk(KERN_WARNING BEEGFS_MODULE_NAME_STR ": kzalloc of '%d' bytes failed. Retrying...\n", (int)size);
|
||||
buf = kzalloc(size, GFP_NOFS | __GFP_NOFAIL);
|
||||
printk(KERN_WARNING BEEGFS_MODULE_NAME_STR ": kzalloc retry of '%d' bytes succeeded\n", (int)size);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
|
||||
* strncasecmp was broken in the linux kernel pre-3.18. strnicmp was
|
||||
* implemented correctly in that timeframe. In kernel >= 3.18, strnicmp
|
||||
* is either a wrapper for strncasecmp or is not defined.
|
||||
*/
|
||||
int os_strnicmp(const char *s1, const char *s2, size_t n)
|
||||
{
|
||||
#ifdef KERNEL_HAS_STRNICMP
|
||||
return strnicmp(s1, s2, n);
|
||||
#else
|
||||
return strncasecmp(s1, s2, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* OPEN_OSDEPS_H_ */
|
||||
201
client_module/source/os/OsTypeConversion.h
Normal file
@@ -0,0 +1,201 @@
|
||||
#ifndef OSTYPECONVERSION_INTERNAL_H_
|
||||
#define OSTYPECONVERSION_INTERNAL_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <os/OsTypeConversion.h>
|
||||
#include <common/toolkit/Time.h>
|
||||
#include <common/storage/StorageDefinitions.h>
|
||||
|
||||
#include <linux/fs.h>
|
||||
#if defined(KERNEL_HAS_LINUX_FILELOCK_H)
|
||||
#include <linux/filelock.h>
|
||||
#endif
|
||||
|
||||
static inline int OsTypeConv_openFlagsOsToFhgfs(int osFlags, bool isPagedMode);
|
||||
static inline void OsTypeConv_kstatFhgfsToOs(fhgfs_stat* fhgfsStat, struct kstat* kStat);
|
||||
static inline void OsTypeConv_iattrOsToFhgfs(struct iattr* iAttr, SettableFileAttribs* fhgfsAttr,
|
||||
int* outValidAttribs);
|
||||
static inline unsigned OsTypeConv_dirEntryTypeToOS(DirEntryType entryType);
|
||||
static inline int OsTypeConv_flockTypeToFhgfs(struct file_lock* fileLock);
|
||||
|
||||
|
||||
/**
|
||||
* @param osFlags file open mode flags
|
||||
* @return OPENFILE_ACCESS_... flags
|
||||
*/
|
||||
int OsTypeConv_openFlagsOsToFhgfs(int osFlags, bool isPagedMode)
|
||||
{
|
||||
int fhgfsFlags = 0;
|
||||
|
||||
if(osFlags & O_RDWR)
|
||||
fhgfsFlags |= OPENFILE_ACCESS_READWRITE;
|
||||
else
|
||||
if(osFlags & O_WRONLY)
|
||||
{
|
||||
if (!isPagedMode)
|
||||
fhgfsFlags |= OPENFILE_ACCESS_WRITE;
|
||||
else
|
||||
{ /* in order to update read-modify-write pages with the storage content we need a
* read-write handle */
|
||||
fhgfsFlags |= OPENFILE_ACCESS_READWRITE;
|
||||
}
|
||||
}
|
||||
else
|
||||
fhgfsFlags |= OPENFILE_ACCESS_READ;
|
||||
|
||||
|
||||
if(osFlags & O_APPEND)
|
||||
fhgfsFlags |= OPENFILE_ACCESS_APPEND;
|
||||
|
||||
if(osFlags & O_TRUNC)
|
||||
fhgfsFlags |= OPENFILE_ACCESS_TRUNC;
|
||||
|
||||
if(osFlags & O_DIRECT)
|
||||
fhgfsFlags |= OPENFILE_ACCESS_DIRECT;
|
||||
|
||||
if(osFlags & O_SYNC)
|
||||
fhgfsFlags |= OPENFILE_ACCESS_SYNC;
|
||||
|
||||
|
||||
return fhgfsFlags;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param kStat unused fields will be set to zero
|
||||
*/
|
||||
void OsTypeConv_kstatFhgfsToOs(fhgfs_stat* fhgfsStat, struct kstat* kStat)
|
||||
{
|
||||
memset(kStat, 0, sizeof(*kStat) );
|
||||
|
||||
kStat->mode = fhgfsStat->mode;
|
||||
kStat->nlink = fhgfsStat->nlink;
|
||||
kStat->uid = make_kuid(&init_user_ns, fhgfsStat->uid);
|
||||
kStat->gid = make_kgid(&init_user_ns, fhgfsStat->gid);
|
||||
kStat->size = fhgfsStat->size;
|
||||
kStat->blocks = fhgfsStat->blocks;
|
||||
kStat->atime.tv_sec = fhgfsStat->atime.tv_sec;
|
||||
kStat->atime.tv_nsec = fhgfsStat->atime.tv_nsec;
|
||||
kStat->mtime.tv_sec = fhgfsStat->mtime.tv_sec;
|
||||
kStat->mtime.tv_nsec = fhgfsStat->mtime.tv_nsec;
|
||||
kStat->ctime.tv_sec = fhgfsStat->ctime.tv_sec; // attrib change time (not creation time)
|
||||
kStat->ctime.tv_nsec = fhgfsStat->ctime.tv_nsec; // attrib change time (not creation time)
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert kernel iattr to fhgfsAttr. Also update the inode with the new attributes.
|
||||
*/
|
||||
void OsTypeConv_iattrOsToFhgfs(struct iattr* iAttr, SettableFileAttribs* fhgfsAttr,
|
||||
int* outValidAttribs)
|
||||
{
|
||||
Time now;
|
||||
Time_setToNowReal(&now);
|
||||
|
||||
*outValidAttribs = 0;
|
||||
|
||||
if(iAttr->ia_valid & ATTR_MODE)
|
||||
{
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_MODE;
|
||||
fhgfsAttr->mode = iAttr->ia_mode;
|
||||
}
|
||||
|
||||
if(iAttr->ia_valid & ATTR_UID)
|
||||
{
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_USERID;
|
||||
fhgfsAttr->userID = from_kuid(&init_user_ns, iAttr->ia_uid);
|
||||
}
|
||||
|
||||
if(iAttr->ia_valid & ATTR_GID)
|
||||
{
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_GROUPID;
|
||||
fhgfsAttr->groupID = from_kgid(&init_user_ns, iAttr->ia_gid);
|
||||
}
|
||||
|
||||
if(iAttr->ia_valid & ATTR_MTIME_SET)
|
||||
{
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_MODIFICATIONTIME;
|
||||
fhgfsAttr->modificationTimeSecs = iAttr->ia_mtime.tv_sec;
|
||||
}
|
||||
else
|
||||
if(iAttr->ia_valid & ATTR_MTIME)
|
||||
{ // set mtime to "now"
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_MODIFICATIONTIME;
|
||||
fhgfsAttr->modificationTimeSecs = now.tv_sec;
|
||||
}
|
||||
|
||||
if(iAttr->ia_valid & ATTR_ATIME_SET)
|
||||
{
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_LASTACCESSTIME;
|
||||
fhgfsAttr->lastAccessTimeSecs = iAttr->ia_atime.tv_sec;
|
||||
}
|
||||
else
|
||||
if(iAttr->ia_valid & ATTR_ATIME)
|
||||
{ // set atime to "now"
|
||||
(*outValidAttribs) |= SETATTR_CHANGE_LASTACCESSTIME;
|
||||
fhgfsAttr->lastAccessTimeSecs = now.tv_sec;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert fhgfs DirEntryType to OS DT_... for readdir()'s filldir.
|
||||
*/
|
||||
unsigned OsTypeConv_dirEntryTypeToOS(DirEntryType entryType)
|
||||
{
|
||||
if(DirEntryType_ISDIR(entryType) )
|
||||
return DT_DIR;
|
||||
|
||||
if(DirEntryType_ISREGULARFILE(entryType) )
|
||||
return DT_REG;
|
||||
|
||||
if(DirEntryType_ISSYMLINK(entryType) )
|
||||
return DT_LNK;
|
||||
|
||||
if(DirEntryType_ISBLOCKDEV(entryType) )
|
||||
return DT_BLK;
|
||||
|
||||
if(DirEntryType_ISCHARDEV(entryType) )
|
||||
return DT_CHR;
|
||||
|
||||
if(DirEntryType_ISFIFO(entryType) )
|
||||
return DT_FIFO;
|
||||
|
||||
if(DirEntryType_ISSOCKET(entryType) )
|
||||
return DT_SOCK;
|
||||
|
||||
return DT_UNKNOWN;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the OS F_..LCK lock type flags of a flock operation to fhgfs ENTRYLOCKTYPE_... lock type
* flags.
|
||||
*/
|
||||
static inline int OsTypeConv_flockTypeToFhgfs(struct file_lock* fileLock)
|
||||
{
|
||||
int fhgfsLockFlags = 0;
|
||||
|
||||
switch(FhgfsCommon_getFileLockType(fileLock))
|
||||
{
|
||||
case F_RDLCK:
|
||||
{
|
||||
fhgfsLockFlags = ENTRYLOCKTYPE_SHARED;
|
||||
} break;
|
||||
|
||||
case F_WRLCK:
|
||||
{
|
||||
fhgfsLockFlags = ENTRYLOCKTYPE_EXCLUSIVE;
|
||||
} break;
|
||||
|
||||
default:
|
||||
{
|
||||
fhgfsLockFlags = ENTRYLOCKTYPE_UNLOCK;
|
||||
} break;
|
||||
}
|
||||
|
||||
if(!(FhgfsCommon_getFileLockFlags(fileLock) & FL_SLEEP) )
|
||||
fhgfsLockFlags |= ENTRYLOCKTYPE_NOWAIT;
|
||||
|
||||
return fhgfsLockFlags;
|
||||
}
|
||||
|
||||
#endif /* OSTYPECONVERSION_INTERNAL_H_ */
|
||||
215
client_module/source/os/atomic64.c
Normal file
@@ -0,0 +1,215 @@
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <asm/atomic.h> // also adds ATOMIC64_INIT if available
|
||||
|
||||
|
||||
#ifndef ATOMIC64_INIT // basic test if the kernel already provides atomic64_t
|
||||
|
||||
|
||||
/*
|
||||
* Note: Below is the atomic64.c copied and modified from linux-git, for architectures which do
* not support native 64-bit atomic instructions in hardware. As we need to have compatibility with
|
||||
* older kernels we had to replace the usage of raw_spin_locks. This is probably
|
||||
* slower and therefore the in-kernel implementation should be used if available.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Generic implementation of 64-bit atomics using spinlocks,
|
||||
* useful on processors that don't have 64-bit atomic instructions.
|
||||
*
|
||||
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/init.h>
|
||||
// #include <linux/export.h> // disabled as not available in 2.6.16
|
||||
// #include <linux/atomic.h> // disabled as not available in 2.6.16
|
||||
|
||||
#include "atomic64.h" // added for fhgfs
|
||||
|
||||
#if 0 // disabled for the simplified fhgfs version
|
||||
/*
|
||||
* We use a hashed array of spinlocks to provide exclusive access
|
||||
* to each atomic64_t variable. Since this is expected to used on
|
||||
* systems with small numbers of CPUs (<= 4 or so), we use a
|
||||
* relatively small array of 16 spinlocks to avoid wasting too much
|
||||
* memory on the spinlock array.
|
||||
*/
|
||||
#define NR_LOCKS 16
|
||||
|
||||
/*
|
||||
* Ensure each lock is in a separate cacheline.
|
||||
*/
|
||||
static union {
|
||||
spinlock_t lock;
|
||||
char pad[L1_CACHE_BYTES];
|
||||
} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
|
||||
[0 ... (NR_LOCKS - 1)] = {
|
||||
.lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
static inline spinlock_t *lock_addr(const atomic64_t *v)
|
||||
{
|
||||
unsigned long addr = (unsigned long) v;
|
||||
|
||||
addr >>= L1_CACHE_SHIFT;
|
||||
addr ^= (addr >> 8) ^ (addr >> 16);
|
||||
return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Simplified version for fhgfs
|
||||
*/
|
||||
static inline spinlock_t *lock_addr(const atomic64_t *v)
|
||||
{
|
||||
atomic64_t* value = (atomic64_t*) v;
|
||||
|
||||
return &value->lock;
|
||||
}
|
||||
|
||||
long long atomic64_read(const atomic64_t *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
long long val;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
val = v->counter;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return val;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_read);
|
||||
|
||||
void atomic64_set(atomic64_t *v, long long i)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
v->counter = i;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_set);
|
||||
|
||||
void atomic64_add(long long a, atomic64_t *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
v->counter += a;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_add);
|
||||
|
||||
long long atomic64_add_return(long long a, atomic64_t *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
long long val;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
val = v->counter += a;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return val;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_add_return);
|
||||
|
||||
void atomic64_sub(long long a, atomic64_t *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
v->counter -= a;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_sub);
|
||||
|
||||
long long atomic64_sub_return(long long a, atomic64_t *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
long long val;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
val = v->counter -= a;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return val;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_sub_return);
|
||||
|
||||
long long atomic64_dec_if_positive(atomic64_t *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
long long val;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
val = v->counter - 1;
|
||||
if (val >= 0)
|
||||
v->counter = val;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return val;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_dec_if_positive);
|
||||
|
||||
long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
long long val;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
val = v->counter;
|
||||
if (val == o)
|
||||
v->counter = n;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return val;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_cmpxchg);
|
||||
|
||||
long long atomic64_xchg(atomic64_t *v, long long new)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
long long val;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
val = v->counter;
|
||||
v->counter = new;
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return val;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_xchg);
|
||||
|
||||
int atomic64_add_unless(atomic64_t *v, long long a, long long u)
|
||||
{
|
||||
unsigned long flags;
|
||||
spinlock_t *lock = lock_addr(v);
|
||||
int ret = 0;
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
if (v->counter != u) {
|
||||
v->counter += a;
|
||||
ret = 1;
|
||||
}
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
return ret;
|
||||
}
|
||||
// EXPORT_SYMBOL(atomic64_add_unless);
|
||||
|
||||
|
||||
#endif // #ifndef ATOMIC64_INIT
|
||||
69
client_module/source/os/atomic64.h
Normal file
@@ -0,0 +1,69 @@
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <asm/atomic.h> // also adds ATOMIC64_INIT if available
|
||||
|
||||
|
||||
#ifndef ATOMIC64_INIT // basic test if the kernel already provides atomic64_t
|
||||
|
||||
/*
|
||||
* Note: Below are the atomic64 declarations, copied from linux-git
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Generic implementation of 64-bit atomics using spinlocks,
|
||||
* useful on processors that don't have 64-bit atomic instructions.
|
||||
*
|
||||
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#ifndef _ASM_GENERIC_ATOMIC64_H
|
||||
#define _ASM_GENERIC_ATOMIC64_H
|
||||
|
||||
typedef struct {
|
||||
long long counter;
|
||||
spinlock_t lock; // added for fhgfs
|
||||
} atomic64_t;
|
||||
|
||||
// #define ATOMIC64_INIT(i) { (i) } // disabled for fhgfs
|
||||
|
||||
static inline void atomic_init(atomic64_t *atomic, uint64_t value); // added for fhgfs
|
||||
|
||||
extern long long atomic64_read(const atomic64_t *v);
|
||||
extern void atomic64_set(atomic64_t *v, long long i);
|
||||
extern void atomic64_add(long long a, atomic64_t *v);
|
||||
extern long long atomic64_add_return(long long a, atomic64_t *v);
|
||||
extern void atomic64_sub(long long a, atomic64_t *v);
|
||||
extern long long atomic64_sub_return(long long a, atomic64_t *v);
|
||||
extern long long atomic64_dec_if_positive(atomic64_t *v);
|
||||
extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
|
||||
extern long long atomic64_xchg(atomic64_t *v, long long new);
|
||||
extern int atomic64_add_unless(atomic64_t *v, long long a, long long u);
|
||||
|
||||
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
|
||||
#define atomic64_inc(v) atomic64_add(1LL, (v))
|
||||
#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
|
||||
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
|
||||
#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
|
||||
#define atomic64_dec(v) atomic64_sub(1LL, (v))
|
||||
#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
|
||||
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
|
||||
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
|
||||
|
||||
/*
|
||||
* Initializer for fhgfs, replacement for ATOMIC64_INIT(i)
|
||||
*/
|
||||
void atomic_init(atomic64_t* atomic, uint64_t value)
|
||||
{
|
||||
spin_lock_init(&atomic->lock);
|
||||
atomic->counter = value;
|
||||
}
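
/* Usage sketch (illustrative only): since ATOMIC64_INIT(i) is disabled in this fallback,
 * an emulated atomic64_t must be initialized at runtime via atomic_init() before any other
 * atomic64_*() call, so that its embedded spinlock is set up. */
#if 0
static atomic64_t exampleCounter;

static void example_useCounter(void)
{
   atomic_init(&exampleCounter, 0);

   atomic64_inc(&exampleCounter);
   atomic64_add(5LL, &exampleCounter);

   printk(KERN_INFO "counter: %lld\n", atomic64_read(&exampleCounter) );
}
#endif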
|
||||
|
||||
|
||||
#endif /* _ASM_GENERIC_ATOMIC64_H */
|
||||
|
||||
#endif // #ifndef ATOMIC64_INIT
|
||||
144
client_module/source/os/iov_iter.c
Normal file
@@ -0,0 +1,144 @@
|
||||
#include <os/iov_iter.h>
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
|
||||
static void beegfs_readsink_reserve_no_pipe(BeeGFS_ReadSink *rs, struct iov_iter *iter, size_t size)
|
||||
{
|
||||
rs->sanitized_iter = *iter;
|
||||
iov_iter_truncate(&rs->sanitized_iter, size);
|
||||
}
|
||||
|
||||
#ifdef KERNEL_HAS_ITER_PIPE
|
||||
static size_t compute_max_pagecount(size_t size)
|
||||
{
|
||||
// Compute maximal number of pages (in the pipe) that need to be present at once.
|
||||
// We don't know the page-relative offset from which max_size bytes will be reserved.
|
||||
// Assume the worst case.
|
||||
size_t max_offset = PAGE_SIZE - 1;
|
||||
|
||||
size_t max_pages = (max_offset + size + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
return max_pages;
|
||||
}
|
||||
|
||||
|
||||
static void beegfs_readsink_reserve_pipe(BeeGFS_ReadSink *rs, struct iov_iter *iter, size_t size)
|
||||
{
|
||||
size_t max_pages;
|
||||
|
||||
// struct should be zeroed
|
||||
BUG_ON(rs->npages != 0);
|
||||
BUG_ON(rs->pages != 0);
|
||||
BUG_ON(rs->bvecs != 0);
|
||||
|
||||
// should we disallow size > iter count?
|
||||
size = min_t(size_t, size, iov_iter_count(iter));
|
||||
max_pages = compute_max_pagecount(size);
|
||||
|
||||
// Could be kmalloc() instead of kzalloc(), but the iov_iter_get_pages() API
|
||||
// gives back a byte count which makes it hard to detect initialization bugs
|
||||
// related to the page pointers.
|
||||
rs->pages = kzalloc(max_pages * sizeof *rs->pages, GFP_NOFS);
|
||||
if (! rs->pages)
|
||||
return;
|
||||
|
||||
rs->bvecs = kmalloc(max_pages * sizeof *rs->bvecs, GFP_NOFS);
|
||||
if (! rs->bvecs)
|
||||
return;
|
||||
|
||||
{
|
||||
struct bio_vec *const bvecs = rs->bvecs;
|
||||
struct page **const pages = rs->pages;
|
||||
|
||||
long unsigned start;
|
||||
ssize_t gpr;
|
||||
|
||||
size_t view_size = 0;
|
||||
|
||||
#ifdef KERNEL_HAS_IOV_ITER_GET_PAGES2
|
||||
|
||||
struct iov_iter copyIter = *iter; // Copy the iterator because iov_iter_get_pages2() also
// auto-advances it, and we don't want that here: the
// while loop at the end of FhgfsOpsRemoting_readfileVec()
// already advances the iterator itself.
|
||||
gpr = iov_iter_get_pages2(&copyIter, pages, size, max_pages, &start);
|
||||
|
||||
#else
|
||||
|
||||
gpr = iov_iter_get_pages(iter, pages, size, max_pages, &start);
|
||||
|
||||
#endif
|
||||
|
||||
if (gpr < 0)
|
||||
{
|
||||
// indicate error?
|
||||
// probably not necessary. The sanitized_iter field will be initialized with count 0.
|
||||
}
|
||||
else if (gpr > 0)
|
||||
{
|
||||
size_t bvs_size = 0;
|
||||
size_t np = 0;
|
||||
|
||||
view_size = gpr;
|
||||
|
||||
for (np = 0; bvs_size < view_size; np++)
|
||||
{
|
||||
long unsigned offset = start;
|
||||
long unsigned len = min_t(size_t, view_size - bvs_size, PAGE_SIZE - start);
|
||||
|
||||
BUG_ON(np >= max_pages);
|
||||
BUG_ON(! pages[np]);
|
||||
|
||||
bvs_size += len;
|
||||
start = 0;
|
||||
|
||||
bvecs[np] = (struct bio_vec) {
|
||||
.bv_page = pages[np],
|
||||
.bv_offset = offset,
|
||||
.bv_len = len,
|
||||
};
|
||||
}
|
||||
|
||||
// make sure we're using all the pages that iov_iter_get_pages() gave us.
|
||||
//BUG_ON(np < max_pages && pages[np]);
|
||||
WARN_ON(np < max_pages && pages[np]);
|
||||
|
||||
rs->npages = np;
|
||||
}
|
||||
|
||||
BEEGFS_IOV_ITER_BVEC(&rs->sanitized_iter, READ, bvecs, rs->npages, view_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void beegfs_readsink_reserve(BeeGFS_ReadSink *rs, struct iov_iter *iter, size_t size)
|
||||
{
|
||||
#ifdef KERNEL_HAS_ITER_PIPE
|
||||
if (iov_iter_type(iter) == ITER_PIPE)
|
||||
beegfs_readsink_reserve_pipe(rs, iter, size);
|
||||
else
|
||||
beegfs_readsink_reserve_no_pipe(rs, iter, size);
|
||||
#else
|
||||
beegfs_readsink_reserve_no_pipe(rs, iter, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
void beegfs_readsink_release(BeeGFS_ReadSink *rs)
|
||||
{
|
||||
int npages = rs->npages;
|
||||
struct page **pages = rs->pages;
|
||||
|
||||
for (int i = 0; i < npages; i++)
|
||||
{
|
||||
put_page(pages[i]);
|
||||
pages[i] = NULL; // avoid this write?
|
||||
}
|
||||
|
||||
kfree(rs->pages);
|
||||
kfree(rs->bvecs);
|
||||
|
||||
memset(rs, 0, sizeof *rs);
|
||||
}
|
||||
210
client_module/source/os/iov_iter.h
Normal file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* compatibility for older kernels. this code is mostly taken from include/linux/uio.h,
|
||||
* include/linux/fs.h and associated .c files.
|
||||
*
|
||||
* the originals are licensed as:
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <linux/bvec.h>
|
||||
|
||||
#ifndef KERNEL_HAS_ITER_KVEC
|
||||
#error ITER_KVEC is a required feature
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_ITER_IS_IOVEC
|
||||
#error iter_is_iovec() is a required feature
|
||||
#endif
|
||||
|
||||
/*
|
||||
* In kernels 3.15 to 6.3 there was iov_iter_iovec(), returning the first iovec
|
||||
* in an iov_iter of type ITER_IOVEC.
|
||||
* 6.4 removed it and started using the macros iter_iov_addr & iter_iov_len.
* We use those now and provide a shim for older kernels.
|
||||
*/
|
||||
#if !defined(KERNEL_HAS_ITER_IOV_ADDR)
|
||||
#define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset)
|
||||
#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset)
|
||||
#endif
|
||||
|
||||
#ifndef KERNEL_HAS_IOV_ITER_INIT_DIR
|
||||
#error We require kernels that have a "direction" parameter to iov_iter_init().
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef KERNEL_HAS_IOV_ITER_TYPE
|
||||
static inline int iov_iter_type(const struct iov_iter *i)
|
||||
{
|
||||
return i->type & ~(READ | WRITE);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef KERNEL_HAS_IOV_ITER_IS_PIPE
|
||||
static inline bool iov_iter_is_pipe(struct iov_iter* iter)
|
||||
{
|
||||
#ifdef KERNEL_HAS_ITER_PIPE
|
||||
return iov_iter_type(iter) == ITER_PIPE;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static inline int beegfs_iov_iter_is_iovec(const struct iov_iter *iter)
|
||||
{
|
||||
return iov_iter_type(iter) == ITER_IOVEC;
|
||||
}
|
||||
|
||||
// TODO: Now that ITER_KVEC is required across all kernels, is this function still needed?
|
||||
static inline struct iov_iter *beegfs_get_iovec_iov_iter(struct iov_iter *iter)
|
||||
{
|
||||
BUG_ON(!beegfs_iov_iter_is_iovec(iter));
|
||||
return iter;
|
||||
}
|
||||
|
||||
static inline unsigned long beegfs_iov_iter_nr_segs(const struct iov_iter *iter)
|
||||
{
|
||||
return iter->nr_segs;
|
||||
}
|
||||
|
||||
static inline void beegfs_iov_iter_clear(struct iov_iter *iter)
|
||||
{
|
||||
iter->count = 0;
|
||||
}
|
||||
|
||||
#ifdef KERNEL_HAS_ITER_PIPE
|
||||
static inline bool beegfs_is_pipe_iter(struct iov_iter * iter)
|
||||
{
|
||||
return iov_iter_type(iter) == ITER_PIPE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define BEEGFS_IOV_ITER_INIT iov_iter_init
|
||||
|
||||
static inline void BEEGFS_IOV_ITER_KVEC(struct iov_iter *iter, int direction,
|
||||
const struct kvec* kvec, unsigned long nr_segs, size_t count)
|
||||
{
|
||||
#ifndef KERNEL_HAS_IOV_ITER_KVEC_NO_TYPE_FLAG_IN_DIRECTION
|
||||
direction |= ITER_KVEC;
|
||||
#endif
|
||||
iov_iter_kvec(iter, direction, kvec, nr_segs, count);
|
||||
}
|
||||
|
||||
static inline void BEEGFS_IOV_ITER_BVEC(struct iov_iter *iter, int direction,
|
||||
const struct bio_vec* bvec, unsigned long nr_segs, size_t count)
|
||||
{
|
||||
#ifndef KERNEL_HAS_IOV_ITER_KVEC_NO_TYPE_FLAG_IN_DIRECTION
|
||||
direction |= ITER_BVEC;
|
||||
#endif
|
||||
iov_iter_bvec(iter, direction, bvec, nr_segs, count);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
BeeGFS_ReadSink
|
||||
|
||||
We can't get parallel reads to work easily with ITER_PIPE. That type of iter
|
||||
doesn't allow splitting up a region easily for parallel writing. The reason
|
||||
is that the iov_iter_advance() implementation for ITER_PIPE modifies shared
|
||||
state (the pipe_inode_info structure).
|
||||
|
||||
The BeeGFS_ReadSink structure allows to abstract from that concern by
|
||||
converting to an ITER_BVEC iter where necessary.
|
||||
|
||||
Use is as follows:
|
||||
1) Initialize the struct by zeroing out, or using a {0} initializer.
|
||||
This allows the cleanup routine to work even if nothing was ever
|
||||
allocated.
|
||||
|
||||
2) Call _reserve() to set up a view of the given size into a given iov_iter
|
||||
struct. If the given iov_iter is not of type ITER_PIPE, it will be copied
|
||||
straight to the "sanitized_iter" field. Otherwise (if it is an
|
||||
ITER_PIPE), an ITER_BVEC iterator will be made by allocating pages from
|
||||
the pipe and setting up a bio_vec for each page.
|
||||
|
||||
Note that this can fail in low memory situations. The size of the view
|
||||
that was successfully allocated can be queried by calling
|
||||
iov_iter_count() on the sanitized_iter field.
|
||||
|
||||
3) The sanitized_iter field should be used to read data. The field can be
|
||||
used destructively. In particular it is safe to call iov_iter_advance()
|
||||
on it in order to partition the view for multiple parallel reads.
|
||||
|
||||
4) When reads are done, iov_iter_advance() should probably be called on
|
||||
the iter that was given to _reserve().
|
||||
|
||||
5) Call _release() to give back the pages that were reserved in step 2).
|
||||
If the struct was properly initialized in step 1), it is safe to call
|
||||
_release() even if _reserve() was never called. This is useful when cleaning
|
||||
up state after an early exit.
|
||||
|
||||
6) Go back to 2) if necessary, to copy more data.
|
||||
*/
|
||||
|
||||
typedef struct _BeeGFS_ReadSink BeeGFS_ReadSink;
|
||||
struct _BeeGFS_ReadSink {
|
||||
size_t npages; // Number of pages currently in use (get_page())
|
||||
struct page **pages; // 0..npages
|
||||
struct bio_vec *bvecs; // 0..npages
|
||||
|
||||
// output value
|
||||
struct iov_iter sanitized_iter;
|
||||
};
|
||||
|
||||
void beegfs_readsink_reserve(BeeGFS_ReadSink *rs, struct iov_iter *iter, size_t size);
|
||||
void beegfs_readsink_release(BeeGFS_ReadSink *rs);
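
/* Usage sketch following the numbered steps above (illustrative only; the actual read into
 * the sanitized_iter, e.g. via the remoting read path, is omitted): */
#if 0
static void example_readIntoIter(struct iov_iter* iter, size_t wantedSize)
{
   BeeGFS_ReadSink readSink = {0}; // step 1: zero-initialize

   beegfs_readsink_reserve(&readSink, iter, wantedSize); // step 2: build the sanitized view

   // step 3: read into readSink.sanitized_iter (it may be partitioned with
   // iov_iter_advance() for parallel reads); here we only query the view size
   size_t viewSize = iov_iter_count(&readSink.sanitized_iter);

   iov_iter_advance(iter, viewSize); // step 4: advance the caller's original iter

   beegfs_readsink_release(&readSink); // step 5: give the reserved pages back
}
#endif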
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
We have lots of code locations where we need to read or write memory using a
|
||||
pointer + length pair, but need to use an iov_iter based API. This always
|
||||
leads to boilerplate where struct iovec and struct iov_iter values have to be
|
||||
declared on the stack. The following hack is meant to reduce that boilerplate.
|
||||
*/
|
||||
#define STACK_ALLOC_BEEGFS_ITER_IOV(ptr, size, direction) \
|
||||
___BEEGFS_IOV_ITER_INIT(&(struct iov_iter){0}, &(struct iovec){0}, (ptr), (size), (direction))
|
||||
|
||||
#define STACK_ALLOC_BEEGFS_ITER_KVEC(ptr, size, direction) \
|
||||
___BEEGFS_IOV_ITER_KVEC(&(struct iov_iter){0}, &(struct kvec){0}, (ptr), (size), (direction))
|
||||
|
||||
static inline struct iov_iter *___BEEGFS_IOV_ITER_INIT(
|
||||
struct iov_iter *iter, struct iovec *iovec,
|
||||
const char __user *ptr, size_t size, int direction)
|
||||
{
|
||||
unsigned nr_segs = 1;
|
||||
*iovec = (struct iovec) {
|
||||
.iov_base = (char __user *) ptr,
|
||||
.iov_len = size,
|
||||
};
|
||||
BEEGFS_IOV_ITER_INIT(iter, direction, iovec, nr_segs, size);
|
||||
return iter;
|
||||
}
|
||||
|
||||
static inline struct iov_iter *___BEEGFS_IOV_ITER_KVEC(
|
||||
struct iov_iter *iter, struct kvec* kvec,
|
||||
const char *ptr, size_t size, int direction)
|
||||
{
|
||||
unsigned nr_segs = 1;
|
||||
*kvec = (struct kvec) {
|
||||
.iov_base = (char *) ptr,
|
||||
.iov_len = size,
|
||||
};
|
||||
BEEGFS_IOV_ITER_KVEC(iter, direction, kvec, nr_segs, size);
|
||||
return iter;
|
||||
}
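
/* Usage sketch (illustrative only; 'exampleBuf' is a placeholder): wrap a plain kernel
 * buffer in a single-segment kvec-based iov_iter without declaring the iovec/iov_iter
 * boilerplate at the call site. WRITE marks the buffer as a data source. */
#if 0
static void example_wrapBuffer(char* exampleBuf, size_t len)
{
   struct iov_iter* iter = STACK_ALLOC_BEEGFS_ITER_KVEC(exampleBuf, len, WRITE);

   // 'iter' now describes exampleBuf[0..len) and can be passed to any iov_iter based API
   (void) iter;
}
#endif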