New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,90 @@
#ifndef CONNECTIONLIST_H_
#define CONNECTIONLIST_H_
#include <common/Common.h>
#include <common/toolkit/list/PointerList.h>
#include <common/net/sock/PooledSocket.h>
struct ConnectionList;
typedef struct ConnectionList ConnectionList;
static inline void ConnectionList_init(ConnectionList* this, bool owner);
static inline void ConnectionList_uninit(ConnectionList* this);
static inline void ConnectionList_append(ConnectionList* this, PooledSocket* socket);
static inline void ConnectionList_prepend(ConnectionList* this, PooledSocket* socket);
static inline int ConnectionList_moveToHead(ConnectionList* this, PooledSocket* socket);
static inline int ConnectionList_moveToTail(ConnectionList* this, PooledSocket* socket);
static inline int ConnectionList_remove(ConnectionList* this, PooledSocket* socket);
static inline size_t ConnectionList_length(ConnectionList* this);
/**
 * List of PooledSocket pointers built on top of a generic PointerList.
 *
 * Keep pointerList as the first member: code may cast between ConnectionList* and
 * PointerList*.
 */
struct ConnectionList
{
PointerList pointerList; // underlying generic pointer list holding the sockets
bool owner; // if true, add/remove also update the socket's pool/poolElem back-references
};
/**
 * Prepares an empty connection list.
 *
 * @param owner true if this list owns the PooledSockets added as list elements. The owner list
 *    maintains the socket's pool and poolElem members. A temporary list should pass false.
 */
void ConnectionList_init(ConnectionList* this, bool owner)
{
   this->owner = owner;

   PointerList_init(&this->pointerList);
}
/**
 * Releases the underlying pointer list. The sockets referenced by the list are not touched
 * here.
 */
void ConnectionList_uninit(ConnectionList* this)
{
   PointerList* elems = &this->pointerList;

   PointerList_uninit(elems);
}
/**
 * Inserts the socket at the head of the list. An owner list additionally records itself and
 * the new head element in the socket.
 */
void ConnectionList_prepend(ConnectionList* this, PooledSocket* socket)
{
   PointerList* elems = &this->pointerList;

   PointerList_addHead(elems, socket);

   if (!this->owner)
      return;

   // the element that was just added is the current head
   PooledSocket_setPool(socket, this, PointerList_getHead(elems) );
}
/**
 * Inserts the socket at the tail of the list. An owner list additionally records itself and
 * the new tail element in the socket.
 */
void ConnectionList_append(ConnectionList* this, PooledSocket* socket)
{
   PointerList* elems = &this->pointerList;

   PointerList_append(elems, socket);

   if (!this->owner)
      return;

   // the element that was just added is the current tail
   PooledSocket_setPool(socket, this, PointerList_getTail(elems) );
}
/**
 * Unlinks the socket from the list via the list element stored in the socket.
 *
 * @return 0 on success, -EINVAL if the socket has no pool element set
 */
int ConnectionList_remove(ConnectionList* this, PooledSocket* socket)
{
   if (unlikely(PooledSocket_getPoolElem(socket) == NULL))
   {
      return -EINVAL;
   }

   PointerList_removeElem(&this->pointerList, PooledSocket_getPoolElem(socket) );

   // only an owner list maintains the socket's back-references
   if (this->owner)
   {
      PooledSocket_setPool(socket, NULL, NULL);
   }

   return 0;
}
/**
 * Moves the socket's list element to the head of the list.
 *
 * @return 0 on success, -EINVAL if the socket has no pool element set
 */
int ConnectionList_moveToHead(ConnectionList* this, PooledSocket* socket)
{
   if (unlikely(PooledSocket_getPoolElem(socket) == NULL))
   {
      return -EINVAL;
   }

   PointerList_moveToHead(&this->pointerList, PooledSocket_getPoolElem(socket) );

   return 0;
}
/**
 * Moves the socket's list element to the tail of the list.
 *
 * @return 0 on success, -EINVAL if the socket has no pool element set
 */
int ConnectionList_moveToTail(ConnectionList* this, PooledSocket* socket)
{
   if (unlikely(PooledSocket_getPoolElem(socket) == NULL))
   {
      return -EINVAL;
   }

   PointerList_moveToTail(&this->pointerList, PooledSocket_getPoolElem(socket) );

   return 0;
}
/**
 * @return the length reported by the underlying pointer list
 */
static inline size_t ConnectionList_length(ConnectionList* this)
{
   size_t numElems = PointerList_length(&this->pointerList);

   return numElems;
}
#endif /*CONNECTIONLIST_H_*/

View File

@@ -0,0 +1,61 @@
#ifndef CONNECTIONLISTITER_H_
#define CONNECTIONLISTITER_H_
#include <common/toolkit/list/PointerListIter.h>
#include "ConnectionList.h"
struct ConnectionListIter;
typedef struct ConnectionListIter ConnectionListIter;
static inline void ConnectionListIter_init(ConnectionListIter* this, ConnectionList* list);
static inline void ConnectionListIter_next(ConnectionListIter* this);
static inline struct PooledSocket* ConnectionListIter_value(ConnectionListIter* this);
static inline bool ConnectionListIter_end(ConnectionListIter* this);
static inline ConnectionListIter ConnectionListIter_remove(ConnectionListIter* this);
/**
 * Iterator over a ConnectionList; a thin typed wrapper around PointerListIter.
 *
 * Keep pointerListIter as the first member: code may cast between ConnectionListIter* and
 * PointerListIter*.
 */
struct ConnectionListIter
{
PointerListIter pointerListIter; // underlying generic iterator
};
/**
 * Initializes the iterator on the pointer list underlying the given connection list.
 */
void ConnectionListIter_init(ConnectionListIter* this, ConnectionList* list)
{
   PointerList* elems = &list->pointerList;

   PointerListIter_init(&this->pointerListIter, elems);
}
/**
 * Advances the iterator by one element.
 */
void ConnectionListIter_next(ConnectionListIter* this)
{
   PointerListIter* iter = &this->pointerListIter;

   PointerListIter_next(iter);
}
/**
 * @return the socket stored in the element the iterator currently points to
 */
struct PooledSocket* ConnectionListIter_value(ConnectionListIter* this)
{
   struct PooledSocket* sock =
      (struct PooledSocket*)PointerListIter_value(&this->pointerListIter);

   return sock;
}
/**
 * @return the end-of-list indication of the underlying pointer list iterator
 */
bool ConnectionListIter_end(ConnectionListIter* this)
{
   PointerListIter* iter = &this->pointerListIter;

   return PointerListIter_end(iter);
}
/**
 * Removes the element the iterator currently points to and clears the pool back-references
 * of the removed socket.
 *
 * note: the current iterator becomes invalid after the call (use the returned iterator)
 * NOTE(review): the back-references are cleared unconditionally here, whereas
 *    ConnectionList_remove() only does so for owner lists - confirm this is intended.
 *
 * @return the new iterator that points to the element just behind the erased one
 */
ConnectionListIter ConnectionListIter_remove(ConnectionListIter* this)
{
   PooledSocket* removedSock = ConnectionListIter_value(this);
   ConnectionListIter following = *this;

   // advance the copy before the current element goes away
   ConnectionListIter_next(&following);

   PointerListIter_remove(&this->pointerListIter);
   PooledSocket_setPool(removedSock, NULL, NULL);

   return following;
}
#endif /*CONNECTIONLISTITER_H_*/

View File

@@ -0,0 +1,15 @@
#ifndef DEVICEPRIORITYCONTEXT_H_
#define DEVICEPRIORITYCONTEXT_H_
/**
 * Plain parameter bundle; its consumers are not part of this header.
 * NOTE(review): presumably consulted when ranking devices/NICs for a connection - confirm
 * against the users of this struct.
 */
struct DevicePriorityContext
{
int maxConns; // NOTE(review): presumably a connection limit; exact semantics not visible here
#ifdef BEEGFS_NVFS
// index of GPU related to the first page, -1 for none
int gpuIndex;
#endif
};
typedef struct DevicePriorityContext DevicePriorityContext;
#endif // DEVICEPRIORITYCONTEXT_H_

View File

@@ -0,0 +1,227 @@
#include "MirrorBuddyGroup.h"
/**
 * Allocates and initializes a reference-counted mirror buddy group.
 *
 * @param doneBufferSize capacity of both the in-flight sequence number heap and the ring buffer
 *    of finished sequence numbers; also the initial count of the slotsAvail semaphore. Must be
 *    non-zero.
 * @return the new group (release with MirrorBuddyGroup_put()), or NULL if doneBufferSize is 0
 *    or an allocation failed
 */
struct MirrorBuddyGroup* MirrorBuddyGroup_constructFromTargetIDs(uint16_t groupID,
   uint16_t doneBufferSize, uint16_t firstTargetID, uint16_t secondTargetID)
{
   struct MirrorBuddyGroup* this = (MirrorBuddyGroup*) os_kmalloc(sizeof(*this));
   struct BuddySequenceNumber* inFlight;
   uint64_t* finished;

   if (!this)
      return NULL;

   this->groupID = groupID;
   this->firstTargetID = firstTargetID;
   this->secondTargetID = secondTargetID;
   this->sequence = 0;

   if (doneBufferSize == 0)
      goto fail;

   this->inFlightSize = 0;
   this->inFlightCapacity = doneBufferSize;

   // try kmalloc first and fall back to vmalloc if that fails
   inFlight = kmalloc(doneBufferSize * sizeof(struct BuddySequenceNumber), GFP_NOFS);
   if (!inFlight)
      inFlight = vmalloc(doneBufferSize * sizeof(struct BuddySequenceNumber));

   this->seqNoInFlight = inFlight;
   if (!inFlight)
      goto fail;

   this->firstFinishedIndex = 0;
   this->finishedCount = 0;

   // same allocation strategy for the ring buffer of finished sequence numbers
   finished = kmalloc(doneBufferSize * sizeof(uint64_t), GFP_NOFS);
   if (!finished)
      finished = vmalloc(doneBufferSize * sizeof(uint64_t));

   this->finishedSeqNums = finished;
   if (!finished)
      goto fail_seqNoInFlight;

   mutex_init(&this->mtx);
   sema_init(&this->slotsAvail, doneBufferSize);
   kref_init(&this->refs);

   return this;

fail_seqNoInFlight:
   // the heap array may have come from either allocator
   if (is_vmalloc_addr(this->seqNoInFlight))
      vfree(this->seqNoInFlight);
   else
      kfree(this->seqNoInFlight);

fail:
   kfree(this);
   return NULL;
}
static void __MirrorBuddyGroup_destruct(struct kref* ref)
{
MirrorBuddyGroup* this = container_of(ref, MirrorBuddyGroup, refs);
if (is_vmalloc_addr(this->seqNoInFlight))
vfree(this->seqNoInFlight);
else
kfree(this->seqNoInFlight);
if (is_vmalloc_addr(this->finishedSeqNums))
vfree(this->finishedSeqNums);
else
kfree(this->finishedSeqNums);
mutex_destroy(&this->mtx);
kfree(this);
}
/**
 * Drops one reference to the group; the group is destructed when the last reference is gone.
 */
void MirrorBuddyGroup_put(MirrorBuddyGroup* this)
{
   struct kref* refs = &this->refs;

   kref_put(refs, __MirrorBuddyGroup_destruct);
}
/**
 * Reserves an in-flight slot and hands out the next sequence number of this group.
 *
 * On success the group holds an additional reference and one slotsAvail slot on behalf of the
 * caller; both are given back by MirrorBuddyGroup_releaseSequenceNumber().
 *
 * @param acknowledgeSeq out: a finished sequence number from the ring buffer (selective), or
 *    the smallest in-flight sequence number minus one (non-selective)
 * @param isSelective out: whether acknowledgeSeq was taken from the ring buffer
 * @param seqNo out: the newly assigned sequence number (0 if no base has been set)
 * @param handle out: heap slot of the new sequence number; the address of this pointer is
 *    stored in the heap and updated whenever the slot moves, so it must stay valid until release
 * @param allowWait if true, block (killable) until a slot is free; otherwise fail immediately
 * @return 0 on success; positive errno values otherwise: EINTR (wait was interrupted), EAGAIN
 *    (no free slot and !allowWait), ENOENT (sequence is still 0, nothing was acquired)
 *
 * NOTE(review): on the ENOENT path acknowledgeSeq and isSelective are left untouched - callers
 *    that treat ENOENT as success should not rely on them.
 */
int MirrorBuddyGroup_acquireSequenceNumber(MirrorBuddyGroup* this,
   uint64_t* acknowledgeSeq, bool* isSelective, uint64_t* seqNo,
   struct BuddySequenceNumber** handle, bool allowWait)
{
   int result = 0;

   if (!allowWait)
   {
      if (down_trylock(&this->slotsAvail))
         return EAGAIN;
   }
   else
   {
      if (down_killable(&this->slotsAvail))
         return EINTR;
   }

   kref_get(&this->refs);

   mutex_lock(&this->mtx);

   if (this->sequence == 0)
   {
      // no sequence base yet: hand back the slot and the reference taken above
      *seqNo = 0;
      *handle = NULL;
      result = ENOENT;

      up(&this->slotsAvail);
      MirrorBuddyGroup_put(this);
   }
   else
   {
      struct BuddySequenceNumber* newSlot = &this->seqNoInFlight[this->inFlightSize];

      *seqNo = ++this->sequence;

      // seqNoInFlight is a binary min-heap, and we add values from a strictly increasing
      // sequence. thus any such append produces a correctly formed heap.
      newSlot->pSelf = handle;
      newSlot->value = *seqNo;
      *handle = newSlot;

      this->inFlightSize++;

      if (this->finishedCount == 0)
      {
         // nothing finished out of order: everything below the heap minimum is done
         *acknowledgeSeq = this->seqNoInFlight[0].value - 1;
         *isSelective = false;
      }
      else
      {
         // pop the oldest entry of the finished ring buffer
         *acknowledgeSeq = this->finishedSeqNums[this->firstFinishedIndex];

         this->firstFinishedIndex = (this->firstFinishedIndex + 1) % this->inFlightCapacity;
         this->finishedCount -= 1;
         *isSelective = true;
      }
   }

   mutex_unlock(&this->mtx);

   return result;
}
/**
 * Marks a previously acquired sequence number as finished.
 *
 * The sequence number is recorded in the finished ring buffer and removed from the in-flight
 * min-heap. Afterwards the slot and the group reference taken by
 * MirrorBuddyGroup_acquireSequenceNumber() are given back.
 *
 * @param handle the handle that was filled in by MirrorBuddyGroup_acquireSequenceNumber(); the
 *    heap keeps it up to date through the slot's pSelf backlink
 */
void MirrorBuddyGroup_releaseSequenceNumber(MirrorBuddyGroup* this,
   struct BuddySequenceNumber** handle)
{
   mutex_lock(&this->mtx);
   {
      struct BuddySequenceNumber* slot = *handle;
      unsigned nextFinishedIndex;

      BEEGFS_BUG_ON_DEBUG(slot < &this->seqNoInFlight[0], "");
      BEEGFS_BUG_ON_DEBUG(slot >= &this->seqNoInFlight[this->inFlightSize], "");

      // before maintaining the heap, add the sequence number to the "finished seq#" ringbuffer.
      if (this->finishedCount < this->inFlightCapacity)
      {
         this->finishedCount = this->finishedCount + 1;
         nextFinishedIndex = (this->firstFinishedIndex + this->finishedCount - 1)
            % this->inFlightCapacity;
      }
      else
      {
         this->firstFinishedIndex = (this->firstFinishedIndex + 1) % this->inFlightCapacity;
         nextFinishedIndex = this->firstFinishedIndex;
      }

      this->finishedSeqNums[nextFinishedIndex] = (*handle)->value;

      // decrease the key of the seq# to minimum: bubble the slot up to the root by swapping it
      // with its ancestors. each swap also refreshes the backlinks of both moved entries.
      while (slot != &this->seqNoInFlight[0])
      {
         const ptrdiff_t index = slot - &this->seqNoInFlight[0];
         // the heap is 0-based with children at 2i+1 and 2i+2 (see the sift-down below), so the
         // parent of index is (index - 1) / 2. using index / 2 would swap with a non-ancestor
         // for even indices and could break the heap property.
         const ptrdiff_t parentIndex = (index - 1) / 2;

         swap(this->seqNoInFlight[index], this->seqNoInFlight[parentIndex]);
         *this->seqNoInFlight[index].pSelf = &this->seqNoInFlight[index];
         *this->seqNoInFlight[parentIndex].pSelf = &this->seqNoInFlight[parentIndex];

         slot = &this->seqNoInFlight[parentIndex];
      }

      // remove the "minimal" element: swap the root with the last entry and shrink the heap
      this->inFlightSize--;
      swap(this->seqNoInFlight[0], this->seqNoInFlight[this->inFlightSize]);
      *this->seqNoInFlight[0].pSelf = &this->seqNoInFlight[0];
      *this->seqNoInFlight[this->inFlightSize].pSelf = &this->seqNoInFlight[this->inFlightSize];

      // move the new root down to restore the heap property
      {
         unsigned i;

         for (i = 0; ;)
         {
            const unsigned leftChild = 2 * i + 1;
            const unsigned rightChild = 2 * i + 2;
            unsigned minNode = i;

            if (leftChild < this->inFlightSize
                  && this->seqNoInFlight[leftChild].value < this->seqNoInFlight[minNode].value)
               minNode = leftChild;

            if (rightChild < this->inFlightSize
                  && this->seqNoInFlight[rightChild].value < this->seqNoInFlight[minNode].value)
               minNode = rightChild;

            if (minNode != i)
            {
               swap(this->seqNoInFlight[i], this->seqNoInFlight[minNode]);
               *this->seqNoInFlight[i].pSelf = &this->seqNoInFlight[i];
               *this->seqNoInFlight[minNode].pSelf = &this->seqNoInFlight[minNode];
               i = minNode;
            }
            else
            {
               break;
            }
         }
      }
   }
   mutex_unlock(&this->mtx);

   // give back the slot and the reference taken when the sequence number was acquired
   up(&this->slotsAvail);
   MirrorBuddyGroup_put(this);
}
/**
 * Raises the group's sequence counter to seqNoBase; the counter is never lowered.
 */
void MirrorBuddyGroup_setSeqNoBase(MirrorBuddyGroup* this, uint64_t seqNoBase)
{
   mutex_lock(&this->mtx);

   if (seqNoBase > this->sequence)
   {
      this->sequence = seqNoBase;
   }

   mutex_unlock(&this->mtx);
}

View File

@@ -0,0 +1,62 @@
#ifndef MIRRORBUDDYGROUP_H
#define MIRRORBUDDYGROUP_H
#include <common/Common.h>
#include <common/toolkit/MathTk.h>
#include <os/OsCompat.h>
struct MirrorBuddyGroup;
typedef struct MirrorBuddyGroup MirrorBuddyGroup;
// used to build a binary min-heap with backlink handles.
// an inserter will receive a handle to the inserted value, this handle may be used to remove the
// value in logarithmic time.
// whenever a heap entry is moved to another array slot, the pointer that pSelf refers to is
// rewritten to the entry's new address, so the inserter's handle always follows its entry.
struct BuddySequenceNumber
{
struct BuddySequenceNumber** pSelf; // address of the inserter's handle (the backlink)
uint64_t value; // the sequence number; heap key
};
/**
 * A mirror buddy group: two target IDs plus the bookkeeping for sequence numbers handed out
 * for this group. Reference counted through refs (see MirrorBuddyGroup_put()).
 */
struct MirrorBuddyGroup
{
uint16_t groupID;
uint16_t firstTargetID;
uint16_t secondTargetID;
uint64_t sequence; // last sequence number handed out; 0 means no base has been set yet
struct semaphore slotsAvail; // counts free in-flight slots; initialized to the buffer size
struct BuddySequenceNumber* seqNoInFlight; // binary min-heap of in-flight sequence numbers
unsigned inFlightSize; // number of used entries in seqNoInFlight
unsigned inFlightCapacity; // number of entries allocated for both arrays
uint64_t* finishedSeqNums; // ring buffer of finished sequence numbers
unsigned firstFinishedIndex; // index of the oldest entry in finishedSeqNums
unsigned finishedCount; // number of valid entries in finishedSeqNums
struct mutex mtx; // protects sequence, the heap and the ring buffer
struct kref refs;
/* private */
// linkage: the two members share storage, so a group can only be linked through one of them
// at a time
union {
struct rb_node _rb_node;
struct list_head _list;
};
};
extern struct MirrorBuddyGroup* MirrorBuddyGroup_constructFromTargetIDs(uint16_t groupID,
uint16_t doneBufferSize, uint16_t firstTargetID, uint16_t secondTargetID);
extern void MirrorBuddyGroup_put(MirrorBuddyGroup* this);
extern int MirrorBuddyGroup_acquireSequenceNumber(MirrorBuddyGroup* this,
uint64_t* acknowledgeSeq, bool* isSelective, uint64_t* seqNo,
struct BuddySequenceNumber** handle, bool allowWait);
extern void MirrorBuddyGroup_releaseSequenceNumber(MirrorBuddyGroup* this,
struct BuddySequenceNumber** handle);
extern void MirrorBuddyGroup_setSeqNoBase(MirrorBuddyGroup* this, uint64_t seqNoBase);
#endif /* MIRRORBUDDYGROUP_H */

View File

@@ -0,0 +1,293 @@
#include <common/nodes/TargetMapper.h>
#include "MirrorBuddyGroupMapper.h"
// rbtree helpers for the mapper's mirrorBuddyGroups tree, keyed by the uint16_t groupID of
// struct MirrorBuddyGroup and linked through its _rb_node member.
// NOTE(review): presumably this expands to the static _MirrorBuddyGroupMapper_find/_insert/
// _insertOrReplace/_erase functions used below - confirm against the macro definition.
BEEGFS_RBTREE_FUNCTIONS(static, _MirrorBuddyGroupMapper, struct MirrorBuddyGroupMapper,
mirrorBuddyGroups,
uint16_t,
struct MirrorBuddyGroup, groupID, _rb_node,
BEEGFS_RB_KEYCMP_LT_INTEGRAL)
/**
 * Initializes the mapper with an empty group tree.
 */
void MirrorBuddyGroupMapper_init(MirrorBuddyGroupMapper* this)
{
   this->mirrorBuddyGroups = RB_ROOT;

   RWLock_init(&this->rwlock);
}
/**
 * Drops the tree's reference to every group and resets the tree to empty. Takes no lock.
 */
static void _MirrorBuddyGroupMapper_clear(MirrorBuddyGroupMapper* this)
{
   MirrorBuddyGroup* group;
   MirrorBuddyGroup* nextGroup;

   // postorder + safe variant: a group may be freed by the put while we keep iterating
   rbtree_postorder_for_each_entry_safe(group, nextGroup, &this->mirrorBuddyGroups, _rb_node)
   {
      MirrorBuddyGroup_put(group);
   }

   this->mirrorBuddyGroups = RB_ROOT;
}
/**
 * Allocates and initializes a mapper.
 *
 * @return the new mapper, or NULL if the allocation failed
 */
MirrorBuddyGroupMapper* MirrorBuddyGroupMapper_construct(void)
{
   MirrorBuddyGroupMapper* this;

   this = (MirrorBuddyGroupMapper*)os_kmalloc(sizeof(*this) );

   if (likely(this))
   {
      MirrorBuddyGroupMapper_init(this);
   }

   return this;
}
/**
 * Releases all groups held by the mapper; the mapper memory itself is not freed.
 */
void MirrorBuddyGroupMapper_uninit(MirrorBuddyGroupMapper* this)
{
   // the only resource owned by the mapper is its set of groups
   _MirrorBuddyGroupMapper_clear(this);
}
/**
 * Counterpart of MirrorBuddyGroupMapper_construct(): uninitializes and frees the mapper.
 */
void MirrorBuddyGroupMapper_destruct(MirrorBuddyGroupMapper* this)
{
   MirrorBuddyGroupMapper_uninit(this);

   kfree(this);
}
/**
 * Looks up the first (primary) target of a buddy group under the read lock.
 *
 * @return 0 if group ID not found
 */
uint16_t MirrorBuddyGroupMapper_getPrimaryTargetID(MirrorBuddyGroupMapper* this,
   uint16_t mirrorBuddyGroupID)
{
   uint16_t targetID = 0; // stays 0 for unknown groups
   MirrorBuddyGroup* buddyGroup;

   RWLock_readLock(&this->rwlock); // L O C K

   buddyGroup = _MirrorBuddyGroupMapper_find(this, mirrorBuddyGroupID);
   if(likely(buddyGroup))
   {
      targetID = buddyGroup->firstTargetID;
   }

   RWLock_readUnlock(&this->rwlock); // U N L O C K

   return targetID;
}
/**
 * Looks up the second (secondary) target of a buddy group under the read lock.
 *
 * @return 0 if group ID not found
 */
uint16_t MirrorBuddyGroupMapper_getSecondaryTargetID(MirrorBuddyGroupMapper* this,
   uint16_t mirrorBuddyGroupID)
{
   uint16_t targetID = 0; // stays 0 for unknown groups
   MirrorBuddyGroup* buddyGroup;

   RWLock_readLock(&this->rwlock); // L O C K

   buddyGroup = _MirrorBuddyGroupMapper_find(this, mirrorBuddyGroupID);
   if(likely(buddyGroup))
   {
      targetID = buddyGroup->secondTargetID;
   }

   RWLock_readUnlock(&this->rwlock); // U N L O C K

   return targetID;
}
/**
 * Acquires a sequence number from the buddy group with the given ID.
 *
 * A non-blocking acquire is tried first while holding the read lock. Only if that fails for a
 * reason other than ENOENT, the lock is dropped and a blocking acquire is done on a temporarily
 * referenced group.
 *
 * @param group out: the group that was found (set whenever the group exists)
 * @return 0 on success (including the "no seqNoBase set" case), ENOENT if the group ID is
 *    unknown, or the (positive) error of the blocking acquire
 */
int MirrorBuddyGroupMapper_acquireSequenceNumber(MirrorBuddyGroupMapper* this,
   uint16_t mirrorBuddyGroupID, uint64_t* seqNo, uint64_t* finishedSeqNo, bool* isSelective,
   struct BuddySequenceNumber** handle, struct MirrorBuddyGroup** group)
{
   MirrorBuddyGroup* buddyGroup;
   int result = 0;

   RWLock_readLock(&this->rwlock);

   buddyGroup = _MirrorBuddyGroupMapper_find(this, mirrorBuddyGroupID);
   if (!buddyGroup)
   {
      RWLock_readUnlock(&this->rwlock);
      return ENOENT;
   }

   *group = buddyGroup;

   result = MirrorBuddyGroup_acquireSequenceNumber(buddyGroup, finishedSeqNo, isSelective, seqNo,
      handle, false);

   if (result != 0 && result != ENOENT)
   {
      // nowait acquire failed, need to wait for a free slot. get a ref, unlock this, and go on
      kref_get(&buddyGroup->refs);
      RWLock_readUnlock(&this->rwlock);

      result = MirrorBuddyGroup_acquireSequenceNumber(buddyGroup, finishedSeqNo, isSelective,
         seqNo, handle, true);

      MirrorBuddyGroup_put(buddyGroup);

      // treat ENOENT as success here, too. we will hopefully never have to wait for a sequence
      // number on such a partially initialized state, but it may happen under very high load.
      return (result == ENOENT) ? 0 : result;
   }

   // treat ENOENT (no seqNoBase set) as success. the target node will reply with a generic
   // response that sets the seqNoBase for this buddy group.
   RWLock_readUnlock(&this->rwlock);

   return 0;
}
/**
 * Replaces the mapper's set of groups with the given mappings while holding the write lock.
 *
 * NOTE: no sanity checks in here
 */
void MirrorBuddyGroupMapper_syncGroups(MirrorBuddyGroupMapper* this,
   Config* config, struct list_head* groups)
{
   RWLock* lock = &this->rwlock;

   RWLock_writeLock(lock); // L O C K

   __MirrorBuddyGroupMapper_syncGroupsUnlocked(this, config, groups);

   RWLock_writeUnlock(lock); // U N L O C K
}
/**
 * Replaces the mapper's set of groups with the groups described by the given mappings.
 *
 * Groups that already exist are kept (with updated target IDs), so their sequence number state
 * survives; groups that are not mentioned in the list are dropped. A group that cannot be
 * constructed (allocation failure or config->connMaxInternodeNum == 0) is logged and skipped.
 *
 * note: caller must hold writelock.
 *
 * @param groups list of struct BuddyGroupMapping (linked via _list)
 */
void __MirrorBuddyGroupMapper_syncGroupsUnlocked(MirrorBuddyGroupMapper* this,
   Config* config, struct list_head* groups)
{
   struct BuddyGroupMapping* mapping;
   LIST_HEAD(newGroups);

   list_for_each_entry(mapping, groups, _list)
   {
      MirrorBuddyGroup* group;

      // if the group exists already, update it and move it over to the new tree
      group = _MirrorBuddyGroupMapper_find(this, mapping->groupID);
      if (group)
      {
         group->firstTargetID = mapping->primaryTargetID;
         group->secondTargetID = mapping->secondaryTargetID;

         // _rb_node and _list share storage, so the group must leave the tree before it is
         // linked into the temporary list
         _MirrorBuddyGroupMapper_erase(this, group);
      }
      else
      {
         group = MirrorBuddyGroup_constructFromTargetIDs(mapping->groupID,
            config->connMaxInternodeNum, mapping->primaryTargetID, mapping->secondaryTargetID);
         if (unlikely(!group) )
         {
            // construction returns NULL on failure; linking a NULL group would crash
            printk_fhgfs(KERN_INFO, "%s:%d: Failed to construct MirrorBuddyGroup (ID: %u); "
               "skipping.", __func__, __LINE__, (unsigned)mapping->groupID);
            continue;
         }
      }

      list_add_tail(&group->_list, &newGroups);
   }

   // whatever is still in the tree was not part of the new mappings
   _MirrorBuddyGroupMapper_clear(this);

   {
      MirrorBuddyGroup* pos;
      MirrorBuddyGroup* tmp;

      // safe iteration: inserting into the tree overwrites the list linkage of pos
      list_for_each_entry_safe(pos, tmp, &newGroups, _list)
      {
         MirrorBuddyGroup* replaced = _MirrorBuddyGroupMapper_insertOrReplace(this, pos);
         if (replaced)
            MirrorBuddyGroup_put(replaced);
      }
   }
}
/**
 * Adds a new buddy group to the map.
 *
 * @param targetMapper global targetmapper; must be set for storage nodes (and only for storage)
 * @param buddyGroupID The ID of the new buddy group. Must be non-zero.
 * @param primaryTargetID
 * @param secondaryTargetID
 * @param allowUpdate Allow updating an existing buddy group.
 * @return FhgfsOpsErr_SUCCESS, or FhgfsOpsErr_INVAL (group ID 0), FhgfsOpsErr_EXISTS (group
 *    exists and !allowUpdate), FhgfsOpsErr_UNKNOWNTARGET (a target is not mapped to a node),
 *    FhgfsOpsErr_INUSE (a target belongs to a different group), FhgfsOpsErr_OUTOFMEM
 *
 * NOTE(review): with allowUpdate set and an existing group, the outcome depends on what the
 *    generated _insert helper does on a duplicate key - confirm the new group is not leaked.
 */
FhgfsOpsErr MirrorBuddyGroupMapper_addGroup(MirrorBuddyGroupMapper* this,
   Config* config, TargetMapper* targetMapper, uint16_t buddyGroupID, uint16_t primaryTargetID,
   uint16_t secondaryTargetID, bool allowUpdate)
{
   FhgfsOpsErr res = FhgfsOpsErr_SUCCESS;
   MirrorBuddyGroup* newGroup;
   uint16_t primaryInGroup;
   uint16_t secondaryInGroup;
   bool primaryConflict;
   bool secondaryConflict;

   // ID = 0 is an error.
   if (buddyGroupID == 0)
      return FhgfsOpsErr_INVAL;

   RWLock_writeLock(&this->rwlock); // L O C K

   // If updates are not allowed, an already existing group is an error.
   if (!allowUpdate && _MirrorBuddyGroupMapper_find(this, buddyGroupID) )
   {
      res = FhgfsOpsErr_EXISTS;
      goto unlock;
   }

   // Check if both targets exist for storage nodes
   if (targetMapper)
   {
      NumNodeID primaryNodeID = TargetMapper_getNodeID(targetMapper, primaryTargetID);
      NumNodeID secondaryNodeID = TargetMapper_getNodeID(targetMapper, secondaryTargetID);

      if(NumNodeID_isZero(&primaryNodeID) || NumNodeID_isZero(&secondaryNodeID))
      {
         res = FhgfsOpsErr_UNKNOWNTARGET;
         goto unlock;
      }
   }

   // Check that both targets are not yet part of any (other) group.
   primaryInGroup = __MirrorBuddyGroupMapper_getBuddyGroupIDUnlocked(this, primaryTargetID);
   secondaryInGroup = __MirrorBuddyGroupMapper_getBuddyGroupIDUnlocked(this, secondaryTargetID);

   primaryConflict = (primaryInGroup != 0) && (primaryInGroup != buddyGroupID);
   secondaryConflict = (secondaryInGroup != 0) && (secondaryInGroup != buddyGroupID);

   if (primaryConflict || secondaryConflict)
   {
      res = FhgfsOpsErr_INUSE;
      goto unlock;
   }

   // Create and insert new mirror buddy group.
   newGroup = MirrorBuddyGroup_constructFromTargetIDs(buddyGroupID, config->connMaxInternodeNum,
      primaryTargetID, secondaryTargetID);

   if (unlikely(!newGroup) )
   {
      printk_fhgfs(KERN_INFO, "%s:%d: Failed to allocate memory for MirrorBuddyGroup.",
         __func__, __LINE__);
      res = FhgfsOpsErr_OUTOFMEM;
   }
   else
   {
      _MirrorBuddyGroupMapper_insert(this, newGroup);
   }

unlock:
   RWLock_writeUnlock(&this->rwlock); // U N L O C K

   return res;
}
/**
 * Searches all groups for one that contains the given target as first or second target.
 * Takes no lock itself.
 *
 * @return ID of the first matching group in iteration order, 0 if no group contains the target
 */
uint16_t __MirrorBuddyGroupMapper_getBuddyGroupIDUnlocked(MirrorBuddyGroupMapper* this,
   uint16_t targetID)
{
   MirrorBuddyGroup* candidate;

   BEEGFS_RBTREE_FOR_EACH_ENTRY(candidate, &this->mirrorBuddyGroups, _rb_node)
   {
      if (candidate->firstTargetID == targetID)
         return candidate->groupID;

      if (candidate->secondTargetID == targetID)
         return candidate->groupID;
   }

   return 0;
}

View File

@@ -0,0 +1,47 @@
#ifndef MIRRORBUDDYGROUPMAPPER_H_
#define MIRRORBUDDYGROUPMAPPER_H_
#include <app/config/Config.h>
#include <common/nodes/TargetMapper.h>
#include <common/nodes/MirrorBuddyGroup.h>
#include <common/threading/RWLock.h>
#include <common/toolkit/list/UInt16ListIter.h>
#include <common/toolkit/StringTk.h>
#include <common/storage/StorageErrors.h>
struct MirrorBuddyGroupMapper;
typedef struct MirrorBuddyGroupMapper MirrorBuddyGroupMapper;
extern void MirrorBuddyGroupMapper_init(MirrorBuddyGroupMapper* this);
extern MirrorBuddyGroupMapper* MirrorBuddyGroupMapper_construct(void);
extern void MirrorBuddyGroupMapper_uninit(MirrorBuddyGroupMapper* this);
extern void MirrorBuddyGroupMapper_destruct(MirrorBuddyGroupMapper* this);
extern uint16_t MirrorBuddyGroupMapper_getPrimaryTargetID(MirrorBuddyGroupMapper* this,
uint16_t mirrorBuddyGroupID);
extern uint16_t MirrorBuddyGroupMapper_getSecondaryTargetID(MirrorBuddyGroupMapper* this,
uint16_t mirrorBuddyGroupID);
extern int MirrorBuddyGroupMapper_acquireSequenceNumber(MirrorBuddyGroupMapper* this,
uint16_t mirrorBuddyGroupID, uint64_t* seqNo, uint64_t* finishedSeqNo, bool* isSelective,
struct BuddySequenceNumber** handle, struct MirrorBuddyGroup** group);
extern void MirrorBuddyGroupMapper_syncGroups(MirrorBuddyGroupMapper* this,
Config* config, struct list_head* groups);
extern FhgfsOpsErr MirrorBuddyGroupMapper_addGroup(MirrorBuddyGroupMapper* this,
Config* config, TargetMapper* targetMapper, uint16_t buddyGroupID, uint16_t primaryTargetID,
uint16_t secondaryTargetID, bool allowUpdate);
extern uint16_t __MirrorBuddyGroupMapper_getBuddyGroupIDUnlocked(MirrorBuddyGroupMapper* this,
uint16_t targetID);
extern void __MirrorBuddyGroupMapper_syncGroupsUnlocked(MirrorBuddyGroupMapper* this,
Config* config, struct list_head* groups);
/**
 * Maps mirror buddy group IDs to MirrorBuddyGroup objects.
 */
struct MirrorBuddyGroupMapper
{
// friend class TargetStateStore; // for atomic update of state change plus mirror group switch
RWLock rwlock; // protects mirrorBuddyGroups
struct rb_root mirrorBuddyGroups; /* struct MirrorBuddyGroup */
};
#endif /* MIRRORBUDDYGROUPMAPPER_H_ */

View File

@@ -0,0 +1,214 @@
#include <os/OsCompat.h>
#include "Node.h"
/**
 * Initializes a node with one reference and creates its connection pool.
 *
 * @param alias initial alias; copied (may be NULL, in which case no alias is set here)
 * @param portUDP value 0 if undefined
 * @param portTCP value 0 if undefined
 * @param nicList an internal copy will be created
 * @param localRdmaNicList an internal copy will be created
 */
void Node_init(Node* this, struct App* app, const char* alias, NumNodeID nodeNumID,
   unsigned short portUDP, unsigned short portTCP, NicAddressList* nicList,
   NicAddressList* localRdmaNicList)
{
   // synchronization primitives and reference count
   Mutex_init(&this->mutex);
   Condition_init(&this->changeCond);
   RWLock_init(&this->aliasAndTypeMu);
   kref_init(&this->references);

   // plain state
   Time_init(&this->lastHeartbeatT);
   this->isActive = false;
   this->numID = nodeNumID;
   this->portUDP = portUDP;

   // alias and type start out empty so that the setter below sees a defined state
   this->alias = NULL;
   this->nodeAliasWithTypeStr = NULL;
   this->nodeType = NODETYPE_Invalid;

   // We don't know the node type at this stage, it is set later.
   Node_setNodeAliasAndType(this, alias, NODETYPE_Invalid);

   // last, because the pool is handed a pointer to this node
   this->connPool = NodeConnPool_construct(app, this, portTCP, nicList, localRdmaNicList);
}
/**
 * Allocates and initializes a node (see Node_init()).
 *
 * @param nodeID initial alias of the node; copied
 * @param nicList an internal copy will be created
 * @param localRdmaNicList an internal copy will be created
 * @return the new node, or NULL if the allocation failed
 */
Node* Node_construct(struct App* app, const char* nodeID, NumNodeID nodeNumID,
   unsigned short portUDP, unsigned short portTCP, NicAddressList* nicList,
   NicAddressList* localRdmaNicList)
{
   Node* this = (Node*)os_kmalloc(sizeof(*this) );

   // never initialize through a NULL pointer; let the caller see the failed allocation
   if (unlikely(!this) )
      return NULL;

   Node_init(this, app, nodeID, nodeNumID, portUDP, portTCP, nicList, localRdmaNicList);

   return this;
}
/**
 * Releases everything owned by the node: the connection pool, both alias strings and the mutex.
 * The node memory itself is not freed.
 */
void Node_uninit(Node* this)
{
   // the pool goes first, while the rest of the node is still intact
   SAFE_DESTRUCT(this->connPool, NodeConnPool_destruct);

   SAFE_KFREE(this->nodeAliasWithTypeStr);
   SAFE_KFREE(this->alias);

   Mutex_uninit(&this->mutex);
}
/**
 * Uninitializes and frees the node. Called from the kref release path (see Node_put()).
 */
void __Node_destruct(Node* this)
{
   Node_uninit(this);

   kfree(this);
}
/**
 * Sets the last heartbeat time to now and wakes up everyone waiting on changeCond.
 */
void Node_updateLastHeartbeatT(Node* this)
{
   Mutex* lock = &this->mutex;

   Mutex_lock(lock);

   Time_setToNow(&this->lastHeartbeatT);
   Condition_broadcast(&this->changeCond);

   Mutex_unlock(lock);
}
/**
 * Updates the UDP port of the node and forwards the TCP port and NIC list to the connection
 * pool.
 *
 * @param portUDP value 0 if undefined
 * @param portTCP value 0 if undefined
 * @return true if the UDP port changed or the connection pool reported a change
 */
bool Node_updateInterfaces(Node* this, unsigned short portUDP, unsigned short portTCP,
   NicAddressList* nicList)
{
   bool udpChanged;
   bool poolChanged;

   Mutex_lock(&this->mutex);

   // an undefined (0) port never overwrites the stored one
   udpChanged = portUDP && (portUDP != this->portUDP);
   if(udpChanged)
   {
      this->portUDP = portUDP;
   }

   poolChanged = NodeConnPool_updateInterfaces(this->connPool, portTCP, nicList);

   Mutex_unlock(&this->mutex);

   return udpChanged || poolChanged;
}
/**
 * Translates a node type into its human-readable name.
 *
 * @return static string (not alloced => don't free it); "<unknown>" for values that are not
 *    part of the enum
 */
const char* Node_nodeTypeToStr(NodeType nodeType)
{
   const char* typeStr;

   switch(nodeType)
   {
      case NODETYPE_Invalid:
         typeStr = "<undefined/invalid>";
         break;

      case NODETYPE_Meta:
         typeStr = "beegfs-meta";
         break;

      case NODETYPE_Storage:
         typeStr = "beegfs-storage";
         break;

      case NODETYPE_Client:
         typeStr = "beegfs-client";
         break;

      case NODETYPE_Mgmt:
         typeStr = "beegfs-mgmtd";
         break;

      default:
         typeStr = "<unknown>";
         break;
   }

   return typeStr;
}
/**
 * Copies a string into the fixed-size NodeString buffer.
 *
 * If formatting fails, a fixed error text is stored instead. If the string does not fit, the
 * last four bytes of the buffer are overwritten with "..." plus the terminating zero.
 */
inline void putToNodeString(char *from, NodeString *outStr) {
   const size_t bufLen = sizeof outStr->buf;
   int written = snprintf(outStr->buf, bufLen, "%s", from);

   if (unlikely(written < 0)) {
      snprintf(outStr->buf, bufLen, "<error determining alias>");
      return;
   }

   // a return value >= buffer size means the output was truncated
   if (unlikely((size_t) written >= bufLen)) {
      memcpy(outStr->buf + bufLen - 4, "...\0", 4);
   }
}
/**
 * Copies the node's alias into outStr while holding the alias/type read lock.
 */
void Node_copyAlias(Node *this, NodeString *outStr) {
   RWLock *lock = &this->aliasAndTypeMu;

   RWLock_readLock(lock);
   putToNodeString(this->alias, outStr);
   RWLock_readUnlock(lock);
}
/**
 * Copies the node's combined alias/type string into outStr while holding the alias/type read
 * lock.
 */
void Node_copyAliasWithTypeStr(Node *this, NodeString *outStr) {
   RWLock *lock = &this->aliasAndTypeMu;

   RWLock_readLock(lock);
   putToNodeString(this->nodeAliasWithTypeStr, outStr);
   RWLock_readUnlock(lock);
}
/**
 * Updates alias and/or node type and regenerates nodeAliasWithTypeStr under the write lock.
 *
 * All allocations are done before any field is changed, so the node is left untouched if one of
 * them fails.
 *
 * @param aliasInput new alias (copied), or NULL to keep the current alias
 * @param nodeTypeInput new node type, or NODETYPE_Invalid to keep the current type
 * @return true on success (also if there was nothing to do), false if an allocation failed
 */
bool Node_setNodeAliasAndType(Node* this, const char *aliasInput, NodeType nodeTypeInput) {
   char *newAlias = NULL;
   char *newAliasWithType;
   const char *effectiveAlias;
   NodeType effectiveType;
   bool ok;

   if (!aliasInput && nodeTypeInput == NODETYPE_Invalid) {
      return true; // Nothing to do, return early.
   }

   if (aliasInput) {
      newAlias = StringTk_strDup(aliasInput);
      if (!newAlias) {
         return false;
      }
   }

   RWLock_writeLock(&this->aliasAndTypeMu);

   // inputs that were not given fall back to the current values of the node
   effectiveAlias = newAlias ? newAlias : this->alias;
   effectiveType = (nodeTypeInput == NODETYPE_Invalid) ? this->nodeType : nodeTypeInput;

   if (effectiveType != NODETYPE_Client) {
      newAliasWithType = kasprintf(GFP_NOFS, "%s [%s:%u]", effectiveAlias,
         Node_nodeTypeToStr(effectiveType), this->numID.value);
   }
   else {
      newAliasWithType = kasprintf(GFP_NOFS, "%s [%s:?]", effectiveAlias,
         Node_nodeTypeToStr(effectiveType));
   }

   ok = (newAliasWithType != NULL);

   if (ok) {
      // after the swaps the local pointers hold the previous strings, which are freed below
      if (newAlias) {
         swap(this->alias, newAlias);
      }

      if (nodeTypeInput != NODETYPE_Invalid) {
         this->nodeType = nodeTypeInput;
      }

      swap(this->nodeAliasWithTypeStr, newAliasWithType);
   }

   RWLock_writeUnlock(&this->aliasAndTypeMu);

   // on success: the replaced strings; on failure: the unused new alias
   if (newAlias) {
      kfree(newAlias);
   }

   if (newAliasWithType) {
      kfree(newAliasWithType);
   }

   return ok;
}

View File

@@ -0,0 +1,269 @@
#ifndef NODE_H_
#define NODE_H_
#include <common/Common.h>
#include <common/nodes/NumNodeID.h>
#include <common/threading/Condition.h>
#include <common/threading/Mutex.h>
#include <common/threading/RWLock.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/Time.h>
#include <linux/kref.h>
#include "NodeConnPool.h"
// forward declaration
struct App;
enum NodeType;
typedef enum NodeType NodeType;
struct Node;
typedef struct Node Node;
extern void Node_init(Node* this, struct App* app, const char* nodeID, NumNodeID nodeNumID,
unsigned short portUDP, unsigned short portTCP, NicAddressList* nicList,
NicAddressList* localRdmaNicList);
extern Node* Node_construct(struct App* app, const char* nodeID, NumNodeID nodeNumID,
unsigned short portUDP, unsigned short portTCP, NicAddressList* nicList,
NicAddressList* localRdmaNicList);
extern void Node_uninit(Node* this);
extern void __Node_destruct(Node* this);
extern void Node_updateLastHeartbeatT(Node* this);
extern bool Node_updateInterfaces(Node* this, unsigned short portUDP, unsigned short portTCP,
NicAddressList* nicList);
/**
 * NodeString is a structure to hold a formatted string representation related to a Node.
 *
 * This structure provides a fixed-size buffer that can store various strings associated with a
 * Node, such as the node's alias or a combination of the alias and node type. It is designed to be
 * used in functions where thread-safe operations are required to copy or format node-related
 * strings into a pre-allocated buffer (see Node_copyAlias() and Node_copyAliasWithTypeStr()).
 */
typedef struct NodeString {
// The mgmtd only allows aliases up to 32 characters with characters limited to "a-zA-Z0-9.-_".
// It is also used for the AliasWithTypeStr which may be up to 66 bytes:
//
// `<32 CHARACTER ALIAS> [<undefined/invalid>:4294967295]\0`
//
// Because it is unlikely we ever encounter an <undefined/invalid> node with an ID that is the
// uint32 max and a 32 character alias the buffer size is set to 64 bytes which corresponds to
// the cache line on most architectures. Longer strings will be truncated and end with ellipses.
char buf[64]; // always zero-terminated after a copy
} NodeString;
/**
* Node_copyAliasWithTypeStr copies the alias, numerical ID (for servers) and the node type in a
* human-readable string. Intended as a convenient way to get a string identifying a node/client for
* log messages.
*
* @param this is which Node to return an identifying string for.
* @param outStr is where the string should be copied to.
*
* IMPORTANT: With the shift from string IDs to aliases in b8.0, aliases may be updated dynamically.
* Callers MUST use this function to access the alias.
*/
extern void Node_copyAliasWithTypeStr(Node *this, NodeString *outStr);
/**
 * Node_copyAlias copies the node's alias (formerly known as a string ID) into a caller-provided
 * buffer.
 *
 * @param this is a pointer to the Node structure with the alias to get.
 * @param outStr is where the alias should be copied to. The copy is owned by the caller and stays
 * valid independent of later alias updates; an alias that does not fit is truncated.
 *
 * IMPORTANT: With the shift from string IDs to aliases in b8.0, aliases may be updated dynamically.
 * Callers MUST use this function to access the alias.
 */
extern void Node_copyAlias(Node *this, NodeString *outStr);
// static
extern const char* Node_nodeTypeToStr(NodeType nodeType);
// getters & setters
/**
 * Node_setNodeAliasAndType() handles thread safe updates to the alias (formerly node string ID),
 * node type, and nodeAliasWithTypeStr. It blocks until other writers (unlikely) have finished
 * making updates or readers (more likely) have finished copying these fields with
 * Node_copyAlias() or Node_copyAliasWithTypeStr().
 *
 * @param this is a pointer to the Node structure with the alias, nodeType, and
 * nodeAliasWithTypeStr.
 * @param alias is the alias to set. Copies the alias and does not take ownership of it. The caller
 * is responsible for freeing the alias when appropriate.
 * @param nodeType the nodeType to set.
 *
 * @return true on success (or if there was nothing to update), false if a memory allocation
 * failed; in that case the node is left unchanged.
 *
 * The alias or nodeType can be respectively NULL or set to NODETYPE_Invalid to only update one
 * field and update the nodeAliasWithTypeStr.
 */
extern bool Node_setNodeAliasAndType(Node* this, const char *alias, NodeType nodeType);
static inline NumNodeID Node_getNumID(Node* this);
static inline void Node_setNumID(Node* this, const NumNodeID numID);
static inline void Node_cloneNicList(Node* this, NicAddressList* nicList);
static inline void Node_updateLocalInterfaces(Node* this, NicAddressList* localNicList,
NicListCapabilities* localNicCaps, NicAddressList* localRdmaNicList);
static inline NodeConnPool* Node_getConnPool(Node* this);
static inline void Node_setIsActive(Node* this, bool isActive);
static inline bool Node_getIsActive(Node* this);
static inline unsigned short Node_getPortUDP(Node* this);
static inline unsigned short Node_getPortTCP(Node* this);
static inline NodeType Node_getNodeType(Node* this);
static inline const char* Node_getNodeTypeStr(Node* this);
// Kind of a node. NODETYPE_Invalid doubles as "type not set / leave unchanged" (see
// Node_setNodeAliasAndType()); Node_nodeTypeToStr() maps each value to a printable name.
enum NodeType
{NODETYPE_Invalid = 0, NODETYPE_Meta = 1, NODETYPE_Storage = 2, NODETYPE_Client = 3,
NODETYPE_Mgmt = 4};
/**
 * A reference-counted node (see Node_get() / Node_put()) together with its connection pool.
 */
struct Node
{
NumNodeID numID; // numeric ID, assigned by mgmtd server store (unused for clients)
// Must be locked before accessing the alias, nodeType, or nodeAliasWithTypeStr.
RWLock aliasAndTypeMu;
char* alias; // alias (formerly string ID): initially generated locally on each node but thread safe and able to be updated as of b8.0
NodeType nodeType; // set by NodeStore::addOrUpdate()
char* nodeAliasWithTypeStr; // for log messages (initially NULL, initialized when the alias is set/updated)
NodeConnPool* connPool; // created in Node_init(), destructed in Node_uninit()
unsigned short portUDP; // 0 if undefined
Time lastHeartbeatT; // last heartbeat receive time
bool isActive; // for internal use by the NodeStore only
Mutex mutex; // taken when accessing lastHeartbeatT, portUDP and isActive
Condition changeCond; // for last heartbeat time only
struct kref references; // reference count; the node is destructed when it drops to zero
/* used by node tree */
struct {
struct rb_node rbTreeElement;
} _nodeTree;
};
/**
 * Take an additional reference on the node.
 *
 * @return the node that was passed in, so the call can be used inline.
 */
static inline Node* Node_get(Node* node)
{
   struct kref* refs = &node->references;

   kref_get(refs);

   return node;
}
static inline void __Node_put(struct kref* ref)
{
__Node_destruct(container_of(ref, Node, references) );
}
/**
 * Drop one reference on the node; __Node_put() is registered as the release callback.
 *
 * @return whatever kref_put() returns (presumably non-zero when this call released the node -
 *    confirm against the kref documentation of the target kernel).
 */
static inline int Node_put(Node* node)
{
   struct kref* refs = &node->references;

   return kref_put(refs, __Node_put);
}
/**
 * @return the node's numeric ID (by value). No lock is taken for this read.
 */
NumNodeID Node_getNumID(Node* this)
{
return this->numID;
}
/**
 * Overwrite the node's numeric ID. No lock is taken for this write.
 *
 * @param numID the new ID (copied)
 */
void Node_setNumID(Node* this, const NumNodeID numID)
{
this->numID = numID;
}
/**
 * Retrieve NICs for the node. Delegates to NodeConnPool_cloneNicList() on the node's
 * connection pool.
 *
 * @param nicList an uninitialized NicAddressList. Caller is responsible for
 * memory management.
 */
void Node_cloneNicList(Node* this, NicAddressList* nicList)
{
NodeConnPool_cloneNicList(this->connPool, nicList);
}
/**
 * Forward updated local interface information to the node's connection pool
 * (NodeConnPool_updateLocalInterfaces() ).
 *
 * @param localNicList copied
 * @param localNicCaps copied
 * @param localRdmaNicList copied
 */
void Node_updateLocalInterfaces(Node* this, NicAddressList* localNicList,
NicListCapabilities* localNicCaps, NicAddressList* localRdmaNicList)
{
NodeConnPool_updateLocalInterfaces(this->connPool, localNicList, localNicCaps, localRdmaNicList);
}
/**
 * @return the node's connection pool pointer as stored in the node (no copy, no reference taken
 *    here).
 */
NodeConnPool* Node_getConnPool(Node* this)
{
return this->connPool;
}
/**
 * Set the isActive flag while holding the node mutex.
 *
 * Note: For nodes that live inside a NodeStore, this method should only be called by the NodeStore.
 */
void Node_setIsActive(Node* this, bool isActive)
{
Mutex_lock(&this->mutex);
this->isActive = isActive;
Mutex_unlock(&this->mutex);
}
/**
 * @return snapshot of the isActive flag, taken while holding the node mutex.
 */
bool Node_getIsActive(Node* this)
{
   bool active;

   Mutex_lock(&this->mutex);
   active = this->isActive;
   Mutex_unlock(&this->mutex);

   return active;
}
/**
 * @return snapshot of the node's UDP port, taken while holding the node mutex.
 */
unsigned short Node_getPortUDP(Node* this)
{
   unsigned short port;

   Mutex_lock(&this->mutex);
   port = this->portUDP;
   Mutex_unlock(&this->mutex);

   return port;
}
/**
 * @return the stream port of the node's connection pool (NodeConnPool_getStreamPort() ); the
 *    node itself stores no TCP port.
 */
unsigned short Node_getPortTCP(Node* this)
{
return NodeConnPool_getStreamPort(this->connPool);
}
/**
 * @return snapshot of the node type, taken under the aliasAndTypeMu read lock.
 */
NodeType Node_getNodeType(Node* this)
{
   NodeType type;

   RWLock_readLock(&this->aliasAndTypeMu);
   type = this->nodeType;
   RWLock_readUnlock(&this->aliasAndTypeMu);

   return type;
}
/**
 * Returns human-readable node type string. The node type is read and converted while holding
 * the aliasAndTypeMu read lock.
 *
 * @return static string (not alloced => don't free it)
 */
const char* Node_getNodeTypeStr(Node* this)
{
   const char* typeStr;

   RWLock_readLock(&this->aliasAndTypeMu);
   typeStr = Node_nodeTypeToStr(this->nodeType);
   RWLock_readUnlock(&this->aliasAndTypeMu);

   return typeStr;
}
#endif /*NODE_H_*/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,226 @@
#ifndef NODECONNPOOL_H_
#define NODECONNPOOL_H_
#include <app/log/Logger.h>
#include <app/App.h>
#include <common/net/sock/NetworkInterfaceCard.h>
#include <common/net/sock/StandardSocket.h>
#include <common/net/sock/NicAddressStatsList.h>
#include <common/threading/Mutex.h>
#include <common/threading/Condition.h>
#include <common/toolkit/ListTk.h>
#include <common/toolkit/SocketTk.h>
#include <common/Common.h>
#include "ConnectionList.h"
#include "ConnectionListIter.h"
#include "DevicePriorityContext.h"
// forward declaration
struct App;
struct Node;
struct NodeConnPoolStats;
typedef struct NodeConnPoolStats NodeConnPoolStats;
struct NodeConnPoolErrorState;
typedef struct NodeConnPoolErrorState NodeConnPoolErrorState;
struct NodeConnPool;
typedef struct NodeConnPool NodeConnPool;
extern void NodeConnPool_init(NodeConnPool* this, struct App* app, struct Node* parentNode,
unsigned short streamPort, NicAddressList* nicList, NicAddressList* localRdmaNicList);
extern NodeConnPool* NodeConnPool_construct(struct App* app, struct Node* parentNode,
unsigned short streamPort, NicAddressList* nicList, NicAddressList* localRdmaNicList);
extern void NodeConnPool_uninit(NodeConnPool* this);
extern void NodeConnPool_destruct(NodeConnPool* this);
extern Socket* NodeConnPool_acquireStreamSocket(NodeConnPool* this);
extern Socket* NodeConnPool_acquireStreamSocketEx(NodeConnPool* this, bool allowWaiting,
DevicePriorityContext* devPrioCtx);
extern void NodeConnPool_releaseStreamSocket(NodeConnPool* this, Socket* sock);
extern void NodeConnPool_invalidateStreamSocket(NodeConnPool* this, Socket* sock);
extern unsigned NodeConnPool_disconnectAvailableStreams(NodeConnPool* this);
extern unsigned NodeConnPool_disconnectAndResetIdleStreams(NodeConnPool* this);
extern bool NodeConnPool_updateInterfaces(NodeConnPool* this, unsigned short streamPort,
NicAddressList* nicList);
extern void __NodeConnPool_invalidateSpecificStreamSocket(NodeConnPool* this, Socket* sock);
extern unsigned __NodeConnPool_invalidateAvailableStreams(NodeConnPool* this,
bool idleStreamsOnly, bool closeOnRelease);
extern void __NodeConnPool_resetStreamsIdleFlag(NodeConnPool* this);
extern bool __NodeConnPool_applySocketOptionsPreConnect(NodeConnPool* this, Socket* sock);
extern bool __NodeConnPool_applySocketOptionsConnected(NodeConnPool* this, Socket* sock);
extern void __NodeConnPool_statsAddNic(NodeConnPool* this, NicAddrType_t nicType);
extern void __NodeConnPool_statsRemoveNic(NodeConnPool* this, NicAddrType_t nicType);
extern void __NodeConnPool_setCompleteFail(NodeConnPool* this);
extern void __NodeConnPool_setConnSuccess(NodeConnPool* this, struct in_addr lastSuccessPeerIP,
NicAddrType_t lastSuccessNicType);
extern bool __NodeConnPool_equalsLastSuccess(NodeConnPool* this, struct in_addr lastSuccessPeerIP,
NicAddrType_t lastSuccessNicType);
extern bool __NodeConnPool_isLastSuccessInitialized(NodeConnPool* this);
extern bool __NodeConnPool_shouldPrintConnectedLogMsg(NodeConnPool* this,
struct in_addr currentPeerIP, NicAddrType_t currentNicType);
extern bool __NodeConnPool_shouldPrintConnectFailedLogMsg(NodeConnPool* this,
struct in_addr currentPeerIP, NicAddrType_t currentNicType);
extern void NodeConnPool_getStats(NodeConnPool* this, NodeConnPoolStats* outStats);
extern unsigned NodeConnPool_getFirstPeerName(NodeConnPool* this, NicAddrType_t nicType,
ssize_t outBufLen, char* outBuf, bool* outIsNonPrimary);
/**
* @param localNicList copied
* @param localNicCaps copied
* @param localRdmaNicList copied
*/
extern void NodeConnPool_updateLocalInterfaces(NodeConnPool* this, NicAddressList* localNicList,
NicListCapabilities* localNicCaps, NicAddressList* localRdmaNicList);
// getters & setters
/**
* Called to lock the internal mutex when accessing resources that may be
* modified by other threads. Case in point: NodeConnPool_getNicListLocked.
*/
static inline void NodeConnPool_lock(NodeConnPool* this);
/**
* Release the lock acquired by NodeConnPool_lock.
*/
static inline void NodeConnPool_unlock(NodeConnPool* this);
static inline void NodeConnPool_cloneNicList(NodeConnPool* this, NicAddressList* nicList);
static inline NicAddressList* NodeConnPool_getNicListLocked(NodeConnPool* this);
static inline void NodeConnPool_setLocalNicCaps(NodeConnPool* this,
NicListCapabilities* localNicCaps);
static inline unsigned short NodeConnPool_getStreamPort(NodeConnPool* this);
static inline void NodeConnPool_setLogConnErrors(NodeConnPool* this, bool logConnErrors);
static inline bool __NodeConnPool_getWasLastTimeCompleteFail(NodeConnPool* this);
/**
 * Holds statistics about currently established connections.
 * Embedded in NodeConnPool as "stats" and used as the output type of NodeConnPool_getStats().
 */
struct NodeConnPoolStats
{
unsigned numEstablishedStd; // presumably: established standard (non-RDMA) conns - confirm in NodeConnPool.c
unsigned numEstablishedRDMA; // presumably: established RDMA conns - confirm in NodeConnPool.c
};
/**
 * Holds state of failed connects to avoid log spamming with conn errors.
 * Embedded in NodeConnPool as "errState".
 */
struct NodeConnPoolErrorState
{
struct in_addr lastSuccessPeerIP; // the last IP that we connected to successfully
NicAddrType_t lastSuccessNicType; // the last nic type that we connected to successfully
bool wasLastTimeCompleteFail; // true if last attempt failed on all routes
};
/**
 * This class represents a pool of stream connections to a certain node.
 *
 * The inline accessors in this header take "mutex" around nicList, localNicCaps and streamPort;
 * logConnErrors and errState.wasLastTimeCompleteFail are accessed by their inline accessors
 * without taking it.
 */
struct NodeConnPool
{
struct App* app;
NicAddressList nicList; // access under mutex (see NodeConnPool_getNicListLocked / _cloneNicList)
NicAddressList localRdmaNicList;
NicAddressStatsList rdmaNicStatsList;
ConnectionList connList;
struct Node* parentNode; // backlink to the node object to which this conn pool belongs
unsigned short streamPort; // read under mutex by NodeConnPool_getStreamPort()
NicListCapabilities localNicCaps; // written under mutex by NodeConnPool_setLocalNicCaps()
unsigned availableConns; // available established conns
unsigned establishedConns; // not equal to connList.size!!
unsigned maxConns;
unsigned fallbackExpirationSecs; // expiration time for conns to fallback interfaces
unsigned maxConcurrentAttempts;
int rdmaNicCount;
NodeConnPoolStats stats;
NodeConnPoolErrorState errState;
bool logConnErrors; // false to disable logging during acquireStream()
bool enableTCPFallback;
Mutex mutex; // taken by NodeConnPool_lock() / released by NodeConnPool_unlock()
Condition changeCond;
struct semaphore connSemaphore;
};
/**
 * Acquire the pool's internal mutex. Must be paired with NodeConnPool_unlock().
 */
void NodeConnPool_lock(NodeConnPool* this)
{
Mutex_lock(&this->mutex); // L O C K
}
/**
 * Release the pool's internal mutex previously acquired via NodeConnPool_lock().
 */
void NodeConnPool_unlock(NodeConnPool* this)
{
Mutex_unlock(&this->mutex); // U N L O C K
}
/**
 * Mutex lock must be held while using the returned pointer.
 *
 * This function does no locking itself; bracket the call and every use of the result with
 * NodeConnPool_lock() / NodeConnPool_unlock().
 *
 * @return pointer to the pool's internal nicList (not a copy)
 */
NicAddressList* NodeConnPool_getNicListLocked(NodeConnPool* this)
{
return &this->nicList;
}
/**
 * Retrieve NICs for the connection pool. The copy is made via ListTk_cloneNicAddressList()
 * while holding the pool mutex.
 *
 * @param nicList an uninitialized NicAddressList. Caller is responsible for
 * memory management.
 */
void NodeConnPool_cloneNicList(NodeConnPool* this, NicAddressList* nicList)
{
Mutex_lock(&this->mutex); // L O C K
// NOTE(review): meaning of the trailing "true" argument is defined by ListTk - confirm there
ListTk_cloneNicAddressList(&this->nicList, nicList, true);
Mutex_unlock(&this->mutex); // U N L O C K
}
/**
 * Replace the pool's local NIC capabilities while holding the pool mutex.
 *
 * @param localNicCaps will be copied (struct assignment)
 */
void NodeConnPool_setLocalNicCaps(NodeConnPool* this, NicListCapabilities* localNicCaps)
{
Mutex_lock(&this->mutex); // L O C K
this->localNicCaps = *localNicCaps;
Mutex_unlock(&this->mutex); // U N L O C K
}
/**
 * @return snapshot of the pool's stream port, taken while holding the pool mutex.
 */
unsigned short NodeConnPool_getStreamPort(NodeConnPool* this)
{
   unsigned short port;

   Mutex_lock(&this->mutex); // L O C K
   port = this->streamPort;
   Mutex_unlock(&this->mutex); // U N L O C K

   return port;
}
/**
 * Set the logConnErrors flag. The flag is written without taking the pool mutex.
 *
 * @param logConnErrors new value of the flag
 */
void NodeConnPool_setLogConnErrors(NodeConnPool* this, bool logConnErrors)
{
this->logConnErrors = logConnErrors;
}
/**
 * Reads errState.wasLastTimeCompleteFail without taking the pool mutex.
 *
 * @return true if connection on all available routes failed last time we tried.
 */
bool __NodeConnPool_getWasLastTimeCompleteFail(NodeConnPool* this)
{
return this->errState.wasLastTimeCompleteFail;
}
#endif /*NODECONNPOOL_H_*/

View File

@@ -0,0 +1,49 @@
#ifndef NODELIST_H_
#define NODELIST_H_
#include <common/toolkit/list/PointerList.h>
#include <common/Common.h>
struct Node;
struct NodeList;
typedef struct NodeList NodeList;
static inline void NodeList_init(NodeList* this);
static inline void NodeList_uninit(NodeList* this);
static inline void NodeList_append(NodeList* this, struct Node* node);
static inline void NodeList_removeHead(NodeList* this);
static inline size_t NodeList_length(NodeList* this);
/**
 * List of struct Node pointers, implemented as a thin wrapper around PointerList.
 * pointerList must remain the first member: the NodeList_* functions cast a NodeList* directly
 * to PointerList*.
 */
struct NodeList
{
struct PointerList pointerList;
};
/**
 * Initialize the list by initializing the underlying PointerList.
 */
void NodeList_init(NodeList* this)
{
   PointerList* base = (PointerList*)this;

   PointerList_init(base);
}
/**
 * Uninitialize the underlying PointerList. The nodes that the elements point to are not
 * touched by this function itself.
 */
void NodeList_uninit(NodeList* this)
{
   PointerList* base = (PointerList*)this;

   PointerList_uninit(base);
}
/**
 * Append a node pointer to the list (PointerList_append() ). No reference is taken on the node
 * by this function.
 */
void NodeList_append(NodeList* this, struct Node* node)
{
   PointerList* base = (PointerList*)this;

   PointerList_append(base, node);
}
/**
 * Remove the head element of the list (PointerList_removeHead() ). The node it pointed to is
 * not released by this function.
 */
void NodeList_removeHead(NodeList* this)
{
   PointerList* base = (PointerList*)this;

   PointerList_removeHead(base);
}
/**
 * @return the length reported by the underlying PointerList (PointerList_length() ).
 */
static inline size_t NodeList_length(NodeList* this)
{
   PointerList* base = (PointerList*)this;

   return PointerList_length(base);
}
#endif /* NODELIST_H_ */

View File

@@ -0,0 +1,48 @@
#ifndef NODELISTITER_H_
#define NODELISTITER_H_
#include <common/toolkit/list/PointerListIter.h>
#include <common/Common.h>
#include "NodeList.h"
#include <common/nodes/Node.h>
struct Node;
struct NodeListIter;
typedef struct NodeListIter NodeListIter;
static inline void NodeListIter_init(NodeListIter* this, NodeList* list);
static inline void NodeListIter_next(NodeListIter* this);
static inline struct Node* NodeListIter_value(NodeListIter* this);
static inline bool NodeListIter_end(NodeListIter* this);
/**
 * Iterator over a NodeList, implemented as a thin wrapper around PointerListIter.
 * pointerListIter must remain the first member: the NodeListIter_* functions cast a
 * NodeListIter* directly to PointerListIter*.
 */
struct NodeListIter
{
PointerListIter pointerListIter;
};
/**
 * Initialize the iterator for the given list (PointerListIter_init() ).
 */
void NodeListIter_init(NodeListIter* this, NodeList* list)
{
   PointerListIter* baseIter = (PointerListIter*)this;
   PointerList* baseList = (PointerList*)list;

   PointerListIter_init(baseIter, baseList);
}
/**
 * Advance the iterator by one element (PointerListIter_next() ).
 */
void NodeListIter_next(NodeListIter* this)
{
   PointerListIter* baseIter = (PointerListIter*)this;

   PointerListIter_next(baseIter);
}
/**
 * @return the current element of the underlying PointerListIter, cast to struct Node*.
 */
struct Node* NodeListIter_value(NodeListIter* this)
{
   PointerListIter* baseIter = (PointerListIter*)this;

   return (struct Node*)PointerListIter_value(baseIter);
}
/**
 * @return the end-of-list indication of the underlying PointerListIter (PointerListIter_end() ).
 */
bool NodeListIter_end(NodeListIter* this)
{
   PointerListIter* baseIter = (PointerListIter*)this;

   return PointerListIter_end(baseIter);
}
#endif /* NODELISTITER_H_ */

View File

@@ -0,0 +1,94 @@
#include "NodeTree.h"
#include <os/OsCompat.h>
/**
 * Initialize an empty tree: empty rbtree root and element count 0.
 */
void NodeTree_init(NodeTree* this)
{
this->nodes = RB_ROOT;
this->size = 0;
}
/**
 * Drops one reference (Node_put() ) on every node in the tree.
 *
 * Neither the rbtree root nor the size counter is reset here, so the tree is left in the shape
 * it was - after this, any operation on the rbtree is undefined.
 */
void NodeTree_uninit(NodeTree* this)
{
   Node* current;
   Node* next;

   // postorder walk with a saved successor, so putting the current node is safe
   rbtree_postorder_for_each_entry_safe(current, next, &this->nodes, _nodeTree.rbTreeElement)
   {
      Node_put(current);
   }
}
/**
 * Look up a node by its numeric ID.
 *
 * No reference is taken on the returned node by this function, and no locking is done here.
 *
 * @return the node with the given ID, or NULL if the tree holds no such node
 */
Node* NodeTree_find(NodeTree* this, NumNodeID nodeNumID)
{
   struct rb_node* cursor = this->nodes.rb_node;

   while(cursor)
   {
      Node* candidate = rb_entry(cursor, Node, _nodeTree.rbTreeElement);
      NumNodeID candidateID = Node_getNumID(candidate);

      if(nodeNumID.value == candidateID.value)
         return candidate;

      // smaller IDs live in the left subtree, larger ones in the right subtree
      cursor = (nodeNumID.value < candidateID.value) ? cursor->rb_left : cursor->rb_right;
   }

   return NULL;
}
/**
 * Get the in-order successor of a node that is linked into the tree.
 *
 * @param this not used by the implementation
 * @param node current node
 * @return the next node according to rb_next(), or NULL if node was the last one
 */
Node* NodeTree_getNext(NodeTree* this, Node* node)
{
   struct rb_node* successor = rb_next(&node->_nodeTree.rbTreeElement);

   return successor ? rb_entry(successor, Node, _nodeTree.rbTreeElement) : NULL;
}
/**
 * Link a node into the tree under the given numeric ID.
 *
 * No additional reference is taken here (there is no Node_get() call); NodeTree_erase() and
 * NodeTree_uninit() later call Node_put() on linked nodes.
 *
 * @param nodeNumID key to insert under
 * @param node node to link; left untouched if the key already exists
 * @return true if the node was inserted, false if a node with the same ID is already present
 */
bool NodeTree_insert(NodeTree* this, NumNodeID nodeNumID, Node* node)
{
   struct rb_node** link = &this->nodes.rb_node;
   struct rb_node* parent = NULL;

   // descend to the empty child slot where the new node belongs
   while(*link)
   {
      Node* existing = rb_entry(*link, Node, _nodeTree.rbTreeElement);
      NumNodeID existingID = Node_getNumID(existing);

      if(nodeNumID.value == existingID.value)
         return false; // duplicate key

      parent = *link;
      link = (nodeNumID.value < existingID.value) ? &parent->rb_left : &parent->rb_right;
   }

   rb_link_node(&node->_nodeTree.rbTreeElement, parent, link);
   rb_insert_color(&node->_nodeTree.rbTreeElement, &this->nodes);

   this->size++;

   return true;
}
/**
 * Unlink the node with the given ID from the tree and drop one reference on it (Node_put() ).
 *
 * @return true if a node was found and removed, false if the ID is not in the tree
 */
bool NodeTree_erase(NodeTree* this, NumNodeID nodeNumID)
{
   Node* victim = NodeTree_find(this, nodeNumID);

   if(!victim)
      return false;

   rb_erase(&victim->_nodeTree.rbTreeElement, &this->nodes);
   this->size--;

   Node_put(victim);

   return true;
}

View File

@@ -0,0 +1,64 @@
#ifndef NodeTree_h_cfEUS0RiTiwS20ayW10wHn
#define NodeTree_h_cfEUS0RiTiwS20ayW10wHn
#include <common/nodes/Node.h>
#include <linux/rbtree.h>
struct NodeTree;
typedef struct NodeTree NodeTree;
struct NodeTreeIter;
typedef struct NodeTreeIter NodeTreeIter;
extern void NodeTree_init(NodeTree* this);
extern void NodeTree_uninit(NodeTree* this);
extern Node* NodeTree_find(NodeTree* this, NumNodeID nodeNumID);
extern Node* NodeTree_getNext(NodeTree* this, Node* node);
extern bool NodeTree_insert(NodeTree* this, NumNodeID nodeNumID, Node* node);
extern bool NodeTree_erase(NodeTree* this, NumNodeID nodeNumID);
static inline void NodeTreeIter_init(NodeTreeIter* this, NodeTree* tree);
static inline void NodeTreeIter_next(NodeTreeIter* this);
static inline Node* NodeTreeIter_value(NodeTreeIter* this);
static inline bool NodeTreeIter_end(NodeTreeIter* this);
/**
 * Red-black tree of Node objects keyed by their numeric ID. Nodes are linked through
 * Node::_nodeTree.rbTreeElement.
 */
struct NodeTree
{
struct rb_root nodes;
unsigned size; // incremented by NodeTree_insert(), decremented by NodeTree_erase()
};
/**
 * In-order iterator over a NodeTree.
 */
struct NodeTreeIter
{
struct rb_node* value; // current rbtree element; NULL once the iteration is past the last element
};
/**
 * Position the iterator on the first element of the tree (rb_first() ).
 */
void NodeTreeIter_init(NodeTreeIter* this, NodeTree* tree)
{
this->value = rb_first(&tree->nodes);
}
/**
 * Advance the iterator to the in-order successor (rb_next() ).
 * NOTE(review): passes this->value to rb_next() unchecked - presumably callers test
 * NodeTreeIter_end() first; confirm.
 */
void NodeTreeIter_next(NodeTreeIter* this)
{
this->value = rb_next(this->value);
}
/**
 * @return the Node that embeds the current rbtree element. No NULL check is done on the current
 *    element, so the result is only meaningful while NodeTreeIter_end() is false.
 */
Node* NodeTreeIter_value(NodeTreeIter* this)
{
return rb_entry(this->value, Node, _nodeTree.rbTreeElement);
}
/**
 * @return true once the iterator has no current element (its value is NULL).
 */
bool NodeTreeIter_end(NodeTreeIter* this)
{
   return !this->value;
}
#endif

View File

@@ -0,0 +1,15 @@
#include <common/toolkit/Serialization.h>
#include "NumNodeID.h"
/**
 * Serialize the ID by writing its value through Serialization_serializeUInt().
 */
void NumNodeID_serialize(SerializeCtx* ctx, const NumNodeID* this)
{
Serialization_serializeUInt(ctx, this->value);
}
/**
 * Deserialize an ID by reading its value through Serialization_deserializeUInt().
 *
 * @param outThis receives the value
 * @return true on success, false if the underlying deserialization failed
 */
bool NumNodeID_deserialize(DeserializeCtx* ctx, NumNodeID* outThis)
{
   return Serialization_deserializeUInt(ctx, &outThis->value);
}

View File

@@ -0,0 +1,41 @@
#ifndef NUMNODEID_H
#define NUMNODEID_H
#include <common/storage/StorageDefinitions.h>
#include <common/toolkit/SerializationTypes.h>
#include <common/toolkit/StringTk.h>
// Note: this must always be in sync with server's NumNodeId!
struct NumNodeID;
typedef struct NumNodeID NumNodeID;

/**
 * Numeric node ID: a 32-bit unsigned value wrapped in its own struct type.
 */
struct NumNodeID
{
   uint32_t value;
};

/**
 * Store the given value in the ID.
 */
static inline void NumNodeID_set(NumNodeID* this, uint32_t value)
{
   this->value = value;
}

/**
 * Equality test (not an ordering comparison).
 *
 * @return true if both IDs carry the same value
 */
static inline bool NumNodeID_compare(const NumNodeID* this, const NumNodeID* other)
{
   const uint32_t lhs = this->value;
   const uint32_t rhs = other->value;

   return lhs == rhs;
}

/**
 * @return true if the ID's value is 0
 */
static inline bool NumNodeID_isZero(const NumNodeID* this)
{
   return !this->value;
}
/**
 * @return string form of the ID's value as produced by StringTk_uintToStr().
 *    NOTE(review): the non-const char* return suggests the caller owns (and must free) the
 *    string - confirm against StringTk.
 */
static inline char* NumNodeID_str(const NumNodeID* this)
{
return StringTk_uintToStr(this->value);
}
extern void NumNodeID_serialize(SerializeCtx* ctx, const NumNodeID* this);
extern bool NumNodeID_deserialize(DeserializeCtx* ctx, NumNodeID* outThis);
#endif /* NUMNODEID_H */

View File

@@ -0,0 +1,122 @@
#include "TargetMapper.h"
/*
 * Instantiates the static rbtree helpers with the "_TargetMapper" prefix for the tree rooted at
 * TargetMapper::_entries. Elements are struct TargetMapping, keyed by their uint16_t targetID
 * and linked through _node. This file uses the resulting _TargetMapper_find() and
 * _TargetMapper_insertOrReplace().
 */
BEEGFS_RBTREE_FUNCTIONS(static, _TargetMapper, struct TargetMapper, _entries,
uint16_t,
struct TargetMapping, targetID, _node,
BEEGFS_RB_KEYCMP_LT_INTEGRAL)
/**
 * Initialize the lock and start with an empty mapping tree.
 */
void TargetMapper_init(TargetMapper* this)
{
RWLock_init(&this->rwlock);
this->_entries = RB_ROOT;
}
/**
 * Allocate and initialize a TargetMapper.
 *
 * The allocation result is checked before initialization (as TargetStateStore_construct() does),
 * so a failed allocation is reported to the caller instead of being dereferenced in
 * TargetMapper_init().
 *
 * @return the new mapper (release with TargetMapper_destruct() ), or NULL if the allocation
 *    failed
 */
TargetMapper* TargetMapper_construct(void)
{
   TargetMapper* this = (TargetMapper*)os_kmalloc(sizeof(*this) );

   if(this)
      TargetMapper_init(this);

   return this;
}
/**
 * Free all mapping entries of the tree (BEEGFS_KFREE_RBTREE). The mapper object itself is not
 * freed here.
 */
void TargetMapper_uninit(TargetMapper* this)
{
BEEGFS_KFREE_RBTREE(&this->_entries, struct TargetMapping, _node);
}
/**
 * Uninitialize the mapper and free the object itself. Counterpart of TargetMapper_construct().
 */
void TargetMapper_destruct(TargetMapper* this)
{
TargetMapper_uninit(this);
kfree(this);
}
/**
 * Map a targetID to a nodeID under the write lock.
 *
 * Note: re-maps targetID if it was mapped before; the replaced entry is freed.
 *
 * @param targetID key of the mapping
 * @param nodeID node that the target is mapped to
 * @return true if the targetID was not mapped before
 */
bool TargetMapper_mapTarget(TargetMapper* this, uint16_t targetID,
   NumNodeID nodeID)
{
   // __GFP_NOFAIL: the allocation is not expected to return NULL
   struct TargetMapping* entry = kmalloc(sizeof(*entry), GFP_NOFS | __GFP_NOFAIL);
   struct TargetMapping* replaced;
   bool wasUnmapped;

   entry->targetID = targetID;
   entry->nodeID = nodeID;

   RWLock_writeLock(&this->rwlock);
   replaced = _TargetMapper_insertOrReplace(this, entry);
   RWLock_writeUnlock(&this->rwlock);

   // decide the result before freeing, so the pointer is not inspected after kfree()
   wasUnmapped = (replaced == NULL);

   kfree(replaced);

   return wasUnmapped;
}
/**
 * Replaces all existing mappings with the given ones, under the write lock.
 *
 * @param mappings list of TargetMapping objects (linked through _list). The list is consumed:
 *    every element is unlinked from it and moved into the mapper's tree.
 *
 * Note: Does not add/remove targets from attached capacity pools.
 */
void TargetMapper_syncTargets(TargetMapper* this, struct list_head* mappings)
{
   struct TargetMapping* mapping;
   struct TargetMapping* next;

   RWLock_writeLock(&this->rwlock); // L O C K

   // drop all old entries and start over with an empty tree
   BEEGFS_KFREE_RBTREE(&this->_entries, struct TargetMapping, _node);
   this->_entries = RB_ROOT;

   list_for_each_entry_safe(mapping, next, mappings, _list)
   {
      struct TargetMapping* replaced;

      list_del(&mapping->_list);

      // an entry is only replaced if the list carries the same targetID twice
      replaced = _TargetMapper_insertOrReplace(this, mapping);
      kfree(replaced);
   }

   RWLock_writeUnlock(&this->rwlock); // U N L O C K
}
/**
 * Append the IDs of all mapped targets to the given list, in rbtree (rb_first/rb_next) order,
 * while holding the read lock.
 *
 * @param outTargetIDs list to append to; existing content is left in place
 */
void TargetMapper_getTargetIDs(TargetMapper* this, UInt16List* outTargetIDs)
{
   struct rb_node* cursor;

   RWLock_readLock(&this->rwlock); // L O C K

   cursor = rb_first(&this->_entries);

   while(cursor)
   {
      struct TargetMapping* mapping = rb_entry(cursor, struct TargetMapping, _node);

      UInt16List_append(outTargetIDs, mapping->targetID);

      cursor = rb_next(cursor);
   }

   RWLock_readUnlock(&this->rwlock); // U N L O C K
}
/**
 * Get nodeID for a certain targetID. The lookup runs under the read lock.
 *
 * @return 0 if targetID was not mapped
 */
NumNodeID TargetMapper_getNodeID(TargetMapper* this, uint16_t targetID)
{
   const struct TargetMapping* mapping;
   NumNodeID result;

   result.value = 0; // "not mapped" unless the lookup below succeeds

   RWLock_readLock(&this->rwlock); // L O C K

   mapping = _TargetMapper_find(this, targetID);
   if(mapping)
      result = mapping->nodeID;

   RWLock_readUnlock(&this->rwlock); // U N L O C K

   return result;
}

View File

@@ -0,0 +1,40 @@
#ifndef TARGETMAPPER_H_
#define TARGETMAPPER_H_
#include <app/App.h>
#include <common/toolkit/list/UInt16ListIter.h>
#include <common/Common.h>
#include <common/threading/RWLock.h>
#include <common/toolkit/Serialization.h>
#include <common/toolkit/StringTk.h>
#include <common/Types.h>
#include <linux/rbtree.h>
struct TargetMapper;
typedef struct TargetMapper TargetMapper;
extern void TargetMapper_init(TargetMapper* this);
extern TargetMapper* TargetMapper_construct(void);
extern void TargetMapper_uninit(TargetMapper* this);
extern void TargetMapper_destruct(TargetMapper* this);
extern bool TargetMapper_mapTarget(TargetMapper* this, uint16_t targetID,
NumNodeID nodeID);
extern void TargetMapper_syncTargets(TargetMapper* this, struct list_head* mappings);
extern void TargetMapper_getTargetIDs(TargetMapper* this, UInt16List* outTargetIDs);
extern NumNodeID TargetMapper_getNodeID(TargetMapper* this, uint16_t targetID);
/**
 * Maps targetIDs to nodeIDs. All tree accesses in TargetMapper.c happen under rwlock.
 */
struct TargetMapper
{
RWLock rwlock; // guards _entries
/* private: */
struct rb_root _entries; /* TargetMapping */
};
#endif /* TARGETMAPPER_H_ */

View File

@@ -0,0 +1,184 @@
#include "TargetStateStore.h"
/*
 * Instantiates the static rbtree helpers with the "_TargetStateStore" prefix for the tree rooted
 * at TargetStateStore::states. Elements are struct TargetStateInfo, keyed by their uint16_t
 * targetID and linked through _node. This file uses the resulting _TargetStateStore_find() and
 * _TargetStateStore_insertOrReplace().
 */
BEEGFS_RBTREE_FUNCTIONS(static, _TargetStateStore, struct TargetStateStore, states,
uint16_t,
struct TargetStateInfo, targetID, _node,
BEEGFS_RB_KEYCMP_LT_INTEGRAL)
/**
 * Initialize the lock and start with an empty state tree.
 */
void TargetStateStore_init(TargetStateStore* this)
{
RWLock_init(&this->rwlock);
this->states = RB_ROOT;
}
/**
 * Allocate and initialize a TargetStateStore.
 *
 * @return the new store (release with TargetStateStore_destruct() ), or NULL if the allocation
 *    failed
 */
TargetStateStore* TargetStateStore_construct(void)
{
   TargetStateStore* store = (TargetStateStore*)os_kmalloc(sizeof(*store) );

   if(likely(store) )
   {
      TargetStateStore_init(store);
   }

   return store;
}
/**
 * Free all state entries of the tree (BEEGFS_KFREE_RBTREE). The store object itself is not
 * freed here.
 */
void TargetStateStore_uninit(TargetStateStore* this)
{
BEEGFS_KFREE_RBTREE(&this->states, struct TargetStateInfo, _node);
}
/**
 * Uninitialize the store and free the object itself. Counterpart of
 * TargetStateStore_construct().
 */
void TargetStateStore_destruct(TargetStateStore* this)
{
TargetStateStore_uninit(this);
kfree(this);
}
/**
 * Atomically update target states and buddy groups together. This is important to avoid races
 * during a switch (e.g. where an outside viewer could see an offline primary).
 *
 * Of course, this implies that states and groups also have to be read atomically together via
 * getStatesAndGroupsAsLists() through GetStatesAndBuddyGroupsMsg.
 *
 * Lock order: the target states write lock is taken first, then the buddy groups write lock;
 * they are released in reverse order.
 *
 * @param config passed through to __MirrorBuddyGroupMapper_syncGroupsUnlocked()
 * @param buddyGroups mapper whose groups are replaced
 * @param states new states, passed to __TargetStateStore_syncStatesUnlocked()
 * @param groups new groups, passed to __MirrorBuddyGroupMapper_syncGroupsUnlocked()
 */
void TargetStateStore_syncStatesAndGroupsFromLists(TargetStateStore* this, Config* config,
MirrorBuddyGroupMapper* buddyGroups, struct list_head* states, struct list_head* groups)
{
RWLock_writeLock(&this->rwlock); // L O C K targetStates
RWLock_writeLock(&buddyGroups->rwlock); // L O C K buddyGroups
__TargetStateStore_syncStatesUnlocked(this, states);
__MirrorBuddyGroupMapper_syncGroupsUnlocked(buddyGroups, config, groups);
RWLock_writeUnlock(&buddyGroups->rwlock); // U N L O C K buddyGroups
RWLock_writeUnlock(&this->rwlock); // U N L O C K targetStates
}
/**
 * Replaces all stored states with copies of the states in the given list.
 *
 * @param states list of struct TargetStateMapping (linked through _list). The elements are only
 *    read; the list is not modified by this function.
 *
 * Note: Caller must hold writelock ("Unlocked" = this function does no locking itself).
 */
void __TargetStateStore_syncStatesUnlocked(TargetStateStore* this, struct list_head* states)
{
   struct TargetStateMapping* mapping;

   // clear existing map
   // NOTE(review): TargetMapper_syncTargets() additionally resets its root to RB_ROOT after this
   // macro; confirm that BEEGFS_KFREE_RBTREE leaves the root empty, since entries are inserted
   // right below.
   BEEGFS_KFREE_RBTREE(&this->states, struct TargetStateInfo, _node);

   list_for_each_entry(mapping, states, _list)
   {
      CombinedTargetState combined = { mapping->reachabilityState, mapping->consistencyState };
      TargetStateInfo* info = kmalloc(sizeof(*info), GFP_NOFS | __GFP_NOFAIL);
      TargetStateInfo* replaced;

      info->targetID = mapping->targetID;
      info->state = combined;

      // an entry is only replaced if the list carries the same targetID twice
      replaced = _TargetStateStore_insertOrReplace(this, info);
      kfree(replaced);
   }
}
/**
 * Translate a reachability state into a human-readable label.
 *
 * @return pointer to static string (so don't free it); "<invalid_value>" for unknown states
 */
const char* TargetStateStore_reachabilityStateToStr(TargetReachabilityState state)
{
   const char* label;

   switch(state)
   {
      case TargetReachabilityState_ONLINE:
         label = "Online";
         break;

      case TargetReachabilityState_POFFLINE:
         label = "Probably-offline";
         break;

      case TargetReachabilityState_OFFLINE:
         label = "Offline";
         break;

      default:
         label = "<invalid_value>";
         break;
   }

   return label;
}
/**
 * Translate a consistency state into a human-readable label.
 *
 * @return pointer to static string (so don't free it); "<invalid_value>" for unknown states
 */
const char* TargetStateStore_consistencyStateToStr(TargetConsistencyState state)
{
   const char* label;

   switch(state)
   {
      case TargetConsistencyState_GOOD:
         label = "Good";
         break;

      case TargetConsistencyState_NEEDS_RESYNC:
         label = "Needs-resync";
         break;

      case TargetConsistencyState_BAD:
         label = "Bad";
         break;

      default:
         label = "<invalid_value>";
         break;
   }

   return label;
}
/**
 * Append every stored target state to three parallel lists while holding the read lock: for
 * each entry, one element is appended to each list, so equal positions belong together
 * (provided the lists had equal lengths on entry).
 *
 * @param outTargetIDs receives the target IDs
 * @param outReachabilityStates receives the reachability state of each target
 * @param outConsistencyStates receives the consistency state of each target
 */
void TargetStateStore_getStatesAsLists(TargetStateStore* this, UInt16List* outTargetIDs,
UInt8List* outReachabilityStates, UInt8List* outConsistencyStates)
{
struct TargetStateInfo* stateInfo;
RWLock_readLock(&this->rwlock); // L O C K
BEEGFS_RBTREE_FOR_EACH_ENTRY(stateInfo, &this->states, _node)
{
UInt16List_append(outTargetIDs, stateInfo->targetID);
UInt8List_append(outReachabilityStates, stateInfo->state.reachabilityState);
UInt8List_append(outConsistencyStates, stateInfo->state.consistencyState);
}
RWLock_readUnlock(&this->rwlock); // U N L O C K
}
/**
 * Set the reachability state of every stored target to the given state, under the write lock.
 * Consistency states are left untouched.
 *
 * @param state reachability state to apply to all targets
 * @return true if at least one target actually changed its reachability state
 */
bool TargetStateStore_setAllStates(TargetStateStore* this, TargetReachabilityState state)
{
   struct TargetStateInfo* info;
   bool anyChanged = false;

   RWLock_writeLock(&this->rwlock); // L O C K

   BEEGFS_RBTREE_FOR_EACH_ENTRY(info, &this->states, _node)
   {
      if(info->state.reachabilityState != state)
      {
         info->state.reachabilityState = state;
         anyChanged = true;
      }
   }

   RWLock_writeUnlock(&this->rwlock); // U N L O C K

   return anyChanged;
}
/**
 * Look up the combined state of a target under the read lock.
 *
 * @param targetID target to look up
 * @param state out: the stored state if the target is known; otherwise it is set to
 *    OFFLINE / GOOD
 * @return true if the target was found, false otherwise
 */
bool TargetStateStore_getState(TargetStateStore* this, uint16_t targetID,
   CombinedTargetState* state)
{
   TargetStateInfo* info;
   bool found;

   RWLock_readLock(&this->rwlock); // L O C K

   info = _TargetStateStore_find(this, targetID);

   if(likely(info) )
   {
      found = true;
      *state = info->state;
   }
   else
   {
      // unknown target: report it as offline with good consistency
      found = false;
      state->reachabilityState = TargetReachabilityState_OFFLINE;
      state->consistencyState = TargetConsistencyState_GOOD;
   }

   RWLock_readUnlock(&this->rwlock); // U N L O C K

   return found;
}

View File

@@ -0,0 +1,41 @@
#ifndef TARGETSTATESTORE_H_
#define TARGETSTATESTORE_H_
#include <app/App.h>
#include <common/Common.h>
#include <common/Types.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/threading/RWLock.h>
#include <common/toolkit/StringTk.h>
#include <common/toolkit/list/UInt8ListIter.h>
#include <common/toolkit/list/UInt16ListIter.h>
struct TargetStateStore;
typedef struct TargetStateStore TargetStateStore;
extern void TargetStateStore_init(TargetStateStore* this);
extern TargetStateStore* TargetStateStore_construct(void);
extern void TargetStateStore_uninit(TargetStateStore* this);
extern void TargetStateStore_destruct(TargetStateStore* this);
extern void TargetStateStore_syncStatesAndGroupsFromLists(TargetStateStore* this, Config* config,
MirrorBuddyGroupMapper* buddyGroups, struct list_head* states, struct list_head* groups);
extern void TargetStateStore_getStatesAsLists(TargetStateStore* this, UInt16List* outTargetIDs,
UInt8List* outReachabilityStates, UInt8List* outConsistencyStates);
extern const char* TargetStateStore_reachabilityStateToStr(TargetReachabilityState state);
extern const char* TargetStateStore_consistencyStateToStr(TargetConsistencyState state);
extern bool TargetStateStore_setAllStates(TargetStateStore* this,
TargetReachabilityState state);
extern void __TargetStateStore_syncStatesUnlocked(TargetStateStore* this, struct list_head* states);
extern bool TargetStateStore_getState(TargetStateStore* this, uint16_t targetID,
CombinedTargetState* state);
/**
 * Stores the combined (reachability + consistency) state per targetID.
 * The public functions in TargetStateStore.c access the tree under rwlock;
 * __TargetStateStore_syncStatesUnlocked() expects the caller to hold the write lock.
 */
struct TargetStateStore
{
RWLock rwlock; // guards states
struct rb_root states; /* struct TargetStateInfo */
};
#endif /* TARGETSTATESTORE_H_ */