589 lines
16 KiB
C
589 lines
16 KiB
C
#include <common/net/sock/NetworkInterfaceCard.h>
|
|
#include <common/nodes/MirrorBuddyGroupMapper.h>
|
|
#include <common/nodes/NodeListIter.h>
|
|
#include <common/system/System.h>
|
|
#include <os/OsCompat.h>
|
|
#include "NodeStoreEx.h"
|
|
|
|
// Reference count above which NodeStoreEx_referenceNode() logs a "possible leak" warning
// (the check is only compiled in when BEEGFS_DEBUG is defined).
#define NODESTORE_WARN_REFNUM 2000
|
|
|
|
/**
|
|
* @param storeType will be applied to nodes on addOrUpdate()
|
|
*/
|
|
void NodeStoreEx_init(NodeStoreEx* this, App* app, NodeType storeType)
|
|
{
|
|
this->app = app;
|
|
|
|
RWLock_init(&this->rwLock);
|
|
|
|
NodeTree_init(&this->nodeTree);
|
|
|
|
this->newNodeAppeared = NULL;
|
|
|
|
this->_rootOwner = NodeOrGroup_fromGroup(0); // 0 means undefined/invalid
|
|
|
|
this->storeType = storeType;
|
|
}
|
|
|
|
/**
 * Allocates and initializes a new node store.
 *
 * @param app passed through to NodeStoreEx_init()
 * @param storeType passed through to NodeStoreEx_init()
 * @return the initialized store (release it with NodeStoreEx_destruct() ) or NULL if the
 *    memory allocation failed
 */
NodeStoreEx* NodeStoreEx_construct(App* app, NodeType storeType)
{
   NodeStoreEx* this = (NodeStoreEx*)os_kmalloc(sizeof(*this) );

   // initializing a failed allocation would dereference NULL, so hand the failure to the caller
   if(unlikely(!this) )
      return NULL;

   NodeStoreEx_init(this, app, storeType);

   return this;
}
|
|
|
|
/**
 * Counterpart of NodeStoreEx_init(): tears down the node tree of the store.
 *
 * Note: does not free the store object itself (see NodeStoreEx_destruct() for that).
 */
void NodeStoreEx_uninit(NodeStoreEx* this)
{
   NodeTree* tree = &this->nodeTree;

   NodeTree_uninit(tree);
}
|
|
|
|
/**
 * Uninitializes the store and frees its memory.
 *
 * @param this store to destroy; NULL is accepted and ignored (same convention as kfree() ),
 *    so cleanup paths can call this unconditionally
 */
void NodeStoreEx_destruct(NodeStoreEx* this)
{
   // uninit would dereference the pointer, so bail out early for NULL
   if(unlikely(!this) )
      return;

   NodeStoreEx_uninit(this);

   kfree(this);
}
|
|
|
|
|
|
/**
|
|
* @param node belongs to the store after calling this method; this method will set (*node=NULL);
|
|
* so do not free it and don't use it any more afterwards (reference it from this store if you need
|
|
* it)
|
|
* @return true if the node was not in the store yet, false otherwise
|
|
*/
|
|
bool NodeStoreEx_addOrUpdateNode(NodeStoreEx* this, Node** node)
|
|
{
|
|
const char* logContext = __func__;
|
|
Logger* log = App_getLogger(this->app);
|
|
|
|
NumNodeID nodeNumID = Node_getNumID(*node);
|
|
NodeString incomingAlias;
|
|
NodeString activeAlias;
|
|
bool setAliasResult;
|
|
Node* active;
|
|
NicAddressList nicList;
|
|
Node_copyAlias(*node, &incomingAlias);
|
|
|
|
// check if numeric ID is defined
|
|
|
|
if(unlikely(!nodeNumID.value) )
|
|
{ // undefined numeric ID should never happen
|
|
Logger_logErrFormatted(log, logContext,
|
|
"Rejecting node with undefined numeric ID: %s; Type: %s",
|
|
incomingAlias.buf, Node_nodeTypeToStr(this->storeType) );
|
|
Node_put(*node);
|
|
*node = NULL;
|
|
return false;
|
|
}
|
|
|
|
RWLock_writeLock(&this->rwLock); // L O C K
|
|
|
|
// is node in any of the stores already?
|
|
|
|
active = NodeTree_find(&this->nodeTree, nodeNumID);
|
|
|
|
if(active)
|
|
{ // node was in the store already => update it
|
|
Node_copyAlias(active, &activeAlias);
|
|
// If the string IDs differ, update the current active node ID from the incoming node ID.
|
|
// Ignore if the incomingNodeID is empty, this can happen when a node first starts up
|
|
// because it has to download its own alias from the mgmtd.
|
|
if(incomingAlias.buf[0] != '\0' && strcmp(incomingAlias.buf, activeAlias.buf))
|
|
{
|
|
// Before 8.0 BeeGFS logged "numeric ID collision for two different node string IDs".
|
|
// Starting in 8.0 string IDs are considered aliases and can be updated as needed.
|
|
Logger_logFormatted(log, 3, logContext,
|
|
"Updating alias for node: %s -> %s; Type: %s",
|
|
activeAlias.buf, incomingAlias.buf, Node_nodeTypeToStr(this->storeType) );
|
|
// The node type should not be updated this way so we set it to invalid (no update).
|
|
setAliasResult = Node_setNodeAliasAndType(active, incomingAlias.buf, NODETYPE_Invalid);
|
|
|
|
if (!setAliasResult) {
|
|
NodeString nodeAndType;
|
|
Node_copyAliasWithTypeStr(active, &nodeAndType);
|
|
|
|
Logger_logErrFormatted(log, logContext,
|
|
// Partial updates should never happen. Print what is set for both the alias and node
|
|
// alias with type string. We don't know what may be helpful for debugging.
|
|
"Error updating alias for node: %s : %s (ignoring)",
|
|
activeAlias.buf, nodeAndType.buf);
|
|
}
|
|
}
|
|
|
|
// update heartbeat time of existing node
|
|
Node_cloneNicList(*node, &nicList);
|
|
Node_updateLastHeartbeatT(active);
|
|
Node_updateInterfaces(active, Node_getPortUDP(*node), Node_getPortTCP(*node),
|
|
&nicList);
|
|
ListTk_kfreeNicAddressListElems(&nicList);
|
|
NicAddressList_uninit(&nicList);
|
|
|
|
Node_put(*node);
|
|
}
|
|
else
|
|
{ // node is not currently active => insert it
|
|
NodeTree_insert(&this->nodeTree, nodeNumID, *node);
|
|
|
|
#ifdef BEEGFS_DEBUG
|
|
// check whether this node type and store type differ
|
|
if( (Node_getNodeType(*node) != NODETYPE_Invalid) &&
|
|
(Node_getNodeType(*node) != this->storeType) )
|
|
{
|
|
Logger_logErrFormatted(log, logContext,
|
|
"Node type and store type differ. Node: %s %s; Store: %s",
|
|
Node_getNodeTypeStr(*node), incomingAlias.buf, Node_nodeTypeToStr(this->storeType) );
|
|
}
|
|
#endif // BEEGFS_DEBUG
|
|
|
|
Node_setIsActive(*node, true);
|
|
Node_setNodeAliasAndType(*node, NULL, this->storeType);
|
|
|
|
__NodeStoreEx_handleNodeVersion(this, *node);
|
|
|
|
if(this->newNodeAppeared)
|
|
complete(this->newNodeAppeared);
|
|
}
|
|
|
|
RWLock_writeUnlock(&this->rwLock); // U N L O C K
|
|
|
|
*node = NULL;
|
|
|
|
return !active;
|
|
}
|
|
|
|
/**
|
|
* Note: remember to call releaseNode()
|
|
*
|
|
* @return NULL if no such node exists
|
|
*/
|
|
Node* NodeStoreEx_referenceNode(NodeStoreEx* this, NumNodeID id)
|
|
{
|
|
Logger* log = App_getLogger(this->app);
|
|
Node* node = NULL;
|
|
|
|
// check for invalid id 0
|
|
#ifdef BEEGFS_DEBUG
|
|
if(!id.value)
|
|
{
|
|
Logger_log(log, Log_CRITICAL, __func__, "BUG?: Attempt to reference numeric node ID '0'");
|
|
dump_stack();
|
|
}
|
|
#endif // BEEGFS_DEBUG
|
|
|
|
IGNORE_UNUSED_VARIABLE(log);
|
|
|
|
RWLock_readLock(&this->rwLock); // L O C K
|
|
|
|
node = NodeTree_find(&this->nodeTree, id);
|
|
if (likely(node))
|
|
{ // found it
|
|
unsigned refs;
|
|
Node_get(node);
|
|
|
|
(void) refs;
|
|
// check for unusually high reference count
|
|
#ifdef BEEGFS_DEBUG
|
|
# ifdef KERNEL_HAS_KREF_READ
|
|
refs = kref_read(&node->references);
|
|
# else
|
|
refs = atomic_read(&node->references.refcount);
|
|
#endif
|
|
|
|
if (refs > NODESTORE_WARN_REFNUM) {
|
|
NodeString alias;
|
|
Node_copyAlias(node, &alias);
|
|
Logger_logFormatted(log, Log_CRITICAL, __func__,
|
|
"WARNING: Lots of references to node (=> leak?): %s %s; ref count: %d",
|
|
Node_getNodeTypeStr(node), alias.buf, refs); // G E T
|
|
}
|
|
#endif // BEEGFS_DEBUG
|
|
}
|
|
|
|
RWLock_readUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return node;
|
|
}
|
|
|
|
/**
|
|
* Note: remember to call releaseNode()
|
|
*
|
|
* @return NULL if no such node exists
|
|
*/
|
|
Node* NodeStoreEx_referenceRootNode(NodeStoreEx* this, NodeOrGroup* rootID)
|
|
{
|
|
Node* node = NULL;
|
|
MirrorBuddyGroupMapper* metaBuddyGroupMapper = App_getMetaBuddyGroupMapper(this->app);
|
|
NumNodeID nodeID;
|
|
|
|
RWLock_readLock(&this->rwLock); // L O C K
|
|
|
|
*rootID = this->_rootOwner;
|
|
if (this->_rootOwner.isGroup)
|
|
nodeID.value = MirrorBuddyGroupMapper_getPrimaryTargetID(metaBuddyGroupMapper,
|
|
this->_rootOwner.node.value);
|
|
else
|
|
nodeID = this->_rootOwner.node;
|
|
|
|
node = NodeTree_find(&this->nodeTree, nodeID);
|
|
if(likely(node) )
|
|
Node_get(node);
|
|
|
|
RWLock_readUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return node;
|
|
}
|
|
|
|
/**
|
|
* This is a helper to have only one call for the typical targetMapper->getNodeID() and following
|
|
* referenceNode() calls.
|
|
*
|
|
* Note: remember to call releaseNode().
|
|
*
|
|
* @param targetMapper where to resolve the given targetID
|
|
* @param outErr will be set to FhgfsOpsErr_UNKNOWNNODE, _UNKNOWNTARGET, _SUCCESS (may be NULL)
|
|
* @return NULL if targetID is not mapped or if the mapped node does not exist in the store.
|
|
*/
|
|
Node* NodeStoreEx_referenceNodeByTargetID(NodeStoreEx* this, uint16_t targetID,
|
|
TargetMapper* targetMapper, FhgfsOpsErr* outErr)
|
|
{
|
|
NumNodeID nodeID;
|
|
Node* node;
|
|
|
|
nodeID = TargetMapper_getNodeID(targetMapper, targetID);
|
|
if(!nodeID.value)
|
|
{
|
|
SAFE_ASSIGN(outErr, FhgfsOpsErr_UNKNOWNTARGET);
|
|
return NULL;
|
|
}
|
|
|
|
node = NodeStoreEx_referenceNode(this, nodeID);
|
|
|
|
if(!node)
|
|
{
|
|
SAFE_ASSIGN(outErr, FhgfsOpsErr_UNKNOWNNODE);
|
|
return NULL;
|
|
}
|
|
|
|
SAFE_ASSIGN(outErr, FhgfsOpsErr_SUCCESS);
|
|
return node;
|
|
}
|
|
|
|
/**
|
|
* @return true if node existed as active node
|
|
*/
|
|
bool NodeStoreEx_deleteNode(NodeStoreEx* this, NumNodeID nodeID)
|
|
{
|
|
const char* logContext = __func__;
|
|
Logger* log = App_getLogger(this->app);
|
|
|
|
bool nodeWasActive;
|
|
|
|
#ifdef BEEGFS_DEBUG
|
|
if(unlikely(!nodeID.value) ) // should never happen
|
|
Logger_logFormatted(log, Log_CRITICAL, logContext, "Called with invalid node ID '0'");
|
|
#endif // BEEGFS_DEBUG
|
|
|
|
IGNORE_UNUSED_VARIABLE(logContext);
|
|
IGNORE_UNUSED_VARIABLE(log);
|
|
|
|
RWLock_writeLock(&this->rwLock); // L O C K
|
|
|
|
nodeWasActive = NodeTree_erase(&this->nodeTree, nodeID);
|
|
|
|
RWLock_writeUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return nodeWasActive;
|
|
}
|
|
|
|
/**
 * @return the size of the node tree, read under the store's read lock
 */
unsigned NodeStoreEx_getSize(NodeStoreEx* this)
{
   unsigned numNodes;

   RWLock_readLock(&this->rwLock); // L O C K
   numNodes = this->nodeTree.size;
   RWLock_readUnlock(&this->rwLock); // U N L O C K

   return numNodes;
}
|
|
|
|
/**
|
|
* This is used to iterate over all stored nodes.
|
|
* Start with this and then use referenceNextNode() until it returns NULL.
|
|
*
|
|
* Note: remember to call releaseNode()
|
|
*
|
|
* @return can be NULL
|
|
*/
|
|
Node* NodeStoreEx_referenceFirstNode(NodeStoreEx* this)
|
|
{
|
|
NodeTreeIter iter;
|
|
Node* resultNode = NULL;
|
|
|
|
RWLock_readLock(&this->rwLock); // L O C K
|
|
|
|
NodeTreeIter_init(&iter, &this->nodeTree);
|
|
if (!NodeTreeIter_end(&iter) )
|
|
{
|
|
resultNode = NodeTreeIter_value(&iter);
|
|
Node_get(resultNode);
|
|
}
|
|
|
|
RWLock_readUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return resultNode;
|
|
}
|
|
|
|
/**
|
|
* Note: remember to call releaseNode()
|
|
*
|
|
* @return NULL if nodeID was the last node
|
|
*/
|
|
Node* NodeStoreEx_referenceNextNodeAndReleaseOld(NodeStoreEx* this, Node* oldNode)
|
|
{
|
|
Node* result = NULL;
|
|
|
|
RWLock_readLock(&this->rwLock); // L O C K
|
|
|
|
result = NodeTree_getNext(&this->nodeTree, oldNode);
|
|
if (result)
|
|
Node_get(result);
|
|
|
|
Node_put(oldNode);
|
|
|
|
RWLock_readUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* @return 0 if no root node is known
|
|
*/
|
|
NodeOrGroup NodeStoreEx_getRootOwner(NodeStoreEx* this)
|
|
{
|
|
NodeOrGroup owner;
|
|
|
|
RWLock_readLock(&this->rwLock); // L O C K
|
|
|
|
owner = this->_rootOwner;
|
|
|
|
RWLock_readUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return owner;
|
|
}
|
|
|
|
/**
|
|
* Set internal root node ID.
|
|
*
|
|
* @return false if the new ID was rejected (e.g. because we already had an id set and
|
|
* ignoreExistingRoot was false).
|
|
*/
|
|
bool NodeStoreEx_setRootOwner(NodeStoreEx* this, NodeOrGroup owner,
|
|
bool ignoreExistingRoot)
|
|
{
|
|
bool setRootRes = true;
|
|
|
|
// don't allow invalid id 0 (if not forced to do so)
|
|
if(!owner.group && !ignoreExistingRoot)
|
|
return false;
|
|
|
|
RWLock_writeLock(&this->rwLock); // L O C K
|
|
|
|
if (!NodeOrGroup_valid(this->_rootOwner))
|
|
{ // rootID empty => set the new root
|
|
this->_rootOwner = owner;
|
|
}
|
|
else if (!ignoreExistingRoot)
|
|
{ // root defined already, reject new root
|
|
setRootRes = false;
|
|
}
|
|
else
|
|
{ // root defined already, but shall be ignored
|
|
this->_rootOwner = owner;
|
|
}
|
|
|
|
RWLock_writeUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return setRootRes;
|
|
}
|
|
|
|
/**
|
|
* Waits for the first node that is added to the store.
|
|
*
|
|
* @return true when a new node was added to the store before the timeout expired
|
|
*/
|
|
bool NodeStoreEx_waitForFirstNode(NodeStoreEx* this, int waitTimeoutMS)
|
|
{
|
|
bool retVal = false;
|
|
struct completion cond;
|
|
|
|
RWLock_readLock(&this->rwLock); // L O C K
|
|
|
|
retVal = this->nodeTree.size > 0;
|
|
|
|
RWLock_readUnlock(&this->rwLock); // U N L O C K
|
|
|
|
if(retVal)
|
|
return retVal;
|
|
|
|
RWLock_writeLock(&this->rwLock); // L O C K
|
|
|
|
WARN_ON(this->newNodeAppeared);
|
|
init_completion(&cond);
|
|
this->newNodeAppeared = &cond;
|
|
|
|
RWLock_writeUnlock(&this->rwLock); // U N L O C K
|
|
|
|
/* may time out or not, we don't care. activeCount is what's important */
|
|
wait_for_completion_timeout(&cond, TimeTk_msToJiffiesSchedulable(waitTimeoutMS) );
|
|
|
|
RWLock_writeLock(&this->rwLock); // L O C K
|
|
|
|
this->newNodeAppeared = NULL;
|
|
retVal = this->nodeTree.size > 0;
|
|
|
|
RWLock_writeUnlock(&this->rwLock); // U N L O C K
|
|
|
|
return retVal;
|
|
}
|
|
|
|
|
|
/**
|
|
* @param masterList must be ordered; contained nodes will be removed and may no longer be
|
|
* accessed after calling this method.
|
|
* @param appLocalNode just what you get from app->getLocalNode(), to determine NIC capabilities
|
|
*/
|
|
void NodeStoreEx_syncNodes(NodeStoreEx* this, NodeList* masterList, NumNodeIDList* outAddedIDs,
|
|
NumNodeIDList* outRemovedIDs, Node* appLocalNode)
|
|
{
|
|
// Note: We have two phases here:
|
|
// Phase 1 (locked): Identify added/removed nodes.
|
|
// Phase 2 (unlocked): Add/remove nodes from store.
|
|
// This separation is required to not break compatibility with virtual overwritten add/remove
|
|
// methods in derived classes (e.g. fhgfs_mgmtd).
|
|
|
|
|
|
// P H A S E 1 (Identify added/removed nodes.)
|
|
|
|
NodeTreeIter activeIter;
|
|
NodeListIter masterIter;
|
|
|
|
NumNodeIDListIter removedIDsIter;
|
|
NodeList addLaterNodes; // nodes to be added in phase 2
|
|
NodeListIter addLaterIter;
|
|
|
|
NicListCapabilities localNicCaps;
|
|
|
|
NodeList_init(&addLaterNodes);
|
|
|
|
|
|
RWLock_writeLock(&this->rwLock); // L O C K
|
|
|
|
NodeTreeIter_init(&activeIter, &this->nodeTree);
|
|
NodeListIter_init(&masterIter, masterList);
|
|
|
|
while(!NodeTreeIter_end(&activeIter) && !NodeListIter_end(&masterIter) )
|
|
{
|
|
Node* active = NodeTreeIter_value(&activeIter);
|
|
NumNodeID currentActive = Node_getNumID(active);
|
|
NumNodeID currentMaster = Node_getNumID(NodeListIter_value(&masterIter) );
|
|
|
|
if(currentMaster.value < currentActive.value)
|
|
{ // currentMaster is added
|
|
NumNodeIDList_append(outAddedIDs, currentMaster);
|
|
NodeList_append(&addLaterNodes, NodeListIter_value(&masterIter) );
|
|
|
|
NodeList_removeHead(masterList);
|
|
NodeListIter_init(&masterIter, masterList);
|
|
}
|
|
else
|
|
if(currentActive.value < currentMaster.value)
|
|
{ // currentActive is removed
|
|
NumNodeIDList_append(outRemovedIDs, currentActive);
|
|
NodeTreeIter_next(&activeIter);
|
|
}
|
|
else
|
|
{ // node unchanged
|
|
NodeList_append(&addLaterNodes, NodeListIter_value(&masterIter) );
|
|
|
|
NodeTreeIter_next(&activeIter);
|
|
|
|
NodeList_removeHead(masterList);
|
|
NodeListIter_init(&masterIter, masterList);
|
|
}
|
|
}
|
|
|
|
// remaining masterList nodes are added
|
|
while(!NodeListIter_end(&masterIter) )
|
|
{
|
|
NumNodeID currentMaster = Node_getNumID(NodeListIter_value(&masterIter) );
|
|
|
|
NumNodeIDList_append(outAddedIDs, currentMaster);
|
|
NodeList_append(&addLaterNodes, NodeListIter_value(&masterIter) );
|
|
|
|
NodeList_removeHead(masterList);
|
|
NodeListIter_init(&masterIter, masterList);
|
|
}
|
|
|
|
// remaining active nodes are removed
|
|
for(; !NodeTreeIter_end(&activeIter); NodeTreeIter_next(&activeIter) )
|
|
{
|
|
Node* active = NodeTreeIter_value(&activeIter);
|
|
|
|
NumNodeIDList_append(outRemovedIDs, Node_getNumID(active) );
|
|
}
|
|
|
|
RWLock_writeUnlock(&this->rwLock); // U N L O C K
|
|
|
|
|
|
// P H A S E 2 (Add/remove nodes from store.)
|
|
|
|
// remove nodes
|
|
NumNodeIDListIter_init(&removedIDsIter, outRemovedIDs);
|
|
|
|
while(!NumNodeIDListIter_end(&removedIDsIter) )
|
|
{
|
|
NumNodeID nodeID = NumNodeIDListIter_value(&removedIDsIter);
|
|
NumNodeIDListIter_next(&removedIDsIter); // (removal invalidates iter)
|
|
|
|
NodeStoreEx_deleteNode(this, nodeID);
|
|
}
|
|
|
|
// set local nic capabilities
|
|
if(appLocalNode)
|
|
{
|
|
NodeConnPool* connPool = Node_getConnPool(appLocalNode);
|
|
NodeConnPool_lock(connPool);
|
|
NIC_supportedCapabilities(NodeConnPool_getNicListLocked(connPool), &localNicCaps);
|
|
NodeConnPool_unlock(connPool);
|
|
}
|
|
|
|
// add nodes
|
|
NodeListIter_init(&addLaterIter, &addLaterNodes);
|
|
|
|
for(; !NodeListIter_end(&addLaterIter); NodeListIter_next(&addLaterIter) )
|
|
{
|
|
Node* node = NodeListIter_value(&addLaterIter);
|
|
|
|
if(appLocalNode)
|
|
NodeConnPool_setLocalNicCaps(Node_getConnPool(node), &localNicCaps);
|
|
|
|
NodeStoreEx_addOrUpdateNode(this, &node);
|
|
}
|
|
|
|
NodeList_uninit(&addLaterNodes);
|
|
}
|
|
|
|
/**
|
|
* Take special actions based on version of a (typically new) node, e.g. compat flags deactivation.
|
|
*
|
|
* Note: Caller must hold lock.
|
|
*/
|
|
void __NodeStoreEx_handleNodeVersion(NodeStoreEx* this, Node* node)
|
|
{
|
|
// nothing to be done here currently
|
|
}
|