#include #include #include #include #include #include "NodeStoreEx.h" #define NODESTORE_WARN_REFNUM 2000 /** * @param storeType will be applied to nodes on addOrUpdate() */ void NodeStoreEx_init(NodeStoreEx* this, App* app, NodeType storeType) { this->app = app; RWLock_init(&this->rwLock); NodeTree_init(&this->nodeTree); this->newNodeAppeared = NULL; this->_rootOwner = NodeOrGroup_fromGroup(0); // 0 means undefined/invalid this->storeType = storeType; } NodeStoreEx* NodeStoreEx_construct(App* app, NodeType storeType) { NodeStoreEx* this = (NodeStoreEx*)os_kmalloc(sizeof(*this) ); NodeStoreEx_init(this, app, storeType); return this; } void NodeStoreEx_uninit(NodeStoreEx* this) { NodeTree_uninit(&this->nodeTree); } void NodeStoreEx_destruct(NodeStoreEx* this) { NodeStoreEx_uninit(this); kfree(this); } /** * @param node belongs to the store after calling this method; this method will set (*node=NULL); * so do not free it and don't use it any more afterwards (reference it from this store if you need * it) * @return true if the node was not in the store yet, false otherwise */ bool NodeStoreEx_addOrUpdateNode(NodeStoreEx* this, Node** node) { const char* logContext = __func__; Logger* log = App_getLogger(this->app); NumNodeID nodeNumID = Node_getNumID(*node); NodeString incomingAlias; NodeString activeAlias; bool setAliasResult; Node* active; NicAddressList nicList; Node_copyAlias(*node, &incomingAlias); // check if numeric ID is defined if(unlikely(!nodeNumID.value) ) { // undefined numeric ID should never happen Logger_logErrFormatted(log, logContext, "Rejecting node with undefined numeric ID: %s; Type: %s", incomingAlias.buf, Node_nodeTypeToStr(this->storeType) ); Node_put(*node); *node = NULL; return false; } RWLock_writeLock(&this->rwLock); // L O C K // is node in any of the stores already? active = NodeTree_find(&this->nodeTree, nodeNumID); if(active) { // node was in the store already => update it Node_copyAlias(active, &activeAlias); // If the string IDs differ, update the current active node ID from the incoming node ID. // Ignore if the incomingNodeID is empty, this can happen when a node first starts up // because it has to download its own alias from the mgmtd. if(incomingAlias.buf[0] != '\0' && strcmp(incomingAlias.buf, activeAlias.buf)) { // Before 8.0 BeeGFS logged "numeric ID collision for two different node string IDs". // Starting in 8.0 string IDs are considered aliases and can be updated as needed. Logger_logFormatted(log, 3, logContext, "Updating alias for node: %s -> %s; Type: %s", activeAlias.buf, incomingAlias.buf, Node_nodeTypeToStr(this->storeType) ); // The node type should not be updated this way so we set it to invalid (no update). setAliasResult = Node_setNodeAliasAndType(active, incomingAlias.buf, NODETYPE_Invalid); if (!setAliasResult) { NodeString nodeAndType; Node_copyAliasWithTypeStr(active, &nodeAndType); Logger_logErrFormatted(log, logContext, // Partial updates should never happen. Print what is set for both the alias and node // alias with type string. We don't know what may be helpful for debugging. "Error updating alias for node: %s : %s (ignoring)", activeAlias.buf, nodeAndType.buf); } } // update heartbeat time of existing node Node_cloneNicList(*node, &nicList); Node_updateLastHeartbeatT(active); Node_updateInterfaces(active, Node_getPortUDP(*node), Node_getPortTCP(*node), &nicList); ListTk_kfreeNicAddressListElems(&nicList); NicAddressList_uninit(&nicList); Node_put(*node); } else { // node is not currently active => insert it NodeTree_insert(&this->nodeTree, nodeNumID, *node); #ifdef BEEGFS_DEBUG // check whether this node type and store type differ if( (Node_getNodeType(*node) != NODETYPE_Invalid) && (Node_getNodeType(*node) != this->storeType) ) { Logger_logErrFormatted(log, logContext, "Node type and store type differ. Node: %s %s; Store: %s", Node_getNodeTypeStr(*node), incomingAlias.buf, Node_nodeTypeToStr(this->storeType) ); } #endif // BEEGFS_DEBUG Node_setIsActive(*node, true); Node_setNodeAliasAndType(*node, NULL, this->storeType); __NodeStoreEx_handleNodeVersion(this, *node); if(this->newNodeAppeared) complete(this->newNodeAppeared); } RWLock_writeUnlock(&this->rwLock); // U N L O C K *node = NULL; return !active; } /** * Note: remember to call releaseNode() * * @return NULL if no such node exists */ Node* NodeStoreEx_referenceNode(NodeStoreEx* this, NumNodeID id) { Logger* log = App_getLogger(this->app); Node* node = NULL; // check for invalid id 0 #ifdef BEEGFS_DEBUG if(!id.value) { Logger_log(log, Log_CRITICAL, __func__, "BUG?: Attempt to reference numeric node ID '0'"); dump_stack(); } #endif // BEEGFS_DEBUG IGNORE_UNUSED_VARIABLE(log); RWLock_readLock(&this->rwLock); // L O C K node = NodeTree_find(&this->nodeTree, id); if (likely(node)) { // found it unsigned refs; Node_get(node); (void) refs; // check for unusually high reference count #ifdef BEEGFS_DEBUG # ifdef KERNEL_HAS_KREF_READ refs = kref_read(&node->references); # else refs = atomic_read(&node->references.refcount); #endif if (refs > NODESTORE_WARN_REFNUM) { NodeString alias; Node_copyAlias(node, &alias); Logger_logFormatted(log, Log_CRITICAL, __func__, "WARNING: Lots of references to node (=> leak?): %s %s; ref count: %d", Node_getNodeTypeStr(node), alias.buf, refs); // G E T } #endif // BEEGFS_DEBUG } RWLock_readUnlock(&this->rwLock); // U N L O C K return node; } /** * Note: remember to call releaseNode() * * @return NULL if no such node exists */ Node* NodeStoreEx_referenceRootNode(NodeStoreEx* this, NodeOrGroup* rootID) { Node* node = NULL; MirrorBuddyGroupMapper* metaBuddyGroupMapper = App_getMetaBuddyGroupMapper(this->app); NumNodeID nodeID; RWLock_readLock(&this->rwLock); // L O C K *rootID = this->_rootOwner; if (this->_rootOwner.isGroup) nodeID.value = MirrorBuddyGroupMapper_getPrimaryTargetID(metaBuddyGroupMapper, this->_rootOwner.node.value); else nodeID = this->_rootOwner.node; node = NodeTree_find(&this->nodeTree, nodeID); if(likely(node) ) Node_get(node); RWLock_readUnlock(&this->rwLock); // U N L O C K return node; } /** * This is a helper to have only one call for the typical targetMapper->getNodeID() and following * referenceNode() calls. * * Note: remember to call releaseNode(). * * @param targetMapper where to resolve the given targetID * @param outErr will be set to FhgfsOpsErr_UNKNOWNNODE, _UNKNOWNTARGET, _SUCCESS (may be NULL) * @return NULL if targetID is not mapped or if the mapped node does not exist in the store. */ Node* NodeStoreEx_referenceNodeByTargetID(NodeStoreEx* this, uint16_t targetID, TargetMapper* targetMapper, FhgfsOpsErr* outErr) { NumNodeID nodeID; Node* node; nodeID = TargetMapper_getNodeID(targetMapper, targetID); if(!nodeID.value) { SAFE_ASSIGN(outErr, FhgfsOpsErr_UNKNOWNTARGET); return NULL; } node = NodeStoreEx_referenceNode(this, nodeID); if(!node) { SAFE_ASSIGN(outErr, FhgfsOpsErr_UNKNOWNNODE); return NULL; } SAFE_ASSIGN(outErr, FhgfsOpsErr_SUCCESS); return node; } /** * @return true if node existed as active node */ bool NodeStoreEx_deleteNode(NodeStoreEx* this, NumNodeID nodeID) { const char* logContext = __func__; Logger* log = App_getLogger(this->app); bool nodeWasActive; #ifdef BEEGFS_DEBUG if(unlikely(!nodeID.value) ) // should never happen Logger_logFormatted(log, Log_CRITICAL, logContext, "Called with invalid node ID '0'"); #endif // BEEGFS_DEBUG IGNORE_UNUSED_VARIABLE(logContext); IGNORE_UNUSED_VARIABLE(log); RWLock_writeLock(&this->rwLock); // L O C K nodeWasActive = NodeTree_erase(&this->nodeTree, nodeID); RWLock_writeUnlock(&this->rwLock); // U N L O C K return nodeWasActive; } unsigned NodeStoreEx_getSize(NodeStoreEx* this) { unsigned nodesSize; RWLock_readLock(&this->rwLock); // L O C K nodesSize = this->nodeTree.size; RWLock_readUnlock(&this->rwLock); // U N L O C K return nodesSize; } /** * This is used to iterate over all stored nodes. * Start with this and then use referenceNextNode() until it returns NULL. * * Note: remember to call releaseNode() * * @return can be NULL */ Node* NodeStoreEx_referenceFirstNode(NodeStoreEx* this) { NodeTreeIter iter; Node* resultNode = NULL; RWLock_readLock(&this->rwLock); // L O C K NodeTreeIter_init(&iter, &this->nodeTree); if (!NodeTreeIter_end(&iter) ) { resultNode = NodeTreeIter_value(&iter); Node_get(resultNode); } RWLock_readUnlock(&this->rwLock); // U N L O C K return resultNode; } /** * Note: remember to call releaseNode() * * @return NULL if nodeID was the last node */ Node* NodeStoreEx_referenceNextNodeAndReleaseOld(NodeStoreEx* this, Node* oldNode) { Node* result = NULL; RWLock_readLock(&this->rwLock); // L O C K result = NodeTree_getNext(&this->nodeTree, oldNode); if (result) Node_get(result); Node_put(oldNode); RWLock_readUnlock(&this->rwLock); // U N L O C K return result; } /** * @return 0 if no root node is known */ NodeOrGroup NodeStoreEx_getRootOwner(NodeStoreEx* this) { NodeOrGroup owner; RWLock_readLock(&this->rwLock); // L O C K owner = this->_rootOwner; RWLock_readUnlock(&this->rwLock); // U N L O C K return owner; } /** * Set internal root node ID. * * @return false if the new ID was rejected (e.g. because we already had an id set and * ignoreExistingRoot was false). */ bool NodeStoreEx_setRootOwner(NodeStoreEx* this, NodeOrGroup owner, bool ignoreExistingRoot) { bool setRootRes = true; // don't allow invalid id 0 (if not forced to do so) if(!owner.group && !ignoreExistingRoot) return false; RWLock_writeLock(&this->rwLock); // L O C K if (!NodeOrGroup_valid(this->_rootOwner)) { // rootID empty => set the new root this->_rootOwner = owner; } else if (!ignoreExistingRoot) { // root defined already, reject new root setRootRes = false; } else { // root defined already, but shall be ignored this->_rootOwner = owner; } RWLock_writeUnlock(&this->rwLock); // U N L O C K return setRootRes; } /** * Waits for the first node that is added to the store. * * @return true when a new node was added to the store before the timeout expired */ bool NodeStoreEx_waitForFirstNode(NodeStoreEx* this, int waitTimeoutMS) { bool retVal = false; struct completion cond; RWLock_readLock(&this->rwLock); // L O C K retVal = this->nodeTree.size > 0; RWLock_readUnlock(&this->rwLock); // U N L O C K if(retVal) return retVal; RWLock_writeLock(&this->rwLock); // L O C K WARN_ON(this->newNodeAppeared); init_completion(&cond); this->newNodeAppeared = &cond; RWLock_writeUnlock(&this->rwLock); // U N L O C K /* may time out or not, we don't care. activeCount is what's important */ wait_for_completion_timeout(&cond, TimeTk_msToJiffiesSchedulable(waitTimeoutMS) ); RWLock_writeLock(&this->rwLock); // L O C K this->newNodeAppeared = NULL; retVal = this->nodeTree.size > 0; RWLock_writeUnlock(&this->rwLock); // U N L O C K return retVal; } /** * @param masterList must be ordered; contained nodes will be removed and may no longer be * accessed after calling this method. * @param appLocalNode just what you get from app->getLocalNode(), to determine NIC capabilities */ void NodeStoreEx_syncNodes(NodeStoreEx* this, NodeList* masterList, NumNodeIDList* outAddedIDs, NumNodeIDList* outRemovedIDs, Node* appLocalNode) { // Note: We have two phases here: // Phase 1 (locked): Identify added/removed nodes. // Phase 2 (unlocked): Add/remove nodes from store. // This separation is required to not break compatibility with virtual overwritten add/remove // methods in derived classes (e.g. fhgfs_mgmtd). // P H A S E 1 (Identify added/removed nodes.) NodeTreeIter activeIter; NodeListIter masterIter; NumNodeIDListIter removedIDsIter; NodeList addLaterNodes; // nodes to be added in phase 2 NodeListIter addLaterIter; NicListCapabilities localNicCaps; NodeList_init(&addLaterNodes); RWLock_writeLock(&this->rwLock); // L O C K NodeTreeIter_init(&activeIter, &this->nodeTree); NodeListIter_init(&masterIter, masterList); while(!NodeTreeIter_end(&activeIter) && !NodeListIter_end(&masterIter) ) { Node* active = NodeTreeIter_value(&activeIter); NumNodeID currentActive = Node_getNumID(active); NumNodeID currentMaster = Node_getNumID(NodeListIter_value(&masterIter) ); if(currentMaster.value < currentActive.value) { // currentMaster is added NumNodeIDList_append(outAddedIDs, currentMaster); NodeList_append(&addLaterNodes, NodeListIter_value(&masterIter) ); NodeList_removeHead(masterList); NodeListIter_init(&masterIter, masterList); } else if(currentActive.value < currentMaster.value) { // currentActive is removed NumNodeIDList_append(outRemovedIDs, currentActive); NodeTreeIter_next(&activeIter); } else { // node unchanged NodeList_append(&addLaterNodes, NodeListIter_value(&masterIter) ); NodeTreeIter_next(&activeIter); NodeList_removeHead(masterList); NodeListIter_init(&masterIter, masterList); } } // remaining masterList nodes are added while(!NodeListIter_end(&masterIter) ) { NumNodeID currentMaster = Node_getNumID(NodeListIter_value(&masterIter) ); NumNodeIDList_append(outAddedIDs, currentMaster); NodeList_append(&addLaterNodes, NodeListIter_value(&masterIter) ); NodeList_removeHead(masterList); NodeListIter_init(&masterIter, masterList); } // remaining active nodes are removed for(; !NodeTreeIter_end(&activeIter); NodeTreeIter_next(&activeIter) ) { Node* active = NodeTreeIter_value(&activeIter); NumNodeIDList_append(outRemovedIDs, Node_getNumID(active) ); } RWLock_writeUnlock(&this->rwLock); // U N L O C K // P H A S E 2 (Add/remove nodes from store.) // remove nodes NumNodeIDListIter_init(&removedIDsIter, outRemovedIDs); while(!NumNodeIDListIter_end(&removedIDsIter) ) { NumNodeID nodeID = NumNodeIDListIter_value(&removedIDsIter); NumNodeIDListIter_next(&removedIDsIter); // (removal invalidates iter) NodeStoreEx_deleteNode(this, nodeID); } // set local nic capabilities if(appLocalNode) { NodeConnPool* connPool = Node_getConnPool(appLocalNode); NodeConnPool_lock(connPool); NIC_supportedCapabilities(NodeConnPool_getNicListLocked(connPool), &localNicCaps); NodeConnPool_unlock(connPool); } // add nodes NodeListIter_init(&addLaterIter, &addLaterNodes); for(; !NodeListIter_end(&addLaterIter); NodeListIter_next(&addLaterIter) ) { Node* node = NodeListIter_value(&addLaterIter); if(appLocalNode) NodeConnPool_setLocalNicCaps(Node_getConnPool(node), &localNicCaps); NodeStoreEx_addOrUpdateNode(this, &node); } NodeList_uninit(&addLaterNodes); } /** * Take special actions based on version of a (typically new) node, e.g. compat flags deactivation. * * Note: Caller must hold lock. */ void __NodeStoreEx_handleNodeVersion(NodeStoreEx* this, Node* node) { // nothing to be done here currently }