#include <app/App.h>
#include <net/filesystem/FhgfsOpsRemoting.h>
#include <common/net/message/control/AckMsgEx.h>
#include <common/net/message/nodes/HeartbeatMsgEx.h>
#include <common/net/message/nodes/HeartbeatRequestMsgEx.h>
#include <common/net/message/nodes/RegisterNodeMsg.h>
#include <common/net/message/nodes/RegisterNodeRespMsg.h>
#include <common/net/message/nodes/RemoveNodeMsgEx.h>
#include <common/net/message/nodes/RemoveNodeRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/NodesTk.h>
#include <common/toolkit/Random.h>
#include <common/toolkit/Time.h>
#include "InternodeSyncer.h"

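/**
 * Initializes references to app components and sets up the mutexes, condition variable and
 * delayed-work queues used by this component.
 */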
void InternodeSyncer_init(InternodeSyncer* this, App* app)
{
   // call super constructor
   Thread_init( (Thread*)this, BEEGFS_THREAD_NAME_PREFIX_STR "XNodeSync", __InternodeSyncer_run);

   this->forceTargetStatesUpdate = false;

   this->app = app;
   this->cfg = App_getConfig(app);

   this->dgramLis = App_getDatagramListener(app);

   this->mgmtNodes = App_getMgmtNodes(app);
   this->metaNodes = App_getMetaNodes(app);
   this->storageNodes = App_getStorageNodes(app);

   this->nodeRegistered = false;
   this->mgmtInitDone = false;

   Mutex_init(&this->mgmtInitDoneMutex);
   Condition_init(&this->mgmtInitDoneCond);

   Mutex_init(&this->delayedCloseMutex);
   PointerList_init(&this->delayedCloseQueue);

   Mutex_init(&this->delayedEntryUnlockMutex);
   PointerList_init(&this->delayedEntryUnlockQueue);

   Mutex_init(&this->delayedRangeUnlockMutex);
   PointerList_init(&this->delayedRangeUnlockQueue);

   Mutex_init(&this->forceTargetStatesUpdateMutex);
   Time_init(&this->lastSuccessfulTargetStatesUpdateT);
   this->targetOfflineTimeoutMS = Config_getSysTargetOfflineTimeoutSecs(this->cfg) * 1000;
}

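/**
 * Allocates and initializes a new InternodeSyncer.
 *
 * @return NULL on allocation failure
 */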
struct InternodeSyncer* InternodeSyncer_construct(App* app)
{
   struct InternodeSyncer* this = (InternodeSyncer*)os_kmalloc(sizeof(*this) );

   if(likely(this) )
      InternodeSyncer_init(this, app);

   return this;
}

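/**
 * Frees all remaining entries of the delayed close/unlock queues and uninitializes the
 * associated mutexes.
 */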
void InternodeSyncer_uninit(InternodeSyncer* this)
{
   PointerListIter iter;

   // free delayed close Q elements

   PointerListIter_init(&iter, &this->delayedCloseQueue);

   while(!PointerListIter_end(&iter) )
   {
      __InternodeSyncer_delayedCloseFreeEntry(this,
         (DelayedCloseEntry*)PointerListIter_value(&iter) );
      PointerListIter_next(&iter);
   }

   PointerList_uninit(&this->delayedCloseQueue);
   Mutex_uninit(&this->delayedCloseMutex);

   // free delayed entry unlock Q elements

   PointerListIter_init(&iter, &this->delayedEntryUnlockQueue);

   while(!PointerListIter_end(&iter) )
   {
      __InternodeSyncer_delayedEntryUnlockFreeEntry(this,
         (DelayedEntryUnlockEntry*)PointerListIter_value(&iter) );
      PointerListIter_next(&iter);
   }

   PointerList_uninit(&this->delayedEntryUnlockQueue);
   Mutex_uninit(&this->delayedEntryUnlockMutex);

   // free delayed range unlock Q elements

   PointerListIter_init(&iter, &this->delayedRangeUnlockQueue);

   while(!PointerListIter_end(&iter) )
   {
      __InternodeSyncer_delayedRangeUnlockFreeEntry(this,
         (DelayedRangeUnlockEntry*)PointerListIter_value(&iter) );
      PointerListIter_next(&iter);
   }

   PointerList_uninit(&this->delayedRangeUnlockQueue);
   Mutex_uninit(&this->delayedRangeUnlockMutex);

   Mutex_uninit(&this->forceTargetStatesUpdateMutex);

   Mutex_uninit(&this->mgmtInitDoneMutex);

   Thread_uninit( (Thread*)this);
}

void InternodeSyncer_destruct(InternodeSyncer* this)
{
   InternodeSyncer_uninit(this);

   kfree(this);
}

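/**
 * Thread entry point: performs the initial management daemon handshake, runs the periodic sync
 * loop until the component is asked to terminate, and finally deregisters the local node if it
 * was registered.
 */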
void __InternodeSyncer_run(Thread* this)
{
   InternodeSyncer* thisCast = (InternodeSyncer*)this;

   const char* logContext = "InternodeSyncer (run)";
   Logger* log = App_getLogger(thisCast->app);

   Logger_logFormatted(log, Log_DEBUG, logContext, "Searching for nodes...");

   __InternodeSyncer_mgmtInit(thisCast);

   _InternodeSyncer_requestLoop(thisCast);

   __InternodeSyncer_signalMgmtInitDone(thisCast);

   if(thisCast->nodeRegistered)
      __InternodeSyncer_unregisterNode(thisCast);

   Logger_log(log, Log_DEBUG, logContext, "Component stopped.");
}

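/**
 * Main loop: periodically re-registers at the management daemon, downloads node lists, target
 * mappings and target states, retries delayed close/unlock operations and drops idle
 * connections.
 */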
void _InternodeSyncer_requestLoop(InternodeSyncer* this)
{
   const unsigned sleepTimeMS = 5*1000;

   const unsigned mgmtInitIntervalMS = 5000;
   const unsigned reregisterIntervalMS = 60*1000; /* send heartbeats to mgmt in this interval. must
      be a lot lower than tuneClientAutoRemoveMins of mgmt. the value is capped to at least 5
      (or disabled) on mgmt. */
   const unsigned downloadNodesIntervalMS = 180000;
   const unsigned updateTargetStatesMS = Config_getSysUpdateTargetStatesSecs(this->cfg) * 1000;
   const unsigned delayedOpsIntervalMS = 60*1000;
   const unsigned idleDisconnectIntervalMS = 70*60*1000; /* 70 minutes (must be
      less than half the server-side streamlis idle disconnect interval to avoid
      server disconnecting first) */
   const unsigned checkNetworkIntervalMS = 60*1000; // 1 minute

   Time lastMgmtInitT;
   Time lastReregisterT;
   Time lastDownloadNodesT;

   Time lastDelayedOpsT;
   Time lastIdleDisconnectT;
   Time lastTargetStatesUpdateT;
   Time lastCheckNetworkT;

   Thread* thisThread = (Thread*)this;

   Time_init(&lastMgmtInitT);
   Time_init(&lastReregisterT);
   Time_init(&lastDownloadNodesT);

   Time_init(&lastDelayedOpsT);
   Time_init(&lastIdleDisconnectT);
   Time_init(&lastTargetStatesUpdateT);
   Time_init(&lastCheckNetworkT);

   while(!_Thread_waitForSelfTerminateOrder(thisThread, sleepTimeMS) )
   {
      bool targetStatesUpdateForced =
         InternodeSyncer_getAndResetForceTargetStatesUpdate(this);

      // mgmt init
      if(!this->mgmtInitDone)
      {
         if(Time_elapsedMS(&lastMgmtInitT) > mgmtInitIntervalMS)
         {
            __InternodeSyncer_mgmtInit(this);

            Time_setToNow(&lastMgmtInitT);
         }

         continue;
      }

      // everything below only happens after successful management init...

      // check for NIC changes
      if(Time_elapsedMS(&lastCheckNetworkT) > checkNetworkIntervalMS)
      {
         if (__InternodeSyncer_checkNetwork(this))
            Time_setZero(&lastReregisterT);
         Time_setToNow(&lastCheckNetworkT);
      }

      // re-register
      if(Time_elapsedMS(&lastReregisterT) > reregisterIntervalMS)
      {
         __InternodeSyncer_reregisterNode(this);

         Time_setToNow(&lastReregisterT);
      }

      // download & sync nodes
      if(Time_elapsedMS(&lastDownloadNodesT) > downloadNodesIntervalMS)
      {
         __InternodeSyncer_downloadAndSyncNodes(this);
         __InternodeSyncer_downloadAndSyncTargetMappings(this);

         Time_setToNow(&lastDownloadNodesT);
      }

      // download target states
      if( targetStatesUpdateForced ||
          (Time_elapsedMS(&lastTargetStatesUpdateT) > updateTargetStatesMS) )
      {
         __InternodeSyncer_updateTargetStatesAndBuddyGroups(this, NODETYPE_Meta);
         __InternodeSyncer_updateTargetStatesAndBuddyGroups(this, NODETYPE_Storage);

         Time_setToNow(&lastTargetStatesUpdateT);
      }

      // delayed operations (that were left due to interruption by signal or network errors)
      if(Time_elapsedMS(&lastDelayedOpsT) > delayedOpsIntervalMS)
      {
         __InternodeSyncer_delayedEntryUnlockComm(this);
         __InternodeSyncer_delayedRangeUnlockComm(this);
         __InternodeSyncer_delayedCloseComm(this);

         Time_setToNow(&lastDelayedOpsT);
      }

      // drop idle connections
      if(Time_elapsedMS(&lastIdleDisconnectT) > idleDisconnectIntervalMS)
      {
         __InternodeSyncer_dropIdleConns(this);

         Time_setToNow(&lastIdleDisconnectT);
      }
   }
}

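/**
 * Marks management init as done and wakes up all threads waiting in
 * InternodeSyncer_waitForMgmtInit().
 */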
void __InternodeSyncer_signalMgmtInitDone(InternodeSyncer* this)
{
   Mutex_lock(&this->mgmtInitDoneMutex);

   this->mgmtInitDone = true;

   Condition_broadcast(&this->mgmtInitDoneCond);

   Mutex_unlock(&this->mgmtInitDoneMutex);
}

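/**
 * Waits for a heartbeat from the management daemon; on success downloads nodes, target mappings
 * and states, registers the local node and signals that management init is done.
 */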
void __InternodeSyncer_mgmtInit(InternodeSyncer* this)
{
   static bool waitForMgmtLogged = false; // to avoid log spamming

   const char* logContext = "Init";
   Logger* log = App_getLogger(this->app);

   if(!waitForMgmtLogged)
   {
      const char* hostname = Config_getSysMgmtdHost(this->cfg);
      unsigned short port = Config_getConnMgmtdPort(this->cfg);

      Logger_logFormatted(log, Log_WARNING, logContext,
         "Waiting for beegfs-mgmtd@%s:%hu...", hostname, port);
      waitForMgmtLogged = true;
   }

   if(!__InternodeSyncer_waitForMgmtHeartbeat(this) )
      return;

   Logger_log(log, Log_NOTICE, logContext, "Management node found. Downloading node groups...");

   __InternodeSyncer_downloadAndSyncNodes(this);
   __InternodeSyncer_downloadAndSyncTargetMappings(this);
   __InternodeSyncer_updateTargetStatesAndBuddyGroups(this, NODETYPE_Storage);
   __InternodeSyncer_updateTargetStatesAndBuddyGroups(this, NODETYPE_Meta);

   Logger_log(log, Log_NOTICE, logContext, "Node registration...");

   if(__InternodeSyncer_registerNode(this) )
   { // download nodes again now that we will receive notifications about add/remove (avoids race)
      __InternodeSyncer_downloadAndSyncNodes(this);
      __InternodeSyncer_downloadAndSyncTargetMappings(this);
      __InternodeSyncer_updateTargetStatesAndBuddyGroups(this, NODETYPE_Storage);
      __InternodeSyncer_updateTargetStatesAndBuddyGroups(this, NODETYPE_Meta);
   }

   __InternodeSyncer_signalMgmtInitDone(this);

   Logger_log(log, Log_NOTICE, logContext, "Init complete.");
}


/**
 * @param timeoutMS 0 to wait infinitely (which is probably never what you want)
 */
bool InternodeSyncer_waitForMgmtInit(InternodeSyncer* this, int timeoutMS)
{
   bool retVal;

   Mutex_lock(&this->mgmtInitDoneMutex);

   if(!timeoutMS)
   {
      while(!this->mgmtInitDone)
         Condition_wait(&this->mgmtInitDoneCond, &this->mgmtInitDoneMutex);
   }
   else
   if(!this->mgmtInitDone)
      Condition_timedwait(&this->mgmtInitDoneCond, &this->mgmtInitDoneMutex, timeoutMS);

   retVal = this->mgmtInitDone;

   Mutex_unlock(&this->mgmtInitDoneMutex);

   return retVal;
}

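/**
 * Sends a heartbeat request datagram to the management daemon and waits briefly for the
 * corresponding heartbeat to arrive in the management node store.
 *
 * @return true if a management node is (or becomes) known, false otherwise
 */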
bool __InternodeSyncer_waitForMgmtHeartbeat(InternodeSyncer* this)
{
   const int waitTimeoutMS = 400;
   const int numRetries = 1;

   char heartbeatReqBuf[NETMSG_MIN_LENGTH];
   Thread* thisThread = (Thread*)this;

   const char* hostname = Config_getSysMgmtdHost(this->cfg);
   unsigned short port = Config_getConnMgmtdPort(this->cfg);
   struct in_addr ipAddr;
   int i;

   HeartbeatRequestMsgEx msg;

   if(NodeStoreEx_getSize(this->mgmtNodes) )
      return true;


   // prepare request message

   HeartbeatRequestMsgEx_init(&msg);
   NetMessage_serialize( (NetMessage*)&msg, heartbeatReqBuf, NETMSG_MIN_LENGTH);


   // resolve name, send message, wait for incoming heartbeat

   if (!SocketTk_getHostByAddrStr(hostname, &ipAddr)) {
      return false;
   }

   for(i=0; (i <= numRetries) && !Thread_getSelfTerminate(thisThread); i++)
   {
      int tryTimeoutWarpMS = Random_getNextInRange(-(waitTimeoutMS/4), waitTimeoutMS/4);

      DatagramListener_sendtoIP_kernel(this->dgramLis, heartbeatReqBuf, sizeof heartbeatReqBuf, 0,
         ipAddr, port);

      if(NodeStoreEx_waitForFirstNode(this->mgmtNodes, waitTimeoutMS + tryTimeoutWarpMS) )
      {
         return true; // heartbeat received => we're done
      }
   }

   return false;
}

/**
 * Registers the local node at the management daemon via RegisterNodeMsg.
 *
 * @return true if the node was successfully registered, false otherwise
 */
bool __InternodeSyncer_registerNode(InternodeSyncer* this)
{
   static bool registrationFailureLogged = false; // to avoid log spamming

   const char* logContext = "Registration";
   Config* cfg = App_getConfig(this->app);
   Logger* log = App_getLogger(this->app);

   Node* mgmtNode;

   NoAllocBufferStore* bufStore = App_getMsgBufStore(this->app);
   Node* localNode = App_getLocalNode(this->app);
   NodeString alias;
   NumNodeID localNodeNumID = Node_getNumID(localNode);
   NumNodeID newLocalNodeNumID;
   NicAddressList nicList;

   RegisterNodeMsg msg;
   char* respBuf;
   NetMessage* respMsg;
   FhgfsOpsErr requestRes;
   RegisterNodeRespMsg* registerResp;
   bool result;

   Node_cloneNicList(localNode, &nicList);
   mgmtNode = NodeStoreEx_referenceFirstNode(this->mgmtNodes);
   if(!mgmtNode)
   {
      result = false;
      goto exit;
   }

   Node_copyAlias(localNode, &alias);
   RegisterNodeMsg_initFromNodeData(&msg, alias.buf, localNodeNumID, NODETYPE_Client,
      &nicList, Config_getConnClientPort(this->cfg));

   // connect & communicate
   requestRes = MessagingTk_requestResponse(this->app, mgmtNode,
      (NetMessage*)&msg, NETMSGTYPE_RegisterNodeResp, &respBuf, &respMsg);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // request/response failed
      goto cleanup_request;
   }

   // handle result
   registerResp = (RegisterNodeRespMsg*)respMsg;
   newLocalNodeNumID = RegisterNodeRespMsg_getNodeNumID(registerResp);
   Config_setConnMgmtdGrpcPort(cfg, RegisterNodeRespMsg_getGrpcPort(registerResp));
   App_updateFsUUID(this->app, RegisterNodeRespMsg_getFsUUID(registerResp));

   if (newLocalNodeNumID.value == 0)
   {
      Logger_log(log, Log_CRITICAL, logContext,
         "Unable to register at management daemon. No valid numeric node ID retrieved.");
   }
   else
   {
      Node_setNumID(localNode, newLocalNodeNumID);

      this->nodeRegistered = true;
   }

   // clean-up
   NETMESSAGE_FREE(respMsg);
   NoAllocBufferStore_addBuf(bufStore, respBuf);

cleanup_request:
   Node_put(mgmtNode);

   // log registration result
   if(this->nodeRegistered)
      Logger_log(log, Log_WARNING, logContext, "Node registration successful.");
   else
   if(!registrationFailureLogged)
   {
      Logger_log(log, Log_CRITICAL, logContext,
         "Node registration failed. Management node offline? Will keep on trying...");
      registrationFailureLogged = true;
   }

   result = this->nodeRegistered;

exit:

   ListTk_kfreeNicAddressListElems(&nicList);
   NicAddressList_uninit(&nicList);

   return result;
}

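/**
 * Detects changes of the local network interfaces and, if any are found, applies the new
 * interface list to the app.
 *
 * @return true if the local interface list changed, false otherwise
 */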
bool __InternodeSyncer_checkNetwork(InternodeSyncer* this)
{
   Logger* log = App_getLogger(this->app);
   NicAddressList newNicList;
   NodeConnPool* connPool = Node_getConnPool(App_getLocalNode(this->app));
   bool result = false;

   if (App_findAllowedInterfaces(this->app, &newNicList))
   {
      NodeConnPool_lock(connPool);
      result = !NicAddressList_equals(&newNicList, NodeConnPool_getNicListLocked(connPool));
      NodeConnPool_unlock(connPool);
      if (result)
      {
         Logger_log(log, Log_NOTICE, "checkNetwork", "Local interfaces have changed.");
         App_updateLocalInterfaces(this->app, &newNicList);
      }
      ListTk_kfreeNicAddressListElems(&newNicList);
      NicAddressList_uninit(&newNicList);
   }
   return result;
}

/**
 * Note: This just sends a heartbeat to the mgmt node to renew our existence information
 * (in case the mgmt node assumed our death for some reason like network errors etc.)
 */
void __InternodeSyncer_reregisterNode(InternodeSyncer* this)
{
   Node* mgmtNode;

   Node* localNode = App_getLocalNode(this->app);
   NodeString alias;
   NumNodeID localNodeNumID = Node_getNumID(localNode);
   NicAddressList nicList;

   HeartbeatMsgEx msg;

   Node_cloneNicList(localNode, &nicList);

   mgmtNode = NodeStoreEx_referenceFirstNode(this->mgmtNodes);
   if(!mgmtNode)
      goto exit;

   Node_copyAlias(localNode, &alias);
   HeartbeatMsgEx_initFromNodeData(&msg, alias.buf, localNodeNumID, NODETYPE_Client, &nicList);
   HeartbeatMsgEx_setPorts(&msg, Config_getConnClientPort(this->cfg), 0);

   DatagramListener_sendMsgToNode(this->dgramLis, mgmtNode, (NetMessage*)&msg);

   Node_put(mgmtNode);

exit:

   ListTk_kfreeNicAddressListElems(&nicList);
   NicAddressList_uninit(&nicList);
}

/**
 * Removes the local node from the management daemon's client list.
 *
 * @return true if the node was successfully deregistered, false otherwise
 */
bool __InternodeSyncer_unregisterNode(InternodeSyncer* this)
{
   const char* logContext = "Deregistration";
   Logger* log = App_getLogger(this->app);

   /* note: be careful not to use datagrams here, because this is called during App stop and hence
      the DGramLis is probably not even listening for responses anymore */

   Node* mgmtNode;

   NoAllocBufferStore* bufStore = App_getMsgBufStore(this->app);
   Node* localNode = App_getLocalNode(this->app);
   NodeString alias;
   NumNodeID localNodeNumID = Node_getNumID(localNode);

   RemoveNodeMsgEx msg;
   char* respBuf;
   NetMessage* respMsg;
   FhgfsOpsErr requestRes;
   //RemoveNodeRespMsg* rmResp; // response value not needed currently
   bool nodeUnregistered = false;

   mgmtNode = NodeStoreEx_referenceFirstNode(this->mgmtNodes);
   if(!mgmtNode)
      return false;

   RemoveNodeMsgEx_initFromNodeData(&msg, localNodeNumID, NODETYPE_Client);

   // connect & communicate
   requestRes = MessagingTk_requestResponse(this->app, mgmtNode,
      (NetMessage*)&msg, NETMSGTYPE_RemoveNodeResp, &respBuf, &respMsg);

   if(unlikely(requestRes != FhgfsOpsErr_SUCCESS) )
   { // request/response failed
      goto cleanup_request;
   }

   // handle result
   // rmResp = (RemoveNodeRespMsg*)respMsg; // response value not needed currently

   nodeUnregistered = true;


   // cleanup
   NETMESSAGE_FREE(respMsg);
   NoAllocBufferStore_addBuf(bufStore, respBuf);


cleanup_request:
   Node_put(mgmtNode);

   // log deregistration result
   if(nodeUnregistered)
      Logger_log(log, Log_WARNING, logContext, "Node deregistration successful.");
   else
   {
      Node_copyAlias(localNode, &alias);
      Logger_log(log, Log_CRITICAL, logContext,
         "Node deregistration failed. Management node offline?");
      Logger_logFormatted(log, Log_CRITICAL, logContext,
         "In case you didn't enable automatic removal, you will have to remove this ClientID "
         "manually from the system: %s", alias.buf);
   }

   return nodeUnregistered;
}

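/**
 * Downloads the current metadata and storage node lists from the management daemon and syncs
 * them into the local node stores (including the root metadata owner).
 */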
void __InternodeSyncer_downloadAndSyncNodes(InternodeSyncer* this)
{
   Node* mgmtNode;
   Node* localNode;

   NodeList metaNodesList;
   NumNodeIDList addedMetaNodes;
   NumNodeIDList removedMetaNodes;
   NumNodeID rootNodeID = (NumNodeID){0};
   bool rootIsBuddyMirrored;

   NodeList storageNodesList;
   NumNodeIDList addedStorageNodes;
   NumNodeIDList removedStorageNodes;


   mgmtNode = NodeStoreEx_referenceFirstNode(this->mgmtNodes);
   if(!mgmtNode)
      return;

   localNode = App_getLocalNode(this->app);

   // metadata nodes

   NodeList_init(&metaNodesList);
   NumNodeIDList_init(&addedMetaNodes);
   NumNodeIDList_init(&removedMetaNodes);

   if(NodesTk_downloadNodes(this->app, mgmtNode, NODETYPE_Meta, &metaNodesList, &rootNodeID,
      &rootIsBuddyMirrored) )
   {
      const NodeOrGroup rootOwner = rootIsBuddyMirrored
         ? NodeOrGroup_fromGroup(rootNodeID.value)
         : NodeOrGroup_fromNode(rootNodeID);

      NodeStoreEx_syncNodes(this->metaNodes,
         &metaNodesList, &addedMetaNodes, &removedMetaNodes, localNode);
      NodeStoreEx_setRootOwner(this->metaNodes, rootOwner, false);
      __InternodeSyncer_printSyncResults(this, NODETYPE_Meta, &addedMetaNodes, &removedMetaNodes);
   }

   NodeList_uninit(&metaNodesList);
   NumNodeIDList_uninit(&addedMetaNodes);
   NumNodeIDList_uninit(&removedMetaNodes);

   // storage nodes

   NodeList_init(&storageNodesList);
   NumNodeIDList_init(&addedStorageNodes);
   NumNodeIDList_init(&removedStorageNodes);

   if(NodesTk_downloadNodes(this->app, mgmtNode, NODETYPE_Storage, &storageNodesList, NULL, NULL) )
   {
      NodeStoreEx_syncNodes(this->storageNodes,
         &storageNodesList, &addedStorageNodes, &removedStorageNodes, localNode);
      __InternodeSyncer_printSyncResults(this,
         NODETYPE_Storage, &addedStorageNodes, &removedStorageNodes);
   }

   // cleanup

   NodeList_uninit(&storageNodesList);
   NumNodeIDList_uninit(&addedStorageNodes);
   NumNodeIDList_uninit(&removedStorageNodes);

   Node_put(mgmtNode);
}

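/**
 * Logs how many nodes of the given type were added and removed by the last sync.
 */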
void __InternodeSyncer_printSyncResults(InternodeSyncer* this, NodeType nodeType,
   NumNodeIDList* addedNodes, NumNodeIDList* removedNodes)
{
   const char* logContext = "Sync";
   Logger* log = App_getLogger(this->app);

   if(NumNodeIDList_length(addedNodes) )
      Logger_logFormatted(log, Log_WARNING, logContext,
         "Nodes added (sync results): %d (Type: %s)",
         (int)NumNodeIDList_length(addedNodes), Node_nodeTypeToStr(nodeType) );

   if(NumNodeIDList_length(removedNodes) )
      Logger_logFormatted(log, Log_WARNING, logContext,
         "Nodes removed (sync results): %d (Type: %s)",
         (int)NumNodeIDList_length(removedNodes), Node_nodeTypeToStr(nodeType) );
}

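/**
 * Downloads the current target-to-node mappings from the management daemon and syncs them into
 * the local target mapper.
 */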
void __InternodeSyncer_downloadAndSyncTargetMappings(InternodeSyncer* this)
{
   TargetMapper* targetMapper = App_getTargetMapper(this->app);
   Node* mgmtNode;
   bool downloadRes;

   LIST_HEAD(mappings);

   mgmtNode = NodeStoreEx_referenceFirstNode(this->mgmtNodes);
   if(!mgmtNode)
      return;

   downloadRes = NodesTk_downloadTargetMappings(this->app, mgmtNode, &mappings);
   if(downloadRes)
      TargetMapper_syncTargets(targetMapper, /*move*/&mappings);

   // cleanup

   Node_put(mgmtNode);
}

/**
 * Downloads target/node states and buddy groups from the management node and syncs them into
 * the corresponding local stores. If the download fails, all states are set to offline or
 * probably-offline, depending on how long ago the last successful update was.
 *
 * @param nodeType the node type (NODETYPE_Storage or NODETYPE_Meta) for which the target states
 * and buddy groups should be synced
 */
void __InternodeSyncer_updateTargetStatesAndBuddyGroups(InternodeSyncer* this, NodeType nodeType)
{
   const char* logContext = "Update states and mirror groups";
   Logger* log = App_getLogger(this->app);

   NodeStoreEx* mgmtdNodes = App_getMgmtNodes(this->app);
   TargetStateStore* targetStateStore;
   MirrorBuddyGroupMapper* buddyGroupMapper;

   Node* mgmtdNode;
   bool downloadRes;

   LIST_HEAD(buddyGroups); /* struct BuddyGroupMapping */
   LIST_HEAD(states); /* struct TargetStateMapping */

   switch (nodeType)
   {
      case NODETYPE_Storage:
         buddyGroupMapper = App_getStorageBuddyGroupMapper(this->app);
         targetStateStore = App_getTargetStateStore(this->app);
         break;

      case NODETYPE_Meta:
         buddyGroupMapper = App_getMetaBuddyGroupMapper(this->app);
         targetStateStore = App_getMetaStateStore(this->app);
         break;

      default:
         return;
   }

   mgmtdNode = NodeStoreEx_referenceFirstNode(mgmtdNodes);
   if(!mgmtdNode)
      return;

   downloadRes = NodesTk_downloadStatesAndBuddyGroups(this->app, mgmtdNode, nodeType,
      &buddyGroups, &states);

   if(downloadRes)
   {
      TargetStateStore_syncStatesAndGroupsFromLists(targetStateStore, this->app->cfg,
         buddyGroupMapper, &states, &buddyGroups);

      Time_setToNow(&this->lastSuccessfulTargetStatesUpdateT);
      Logger_logFormatted(log, Log_DEBUG, logContext, "%s states synced.",
         (nodeType == NODETYPE_Meta) ? "Metadata node" : "Storage target");
   }
   else
   if( (this->targetOfflineTimeoutMS != 0)
      && (Time_elapsedMS(&this->lastSuccessfulTargetStatesUpdateT) > this->targetOfflineTimeoutMS) )
   {
      bool setStateRes =
         TargetStateStore_setAllStates(targetStateStore, TargetReachabilityState_OFFLINE);

      if(setStateRes)
         Logger_logFormatted(log, Log_WARNING, logContext,
            "%s state sync failed. All %s set to offline.",
            (nodeType == NODETYPE_Meta) ? "Metadata node" : "Storage target",
            (nodeType == NODETYPE_Meta) ? "nodes" : "targets");
   }
   else
   {
      bool setStatesRes =
         TargetStateStore_setAllStates(targetStateStore, TargetReachabilityState_POFFLINE);

      if(setStatesRes)
         Logger_logFormatted(log, Log_WARNING, logContext,
            "%s state sync failed. All %s set to probably-offline.",
            (nodeType == NODETYPE_Meta) ? "Metadata node" : "Storage target",
            (nodeType == NODETYPE_Meta) ? "nodes" : "targets");
   }

   // cleanup

   BEEGFS_KFREE_LIST(&buddyGroups, struct BuddyGroupMapping, _list);
   BEEGFS_KFREE_LIST(&states, struct TargetStateMapping, _list);

   Node_put(mgmtdNode);
}

/**
 * Try to close all delayedClose files.
 */
void __InternodeSyncer_delayedCloseComm(InternodeSyncer* this)
{
   PointerListIter iter;

   Mutex_lock(&this->delayedCloseMutex); // L O C K

   PointerListIter_init(&iter, &this->delayedCloseQueue);

   while(!PointerListIter_end(&iter) && !Thread_getSelfTerminate( (Thread*)this) )
   {
      DelayedCloseEntry* currentClose = PointerListIter_value(&iter);

      EntryInfo* entryInfo;
      RemotingIOInfo ioInfo;
      FhgfsOpsErr closeRes;

      __InternodeSyncer_delayedClosePrepareRemoting(this, currentClose, &entryInfo, &ioInfo);

      // note: unlock, so that more entries can be added to the queue during remoting
      Mutex_unlock(&this->delayedCloseMutex); // U N L O C K

      closeRes = FhgfsOpsRemoting_closefileEx(entryInfo, &ioInfo, false,
         currentClose->hasEvent ? &currentClose->event : NULL);

      Mutex_lock(&this->delayedCloseMutex); // R E L O C K

      if(closeRes == FhgfsOpsErr_COMMUNICATION)
      { // comm error => we will try again later
         PointerListIter_next(&iter);
      }
      else
      { /* anything other than communication error means our job is done and we can delete the
           entry */
         __InternodeSyncer_delayedCloseFreeEntry(this, currentClose);

         iter = PointerListIter_remove(&iter);
      }
   }


   Mutex_unlock(&this->delayedCloseMutex); // U N L O C K
}

/**
 * Try to unlock all delayedEntryUnlock files.
 */
void __InternodeSyncer_delayedEntryUnlockComm(InternodeSyncer* this)
{
   PointerListIter iter;

   Mutex_lock(&this->delayedEntryUnlockMutex); // L O C K

   PointerListIter_init(&iter, &this->delayedEntryUnlockQueue);

   while(!PointerListIter_end(&iter) && !Thread_getSelfTerminate( (Thread*)this) )
   {
      DelayedEntryUnlockEntry* currentUnlock = PointerListIter_value(&iter);

      EntryInfo* entryInfo;
      RemotingIOInfo ioInfo;
      FhgfsOpsErr unlockRes;

      __InternodeSyncer_delayedEntryUnlockPrepareRemoting(this, currentUnlock, &entryInfo, &ioInfo);

      // note: unlock, so that more entries can be added to the queue during remoting
      Mutex_unlock(&this->delayedEntryUnlockMutex); // U N L O C K

      unlockRes = FhgfsOpsRemoting_flockEntryEx(entryInfo, NULL, this->app, ioInfo.fileHandleID,
         currentUnlock->clientFD, 0, ENTRYLOCKTYPE_CANCEL, false);

      Mutex_lock(&this->delayedEntryUnlockMutex); // R E L O C K

      if(unlockRes == FhgfsOpsErr_COMMUNICATION)
      { // comm error => we will try again later
         PointerListIter_next(&iter);
      }
      else
      { /* anything other than communication error means our job is done and we can delete the
           entry */
         __InternodeSyncer_delayedEntryUnlockFreeEntry(this, currentUnlock);

         iter = PointerListIter_remove(&iter);
      }
   }


   Mutex_unlock(&this->delayedEntryUnlockMutex); // U N L O C K
}

/**
 * Try to unlock all delayedRangeUnlock files.
 */
void __InternodeSyncer_delayedRangeUnlockComm(InternodeSyncer* this)
{
   PointerListIter iter;

   Mutex_lock(&this->delayedRangeUnlockMutex); // L O C K

   PointerListIter_init(&iter, &this->delayedRangeUnlockQueue);

   while(!PointerListIter_end(&iter) && !Thread_getSelfTerminate( (Thread*)this) )
   {
      DelayedRangeUnlockEntry* currentUnlock = PointerListIter_value(&iter);

      EntryInfo* entryInfo;
      RemotingIOInfo ioInfo;
      FhgfsOpsErr unlockRes;

      __InternodeSyncer_delayedRangeUnlockPrepareRemoting(this, currentUnlock, &entryInfo, &ioInfo);

      // note: unlock, so that more entries can be added to the queue during remoting
      Mutex_unlock(&this->delayedRangeUnlockMutex); // U N L O C K

      unlockRes = FhgfsOpsRemoting_flockRangeEx(entryInfo, NULL, ioInfo.app, ioInfo.fileHandleID,
         currentUnlock->ownerPID, ENTRYLOCKTYPE_CANCEL, 0, ~0ULL, false);

      Mutex_lock(&this->delayedRangeUnlockMutex); // R E L O C K

      if(unlockRes == FhgfsOpsErr_COMMUNICATION)
      { // comm error => we will try again later
         PointerListIter_next(&iter);
      }
      else
      { /* anything other than communication error means our job is done and we can delete the
           entry */
         __InternodeSyncer_delayedRangeUnlockFreeEntry(this, currentUnlock);

         iter = PointerListIter_remove(&iter);
      }
   }


   Mutex_unlock(&this->delayedRangeUnlockMutex); // U N L O C K
}

/**
 * Prepare remoting args for delayed close.
 *
 * Note: This uses only references to the closeEntry, so no cleanup call required.
 */
void __InternodeSyncer_delayedClosePrepareRemoting(InternodeSyncer* this,
   DelayedCloseEntry* closeEntry, EntryInfo** outEntryInfo, RemotingIOInfo* outIOInfo)
{
   *outEntryInfo = &closeEntry->entryInfo;

   outIOInfo->app = this->app;
   outIOInfo->fileHandleID = closeEntry->fileHandleID;
   outIOInfo->pattern = NULL;
   outIOInfo->accessFlags = closeEntry->accessFlags;
   outIOInfo->needsAppendLockCleanup = &closeEntry->needsAppendLockCleanup;
   outIOInfo->maxUsedTargetIndex = &closeEntry->maxUsedTargetIndex;
   outIOInfo->firstWriteDone = NULL;
   outIOInfo->userID = 0;
   outIOInfo->groupID = 0;
#ifdef BEEGFS_NVFS
   outIOInfo->nvfs = false;
#endif
}

/**
 * Prepare remoting args for delayed entry unlock.
 *
 * Note: This uses only references to the unlockEntry, so no cleanup call required.
 */
void __InternodeSyncer_delayedEntryUnlockPrepareRemoting(InternodeSyncer* this,
   DelayedEntryUnlockEntry* unlockEntry, EntryInfo** outEntryInfo, RemotingIOInfo* outIOInfo)
{
   *outEntryInfo = &unlockEntry->entryInfo;

   outIOInfo->app = this->app;
   outIOInfo->fileHandleID = unlockEntry->fileHandleID;
   outIOInfo->pattern = NULL;
   outIOInfo->accessFlags = 0;
   outIOInfo->maxUsedTargetIndex = NULL;
   outIOInfo->firstWriteDone = NULL;
   outIOInfo->userID = 0;
   outIOInfo->groupID = 0;
#ifdef BEEGFS_NVFS
   outIOInfo->nvfs = false;
#endif
}

/**
 * Prepare remoting args for delayed range unlock.
 *
 * Note: This uses only references to the unlockEntry, so no cleanup call required.
 */
void __InternodeSyncer_delayedRangeUnlockPrepareRemoting(InternodeSyncer* this,
   DelayedRangeUnlockEntry* unlockEntry, EntryInfo** outEntryInfo, RemotingIOInfo* outIOInfo)
{
   *outEntryInfo = &unlockEntry->entryInfo;

   outIOInfo->app = this->app;
   outIOInfo->fileHandleID = unlockEntry->fileHandleID;
   outIOInfo->pattern = NULL;
   outIOInfo->accessFlags = 0;
   outIOInfo->maxUsedTargetIndex = NULL;
   outIOInfo->firstWriteDone = NULL;
   outIOInfo->userID = 0;
   outIOInfo->groupID = 0;
#ifdef BEEGFS_NVFS
   outIOInfo->nvfs = false;
#endif
}

/**
 * Frees/uninits all sub-fields and kfrees the closeEntry itself (but does not remove it from the
 * queue).
 */
void __InternodeSyncer_delayedCloseFreeEntry(InternodeSyncer* this, DelayedCloseEntry* closeEntry)
{
   EntryInfo_uninit(&closeEntry->entryInfo);
   if (closeEntry->hasEvent)
      FileEvent_uninit(&closeEntry->event);

   kfree(closeEntry->fileHandleID);

   AtomicInt_uninit(&closeEntry->maxUsedTargetIndex);

   kfree(closeEntry);
}

/**
 * Frees/uninits all sub-fields and kfrees the unlockEntry itself (but does not remove it from the
 * queue).
 */
void __InternodeSyncer_delayedEntryUnlockFreeEntry(InternodeSyncer* this,
   DelayedEntryUnlockEntry* unlockEntry)
{
   EntryInfo_uninit(&unlockEntry->entryInfo);

   kfree(unlockEntry->fileHandleID);

   kfree(unlockEntry);
}

/**
 * Frees/uninits all sub-fields and kfrees the unlockEntry itself (but does not remove it from the
 * queue).
 */
void __InternodeSyncer_delayedRangeUnlockFreeEntry(InternodeSyncer* this,
   DelayedRangeUnlockEntry* unlockEntry)
{
   EntryInfo_uninit(&unlockEntry->entryInfo);

   kfree(unlockEntry->fileHandleID);

   kfree(unlockEntry);
}

/**
 * Add an entry that could not be closed on the server due to communication error and should be
 * retried again later.
 *
 * @param entryInfo will be copied
 * @param ioInfo will be copied
 */
void InternodeSyncer_delayedCloseAdd(InternodeSyncer* this, const EntryInfo* entryInfo,
   const RemotingIOInfo* ioInfo, struct FileEvent* event)
{
   DelayedCloseEntry* newClose = (DelayedCloseEntry*)os_kmalloc(sizeof(*newClose) );

   Time_init(&newClose->ageT);

   EntryInfo_dup(entryInfo, &newClose->entryInfo);

   newClose->fileHandleID = StringTk_strDup(ioInfo->fileHandleID);
   newClose->accessFlags = ioInfo->accessFlags;

   newClose->needsAppendLockCleanup =
      (ioInfo->needsAppendLockCleanup && *ioInfo->needsAppendLockCleanup);

   AtomicInt_init(&newClose->maxUsedTargetIndex, AtomicInt_read(ioInfo->maxUsedTargetIndex) );

   newClose->hasEvent = event != NULL;
   if (event)
      newClose->event = *event;

   Mutex_lock(&this->delayedCloseMutex); // L O C K

   PointerList_append(&this->delayedCloseQueue, newClose);

   Mutex_unlock(&this->delayedCloseMutex); // U N L O C K
}

/**
 * Add an entry that could not be unlocked on the server due to communication error and should be
 * retried again later.
 *
 * @param entryInfo will be copied
 * @param ioInfo will be copied
 */
void InternodeSyncer_delayedEntryUnlockAdd(InternodeSyncer* this, const EntryInfo* entryInfo,
   const RemotingIOInfo* ioInfo, int64_t clientFD)
{
   DelayedEntryUnlockEntry* newUnlock = (DelayedEntryUnlockEntry*)os_kmalloc(sizeof(*newUnlock) );

   Time_init(&newUnlock->ageT);

   EntryInfo_dup(entryInfo, &newUnlock->entryInfo);

   newUnlock->fileHandleID = StringTk_strDup(ioInfo->fileHandleID);
   newUnlock->clientFD = clientFD;

   Mutex_lock(&this->delayedEntryUnlockMutex); // L O C K

   PointerList_append(&this->delayedEntryUnlockQueue, newUnlock);

   Mutex_unlock(&this->delayedEntryUnlockMutex); // U N L O C K
}

/**
 * Add an entry that could not be unlocked on the server due to communication error and should be
 * retried again later.
 *
 * @param entryInfo will be copied
 * @param ioInfo will be copied
 */
void InternodeSyncer_delayedRangeUnlockAdd(InternodeSyncer* this, const EntryInfo* entryInfo,
   const RemotingIOInfo* ioInfo, int ownerPID)
{
   DelayedRangeUnlockEntry* newUnlock = (DelayedRangeUnlockEntry*)os_kmalloc(sizeof(*newUnlock) );

   Time_init(&newUnlock->ageT);

   EntryInfo_dup(entryInfo, &newUnlock->entryInfo);

   newUnlock->fileHandleID = StringTk_strDup(ioInfo->fileHandleID);
   newUnlock->ownerPID = ownerPID;

   Mutex_lock(&this->delayedRangeUnlockMutex); // L O C K

   PointerList_append(&this->delayedRangeUnlockQueue, newUnlock);

   Mutex_unlock(&this->delayedRangeUnlockMutex); // U N L O C K
}

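// queue size getters (each briefly takes the corresponding queue mutex)
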
size_t InternodeSyncer_getDelayedCloseQueueSize(InternodeSyncer* this)
{
   size_t retVal;

   Mutex_lock(&this->delayedCloseMutex); // L O C K

   retVal = PointerList_length(&this->delayedCloseQueue);

   Mutex_unlock(&this->delayedCloseMutex); // U N L O C K

   return retVal;
}

size_t InternodeSyncer_getDelayedEntryUnlockQueueSize(InternodeSyncer* this)
{
   size_t retVal;

   Mutex_lock(&this->delayedEntryUnlockMutex); // L O C K

   retVal = PointerList_length(&this->delayedEntryUnlockQueue);

   Mutex_unlock(&this->delayedEntryUnlockMutex); // U N L O C K

   return retVal;
}

size_t InternodeSyncer_getDelayedRangeUnlockQueueSize(InternodeSyncer* this)
{
   size_t retVal;

   Mutex_lock(&this->delayedRangeUnlockMutex); // L O C K

   retVal = PointerList_length(&this->delayedRangeUnlockQueue);

   Mutex_unlock(&this->delayedRangeUnlockMutex); // U N L O C K

   return retVal;
}

/**
 * Drop/reset idle conns from all server stores.
 */
void __InternodeSyncer_dropIdleConns(InternodeSyncer* this)
{
   Logger* log = App_getLogger(this->app);
   const char* logContext = "Idle disconnect";

   unsigned numDroppedConns = 0;

   numDroppedConns += __InternodeSyncer_dropIdleConnsByStore(this, App_getMgmtNodes(this->app) );
   numDroppedConns += __InternodeSyncer_dropIdleConnsByStore(this, App_getMetaNodes(this->app) );
   numDroppedConns += __InternodeSyncer_dropIdleConnsByStore(this, App_getStorageNodes(this->app) );

   if(numDroppedConns)
   {
      Logger_logFormatted(log, Log_DEBUG, logContext, "Dropped idle connections: %u",
         numDroppedConns);
   }
}

/**
 * Walk over all nodes in the given store and drop/reset idle connections.
 *
 * @return number of dropped connections
 */
unsigned __InternodeSyncer_dropIdleConnsByStore(InternodeSyncer* this, NodeStoreEx* nodes)
{
   unsigned numDroppedConns = 0;

   Node* node = NodeStoreEx_referenceFirstNode(nodes);
   while(node)
   {
      NodeConnPool* connPool = Node_getConnPool(node);

      numDroppedConns += NodeConnPool_disconnectAndResetIdleStreams(connPool);

      node = NodeStoreEx_referenceNextNodeAndReleaseOld(nodes, node); // iterate to next node
   }

   return numDroppedConns;
}