#include <app/config/Config.h>
#include <app/App.h>
#include <common/net/message/nodes/ChangeTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/ChangeTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetNodeCapacityPoolsRespMsg.h>
#include <common/net/message/nodes/HeartbeatMsg.h>
#include <common/net/message/nodes/MapTargetsMsg.h>
#include <common/net/message/nodes/MapTargetsRespMsg.h>
#include <common/net/message/nodes/RefreshCapacityPoolsMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/nodes/GetTargetConsistencyStatesMsg.h>
#include <common/net/message/nodes/GetTargetConsistencyStatesRespMsg.h>
#include <common/net/message/storage/SetStorageTargetInfoMsg.h>
#include <common/net/message/storage/SetStorageTargetInfoRespMsg.h>
#include <common/net/message/storage/quota/RequestExceededQuotaMsg.h>
#include <common/net/message/storage/quota/RequestExceededQuotaRespMsg.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/NodesTk.h>
#include <common/toolkit/Time.h>
#include <common/toolkit/ZipIterator.h>
#include <common/nodes/NodeStore.h>
#include <common/nodes/TargetCapacityPools.h>
#include <net/msghelpers/MsgHelperIO.h>
#include <program/Program.h>

#include "InternodeSyncer.h"

#include <boost/lexical_cast.hpp>

// forward declaration
namespace UUID {
   std::string getMachineUUID();
}

InternodeSyncer::InternodeSyncer():
   PThread("XNodeSync"),
   log("XNodeSync"), forceTargetStatesUpdate(true), forcePublishCapacities(true)
{
}

InternodeSyncer::~InternodeSyncer()
{
}

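/**
 * Thread entry point: installs the signal handler and runs syncLoop() until the component
 * receives a self-terminate order; unhandled exceptions are passed on as component exceptions.
 */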
void InternodeSyncer::run()
{
   try
   {
      registerSignalHandler();

      syncLoop();

      log.log(Log_DEBUG, "Component stopped.");
   }
   catch(std::exception& e)
   {
      PThread::getCurrentThreadApp()->handleComponentException(e);
   }
}

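/**
 * Periodic sync loop. On each iteration (roughly every 3 seconds) this checks which of the
 * timed tasks are due: chunk dir cache sweeps, network interface checks, idle connection drops,
 * node/target-mapping/mirror-group/storage-pool downloads from mgmt, target state updates and
 * target capacity publishing. Individual tasks can also be forced externally.
 */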
void InternodeSyncer::syncLoop()
{
   App* app = Program::getApp();
   Config* cfg = app->getConfig();

   const int sleepIntervalMS = 3*1000; // 3sec

   const unsigned sweepNormalMS = 5*1000; // 5sec
   const unsigned sweepStressedMS = 2*1000; // 2sec
   const unsigned checkNetworkIntervalMS = 60*1000; // 1 minute
   const unsigned idleDisconnectIntervalMS = 70*60*1000; /* 70 minutes (must be less than half the
      streamlis idle disconnect interval to avoid cases where streamlis disconnects first) */
   const unsigned downloadNodesIntervalMS = 600000; // 10min
   const unsigned updateStoragePoolsMS = downloadNodesIntervalMS;

   // If (undocumented) sysUpdateTargetStatesSecs is set in config, use that value, otherwise
   // default to 1/6 sysTargetOfflineTimeoutSecs.
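   // (example: with sysTargetOfflineTimeoutSecs=180, this yields 180 * 166 ms, i.e. about 30s)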
   const unsigned updateTargetStatesMS =
      (cfg->getSysUpdateTargetStatesSecs() != 0)
      ? cfg->getSysUpdateTargetStatesSecs() * 1000
      : cfg->getSysTargetOfflineTimeoutSecs() * 166;

   const unsigned updateCapacitiesMS = updateTargetStatesMS * 4;

   Time lastCacheSweepT;
   Time lastCheckNetworkT;
   Time lastIdleDisconnectT;
   Time lastDownloadNodesT;
   Time lastTargetStatesUpdateT;
   Time lastStoragePoolsUpdateT;
   Time lastCapacityUpdateT;
   bool doRegisterLocalNode = false;

   unsigned currentCacheSweepMS = sweepNormalMS; // (adapted inside the loop below)

   while(!waitForSelfTerminateOrder(sleepIntervalMS) )
   {
      bool targetStatesUpdateForced = getAndResetForceTargetStatesUpdate();
      bool publishCapacitiesForced = getAndResetForcePublishCapacities();
      bool storagePoolsUpdateForced = getAndResetForceStoragePoolsUpdate();
      bool checkNetworkForced = getAndResetForceCheckNetwork();

      if(lastCacheSweepT.elapsedMS() > currentCacheSweepMS)
      {
         bool flushTriggered = app->getChunkDirStore()->cacheSweepAsync();
         currentCacheSweepMS = (flushTriggered ? sweepStressedMS : sweepNormalMS);

         lastCacheSweepT.setToNow();
      }

      if (checkNetworkForced ||
         (lastCheckNetworkT.elapsedMS() > checkNetworkIntervalMS))
      {
         if (checkNetwork())
            doRegisterLocalNode = true;
         lastCheckNetworkT.setToNow();
      }

      if (doRegisterLocalNode)
         doRegisterLocalNode = !registerNode(app->getDatagramListener());

      if(lastIdleDisconnectT.elapsedMS() > idleDisconnectIntervalMS)
      {
         dropIdleConns();
         lastIdleDisconnectT.setToNow();
      }

      // download & sync nodes
      if(lastDownloadNodesT.elapsedMS() > downloadNodesIntervalMS)
      {
         downloadAndSyncNodes();
         downloadAndSyncTargetMappings();
         downloadAndSyncMirrorBuddyGroups();

         lastDownloadNodesT.setToNow();
      }

      if (storagePoolsUpdateForced ||
         (lastStoragePoolsUpdateT.elapsedMS() > updateStoragePoolsMS))
      {
         downloadAndSyncStoragePools();

         lastStoragePoolsUpdateT.setToNow();
      }

      if( targetStatesUpdateForced ||
         (lastTargetStatesUpdateT.elapsedMS() > updateTargetStatesMS) )
      {
         updateTargetStatesAndBuddyGroups();
         lastTargetStatesUpdateT.setToNow();
      }

      if( publishCapacitiesForced ||
         (lastCapacityUpdateT.elapsedMS() > updateCapacitiesMS))
      {
         publishTargetCapacities();
         lastCapacityUpdateT.setToNow();
      }
   }
}

/**
 * Inspect the available and allowed network interfaces for any changes.
 */
bool InternodeSyncer::checkNetwork()
{
   App* app = Program::getApp();
   NicAddressList newLocalNicList;
   bool res = false;

   app->findAllowedInterfaces(newLocalNicList);
   app->findAllowedRDMAInterfaces(newLocalNicList);
   if (!std::equal(newLocalNicList.begin(), newLocalNicList.end(), app->getLocalNicList().begin()))
   {
      log.log(Log_NOTICE, "checkNetwork: local interfaces have changed");
      app->updateLocalNicList(newLocalNicList);
      res = true;
   }

   return res;
}

/**
 * Drop/reset idle conns from all server stores.
 */
void InternodeSyncer::dropIdleConns()
{
   App* app = Program::getApp();

   unsigned numDroppedConns = 0;

   numDroppedConns += dropIdleConnsByStore(app->getMgmtNodes() );
   numDroppedConns += dropIdleConnsByStore(app->getMetaNodes() );
   numDroppedConns += dropIdleConnsByStore(app->getStorageNodes() );

   if(numDroppedConns)
   {
      log.log(Log_DEBUG, "Dropped idle connections: " + StringTk::uintToStr(numDroppedConns) );
   }
}

/**
 * Walk over all nodes in the given store and drop/reset idle connections.
 *
 * @return number of dropped connections
 */
unsigned InternodeSyncer::dropIdleConnsByStore(NodeStoreServers* nodes)
{
   App* app = Program::getApp();

   unsigned numDroppedConns = 0;

   for (const auto& node : nodes->referenceAllNodes())
   {
      /* don't do any idle disconnect stuff with local node
         (the LocalNodeConnPool doesn't support and doesn't need this kind of treatment) */

      if (node.get() != &app->getLocalNode())
      {
         NodeConnPool* connPool = node->getConnPool();

         numDroppedConns += connPool->disconnectAndResetIdleStreams();
      }
   }

   return numDroppedConns;
}

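/**
 * Download target states and mirror buddy groups from mgmt, sync them into the local stores,
 * decide whether local targets need a resync and publish the resulting local state changes back
 * to mgmt (with a limited number of retries if the states changed concurrently on the mgmtd).
 */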
void InternodeSyncer::updateTargetStatesAndBuddyGroups()
{
   const char* logContext = "Update states and mirror groups";
   LogContext(logContext).log(LogTopic_STATES, Log_DEBUG,
      "Starting target state update.");

   App* app = Program::getApp();
   NodeStore* mgmtNodes = app->getMgmtNodes();
   TargetStateStore* targetStateStore = app->getTargetStateStore();
   StorageTargets* storageTargets = app->getStorageTargets();
   MirrorBuddyGroupMapper* mirrorBuddyGroupMapper = app->getMirrorBuddyGroupMapper();

   static bool downloadFailedLogged = false; // to avoid log spamming
   static bool publishFailedLogged = false; // to avoid log spamming

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if (unlikely(!mgmtNode))
   { // should never happen here, because mgmt is downloaded before InternodeSyncer startup
      LogContext(logContext).log(LogTopic_STATES, Log_ERR, "Management node not defined.");
      return;
   }

   unsigned numRetries = 10; // If publishing states fails 10 times, give up (-> POFFLINE).

   // Note: Publishing states fails if between downloadStatesAndBuddyGroups and
   // publishLocalTargetStateChanges, a state on the mgmtd is changed (e.g. because the primary
   // sets NEEDS_RESYNC for the secondary). In that case, we will retry.

   LogContext(logContext).log(LogTopic_STATES, Log_DEBUG,
      "Beginning target state update...");
   bool publishSuccess = false;

   while (!publishSuccess && (numRetries--) )
   {
      MirrorBuddyGroupMap buddyGroups;
      TargetStateMap states;

      bool downloadRes = NodesTk::downloadStatesAndBuddyGroups(*mgmtNode, NODETYPE_Storage,
         buddyGroups, states, true);

      if (!downloadRes)
      {
         if(!downloadFailedLogged)
         {
            LogContext(logContext).log(LogTopic_STATES, Log_WARNING,
               "Downloading target states from management node failed. "
               "Setting all targets to probably-offline.");
            downloadFailedLogged = true;
         }

         targetStateStore->setAllStates(TargetReachabilityState_POFFLINE);

         break;
      }

      downloadFailedLogged = false;

      // before anything else is done, update the targetWasOffline flags in the resyncers. updating
      // them later opens a window of opportunity where the target state store says "offline", but
      // the resyncer has not noticed - which would erroneously not fail the resync.
      for (const auto& state : states)
      {
         if (state.second.reachabilityState == TargetReachabilityState_OFFLINE)
         {
            const auto job = app->getBuddyResyncer()->getResyncJob(state.first);
            if (job)
               job->setTargetOffline();
         }
      }

      // Sync buddy groups here, because decideResync depends on it.
      // This is not a problem because if pushing target states fails all targets will be
      // (p)offline anyway.
      targetStateStore->syncStatesAndGroups(mirrorBuddyGroupMapper, states, buddyGroups,
         app->getLocalNode().getNumID());

      TargetStateMap localTargetChangedStates;
      storageTargets->decideResync(states, localTargetChangedStates);

      publishSuccess = publishLocalTargetStateChanges(states, localTargetChangedStates);

      if(publishSuccess)
         storageTargets->checkBuddyNeedsResync();
   }

   if(!publishSuccess)
   {
      if(!publishFailedLogged)
      {
         log.log(LogTopic_STATES, Log_WARNING,
            "Pushing local target states to management node failed.");
         publishFailedLogged = true;
      }
   }
   else
      publishFailedLogged = false;
}

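/**
 * Report free space / capacity info for all local targets to the management node
 * (SetStorageTargetInfoMsg); after the first successful report also ask the mgmtd to refresh its
 * capacity pools, so new targets don't linger in the emergency pool.
 */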
void InternodeSyncer::publishTargetCapacities()
{
   App* app = Program::getApp();
   NodeStore* mgmtNodes = app->getMgmtNodes();
   StorageTargets* storageTargets = app->getStorageTargets();

   log.log(LogTopic_STATES, Log_DEBUG, "Publishing target capacity infos.");

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if (!mgmtNode)
   {
      log.log(LogTopic_STATES, Log_ERR, "Management node not defined.");
      return;
   }

   StorageTargetInfoList targetInfoList;

   storageTargets->generateTargetInfoList(targetInfoList);

   SetStorageTargetInfoMsg msg(NODETYPE_Storage, &targetInfoList);
   RequestResponseArgs rrArgs(mgmtNode.get(), &msg, NETMSGTYPE_SetStorageTargetInfoResp);

#ifndef BEEGFS_DEBUG
   rrArgs.logFlags |= REQUESTRESPONSEARGS_LOGFLAG_CONNESTABLISHFAILED
      | REQUESTRESPONSEARGS_LOGFLAG_RETRY;
#endif

   bool sendRes = MessagingTk::requestResponse(&rrArgs);

   static bool failureLogged = false;

   if (!sendRes)
   {
      if (!failureLogged)
         log.log(LogTopic_STATES, Log_CRITICAL,
            "Pushing target free space info to management node failed.");

      failureLogged = true;
      return;
   }
   else
   {
      const auto respMsgCast =
         static_cast<const SetStorageTargetInfoRespMsg*>(rrArgs.outRespMsg.get());

      failureLogged = false;

      if ( (FhgfsOpsErr)respMsgCast->getValue() != FhgfsOpsErr_SUCCESS)
      {
         log.log(LogTopic_STATES, Log_CRITICAL,
            "Management did not accept target free space info message.");
         return;
      }
   }

   // If we were just started and are publishing our capacity for the first time, force a pool
   // refresh on the mgmtd so we're not stuck in the emergency pool until the first regular
   // pool refresh.
   static bool firstTimePublished = true;
   if (firstTimePublished)
   {
      forceMgmtdPoolsRefresh();
      firstTimePublished = false;
   }
}

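/**
 * Push the consistency state of a single local target to the management node.
 *
 * @param targetID the local target whose state is reported
 * @param targetState the consistency state to report
 */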
void InternodeSyncer::publishTargetState(uint16_t targetID, TargetConsistencyState targetState)
{
   App* app = Program::getApp();
   NodeStore* mgmtNodes = app->getMgmtNodes();

   log.log(Log_DEBUG, "Publishing state for target: " + StringTk::uintToStr(targetID) );

   UInt16List targetIDs(1, targetID);
   UInt8List states(1, targetState);

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if (!mgmtNode)
   {
      log.logErr("Management node not defined.");
      return;
   }

   SetTargetConsistencyStatesMsg msg(NODETYPE_Storage, &targetIDs, &states, true);

   const auto respMsg = MessagingTk::requestResponse(*mgmtNode, msg,
      NETMSGTYPE_SetTargetConsistencyStatesResp);

   if (!respMsg)
      log.log(Log_CRITICAL, "Pushing target state to management node failed.");
   else
   {
      auto* respMsgCast = (SetTargetConsistencyStatesRespMsg*)respMsg.get();
      if ( (FhgfsOpsErr)respMsgCast->getValue() != FhgfsOpsErr_SUCCESS)
         log.log(Log_CRITICAL, "Management node did not accept target state.");
   }
}

/**
 * Gets a list of target state changes (old/new), and reports the local ones (targets which are
 * present in this storage server's storageTargetDataMap) to the mgmtd.
 */
bool InternodeSyncer::publishLocalTargetStateChanges(const TargetStateMap& oldStates,
   const TargetStateMap& changes)
{
   App* app = Program::getApp();
   StorageTargets* storageTargets = app->getStorageTargets();

   UInt16List localTargetIDs;
   UInt8List localOldStates;
   UInt8List localNewStates;

   for (const auto& state : oldStates)
   {
      const uint16_t targetID = state.first;
      auto* const target = storageTargets->getTarget(targetID);

      if (!target)
         continue;

      // Don't report targets which have an offline timeout at the moment.
      const auto waitRemaining = target->getOfflineTimeout();
      if (waitRemaining)
      {
         LOG(GENERAL, WARNING, "Target was a primary target and needs a resync. "
            "Waiting until it is marked offline on all clients.",
            targetID, ("remainingMS", waitRemaining->count()));
         continue;
      }

      localTargetIDs.push_back(state.first);
      localOldStates.push_back(state.second.consistencyState);

      const auto change = changes.find(state.first);

      if (change != changes.end())
         localNewStates.push_back(change->second.consistencyState);
      else
         localNewStates.push_back(state.second.consistencyState);
   }

   return publishTargetStateChanges(localTargetIDs, localOldStates, localNewStates);
}

/**
 * Send a HeartbeatMsg to mgmt.
 *
 * @return true if node registration was successful
 */
bool InternodeSyncer::registerNode(AbstractDatagramListener* dgramLis)
{
   static bool registrationFailureLogged = false; // to avoid log spamming

   const char* logContext = "Register node";

   App* app = Program::getApp();
   Config* cfg = app->getConfig();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;

   Node& localNode = app->getLocalNode();
   NumNodeID localNodeNumID = localNode.getNumID();
   NicAddressList nicList(localNode.getNicList() );

   HeartbeatMsg msg(localNode.getAlias(), localNodeNumID, NODETYPE_Storage, &nicList);
   msg.setPorts(cfg->getConnStoragePort(), cfg->getConnStoragePort() );
   auto uuid = UUID::getMachineUUID();
   if (uuid.empty()) {
      LogContext(logContext).log(Log_CRITICAL,
         "Couldn't determine UUID for machine. Node registration not possible.");
      return false;
   }
   msg.setMachineUUID(uuid);

   bool registered = dgramLis->sendToNodeUDPwithAck(mgmtNode, &msg);

   if(registered)
      LogContext(logContext).log(Log_WARNING, "Node registration successful.");
   else
   if(!registrationFailureLogged)
   {
      LogContext(logContext).log(Log_CRITICAL, "Node registration not successful. "
         "Management node offline? Will keep on trying...");
      registrationFailureLogged = true;
   }

   return registered;
}

/**
 * Send a MapTargetsMsg to mgmt.
 *
 * note: only called once at startup
 *
 * @return true if targets mapping successful.
 */
bool InternodeSyncer::registerTargetMappings()
{
   static std::map<uint16_t, bool> registrationFailureLogged; // one for each target; to avoid log
                                                              // spamming
   static bool commErrorLogged = false; // to avoid log spamming

   App* app = Program::getApp();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;

   bool registered = true;

   Node& localNode = app->getLocalNode();
   NumNodeID localNodeID = localNode.getNumID();
   StorageTargets* targets = Program::getApp()->getStorageTargets();
   std::map<uint16_t, StoragePoolId> targetPools;

   MapTargetsRespMsg* respMsgCast;

   // for each target, check if a storagePoolId file exists in the storage dir; if there is, try to
   // directly put the target in the specified pool when mapping at mgmtd
   // note: if file is not set readNumStoragePoolIDFile will return default pool
   for (const auto& mapping : targets->getTargets())
   {
      const auto& targetPath = mapping.second->getPath().str();

      targetPools.emplace(mapping.first,
         StorageTk::readNumStoragePoolIDFile(targetPath, STORAGETK_STORAGEPOOLID_FILENAME));
   }

   MapTargetsMsg msg(targetPools, localNodeID);

   const auto respMsg = MessagingTk::requestResponse(*mgmtNode, msg, NETMSGTYPE_MapTargetsResp);
   if (respMsg)
   {
      // handle result
      respMsgCast = (MapTargetsRespMsg*) respMsg.get();

      const auto& results = respMsgCast->getResults();

      for (const auto& mapping : targets->getTargets())
      {
         const auto targetID = mapping.first;
         const auto result = results.find(targetID);

         if (result == results.end())
         {
            registered = false;

            LOG(GENERAL, CRITICAL, "Mgmt ignored target registration attempt.", targetID);
            registrationFailureLogged[targetID] = true;
         }
         else if (result->second != FhgfsOpsErr_SUCCESS)
         {
            registered = false;

            if (!registrationFailureLogged[targetID])
            {
               LOG(GENERAL, CRITICAL, "Storage target registration rejected. Will keep on trying.",
                  targetID, ("error", result->second));
               registrationFailureLogged[targetID] = true;
            }
         }
         else
         {
            // registered successfully => remove STORAGETK_STORAGEPOOLID_FILENAME for this target,
            // because it is only relevant for first registration
            const auto& targetPath = mapping.second->getPath().str();
            std::string storagePoolIdFileName = targetPath + "/" + STORAGETK_STORAGEPOOLID_FILENAME;

            int unlinkRes = ::unlink(storagePoolIdFileName.c_str());
            int errorCode = errno;
            if ((unlinkRes != 0) && (errorCode != ENOENT))
            { // error; note: if file doesn't exist, that's not considered an error
               LOG(GENERAL, WARNING, "Unable to unlink storage pool ID file", targetID, errorCode);
            }
         }
      }
   }
   else if (!commErrorLogged)
   {
      LOG(GENERAL, CRITICAL, "Storage targets registration not successful. "
         "Management node offline? Will keep on trying.");
      commErrorLogged = true;
   }

   if (registered)
   {
      LOG(GENERAL, WARNING, "Storage targets registration successful.");
   }

   return registered;
}

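/**
 * Report a set of local target state changes (old and new consistency states) to the mgmtd.
 *
 * @return true on success; false if the request failed or mgmt rejected the changes, in which
 *    case the caller should retry with freshly downloaded states.
 */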
bool InternodeSyncer::publishTargetStateChanges(UInt16List& targetIDs, UInt8List& oldStates,
   UInt8List& newStates)
{
   App* app = Program::getApp();
   NodeStore* mgmtNodes = app->getMgmtNodes();
   bool res;

   log.log(Log_DEBUG, "Publishing target state change");

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if (!mgmtNode)
   {
      log.logErr("Management node not defined.");
      return true; // Don't stall indefinitely if we don't have a management node.
   }

   ChangeTargetConsistencyStatesMsg msg(NODETYPE_Storage, &targetIDs, &oldStates, &newStates);

   const auto respMsg = MessagingTk::requestResponse(*mgmtNode, msg,
      NETMSGTYPE_ChangeTargetConsistencyStatesResp);

   if (!respMsg)
   {
      log.log(Log_CRITICAL, "Pushing target state changes to management node failed.");
      res = false; // Retry.
   }
   else
   {
      auto* respMsgCast = (ChangeTargetConsistencyStatesRespMsg*)respMsg.get();

      if ( (FhgfsOpsErr)respMsgCast->getValue() != FhgfsOpsErr_SUCCESS)
      {
         log.log(Log_CRITICAL, "Management node did not accept target state changes.");
         res = false; // States were changed while we evaluated the state changes. Try again.
      }
      else
         res = true;
   }

   return res;
}

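/**
 * Periodic timer task: for each local target that is part of a mirror buddy group, ask the node
 * owning the buddy target for its consistency state and update the "last buddy communication"
 * timestamp if the buddy reports a GOOD state. Requeues itself on the timer queue.
 */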
void InternodeSyncer::requestBuddyTargetStates()
{
   const char* logContext = "Request buddy target states";

   TimerQueue* timerQ = Program::getApp()->getTimerQueue();
   TargetMapper* targetMapper = Program::getApp()->getTargetMapper();
   MirrorBuddyGroupMapper* buddyGroupMapper = Program::getApp()->getMirrorBuddyGroupMapper();
   StorageTargets* storageTargets = Program::getApp()->getStorageTargets();
   NodeStore* storageNodes = Program::getApp()->getStorageNodes();
   TargetStateStore* targetStateStore = Program::getApp()->getTargetStateStore();

   LogContext(logContext).log(LogTopic_STATES, Log_DEBUG, "Requesting buddy target states.");

   // loop over all local targets
   for (const auto& mapping : storageTargets->getTargets())
   {
      uint16_t targetID = mapping.first;

      // check if target is part of a buddy group
      uint16_t buddyTargetID = buddyGroupMapper->getBuddyTargetID(targetID);
      if(!buddyTargetID)
         continue;

      // this target is part of a buddy group

      NumNodeID nodeID = targetMapper->getNodeID(buddyTargetID);
      if(!nodeID)
      { // mapping to node not found
         LogContext(logContext).log(LogTopic_STATES, Log_ERR,
            "Node-mapping for target ID " + StringTk::uintToStr(buddyTargetID) + " not found.");
         continue;
      }

      auto node = storageNodes->referenceNode(nodeID);
      if(!node)
      { // node not found
         LogContext(logContext).log(LogTopic_STATES, Log_ERR,
            "Unknown storage node. nodeID: " + nodeID.str() + "; targetID: "
            + StringTk::uintToStr(targetID));
         continue;
      }

      // get reachability state of buddy target ID
      CombinedTargetState currentState;
      targetStateStore->getState(buddyTargetID, currentState);

      if(currentState.reachabilityState == TargetReachabilityState_ONLINE)
      {
         // communicate
         UInt16Vector queryTargetIDs(1, buddyTargetID);
         GetTargetConsistencyStatesMsg msg(queryTargetIDs);

         const auto respMsg = MessagingTk::requestResponse(*node, msg,
            NETMSGTYPE_GetTargetConsistencyStatesResp);
         if (!respMsg)
         { // communication failed
            LogContext(logContext).log(LogTopic_STATES, Log_WARNING,
               "Communication with buddy target failed. "
               "nodeID: " + nodeID.str() + "; buddy targetID: "
               + StringTk::uintToStr(buddyTargetID));

            continue;
         }

         // handle response
         auto respMsgCast = (GetTargetConsistencyStatesRespMsg*)respMsg.get();
         const auto& targetConsistencyStates = &respMsgCast->getStates();

         // get received target information
         // (note: we only requested a single target info, so the first one must be the
         // requested one)
         const TargetConsistencyState buddyTargetConsistencyState =
            targetConsistencyStates->empty() ? TargetConsistencyState_BAD :
            targetConsistencyStates->front();

         auto& target = *storageTargets->getTargets().at(targetID);

         // set last comm timestamp, but ignore it if we think buddy needs a resync
         const bool buddyNeedsResync = target.getBuddyNeedsResync();
         if((buddyTargetConsistencyState == TargetConsistencyState_GOOD) && !buddyNeedsResync)
            target.setLastBuddyComm(std::chrono::system_clock::now(), false);
      }
   }

   // requeue
   timerQ->enqueue(std::chrono::seconds(30), requestBuddyTargetStates);
}

/**
 * Download the current storage target states from mgmt and sync them into the local target
 * state store.
 *
 * @param outTargetIDs receives the downloaded target IDs
 * @param outReachabilityStates receives the reachability state for each target in outTargetIDs
 * @param outConsistencyStates receives the consistency state for each target in outTargetIDs
 * @return false on error.
 */
bool InternodeSyncer::downloadAndSyncTargetStates(UInt16List& outTargetIDs,
   UInt8List& outReachabilityStates, UInt8List& outConsistencyStates)
{
   App* app = Program::getApp();
   NodeStore* mgmtNodes = app->getMgmtNodes();
   TargetStateStore* targetStateStore = app->getTargetStateStore();

   auto node = mgmtNodes->referenceFirstNode();
   if(!node)
      return false;

   bool downloadRes = NodesTk::downloadTargetStates(*node, NODETYPE_Storage,
      &outTargetIDs, &outReachabilityStates, &outConsistencyStates, false);

   if(downloadRes)
      targetStateStore->syncStatesFromLists(outTargetIDs, outReachabilityStates,
         outConsistencyStates);

   return downloadRes;
}

/**
 * @return false on error
 */
bool InternodeSyncer::downloadAndSyncNodes()
{
   const char* logContext = "Nodes sync";
   LogContext(logContext).log(LogTopic_STATES, Log_DEBUG, "Called.");

   App* app = Program::getApp();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();
   NodeStoreServers* metaNodes = app->getMetaNodes();
   NodeStoreServers* storageNodes = app->getStorageNodes();
   Node& localNode = app->getLocalNode();

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;


   { // storage nodes
      std::vector<NodeHandle> storageNodesList;
      NumNodeIDList addedStorageNodes;
      NumNodeIDList removedStorageNodes;

      bool storageRes =
         NodesTk::downloadNodes(*mgmtNode, NODETYPE_Storage, storageNodesList, true);
      if(!storageRes)
         goto err_release_mgmt;

      storageNodes->syncNodes(storageNodesList, &addedStorageNodes, &removedStorageNodes,
         &localNode);
      printSyncNodesResults(NODETYPE_Storage, &addedStorageNodes, &removedStorageNodes);
   }


   { // clients
      std::vector<NodeHandle> clientsList;
      StringList addedClients;
      StringList removedClients;

      bool clientsRes = NodesTk::downloadNodes(*mgmtNode, NODETYPE_Client, clientsList, true);
      if(!clientsRes)
         goto err_release_mgmt;

      // note: storage App doesn't have a client node store, thus no clients->syncNodes() here
      syncClientSessions(clientsList);
   }


   { // metadata nodes
      std::vector<NodeHandle> metaNodesList;
      NumNodeIDList addedMetaNodes;
      NumNodeIDList removedMetaNodes;
      NumNodeID rootNodeID;
      bool rootIsBuddyMirrored;

      bool metaRes =
         NodesTk::downloadNodes(*mgmtNode, NODETYPE_Meta, metaNodesList, true, &rootNodeID,
            &rootIsBuddyMirrored);
      if(!metaRes)
         goto err_release_mgmt;

      metaNodes->syncNodes(metaNodesList, &addedMetaNodes, &removedMetaNodes);

      printSyncNodesResults(NODETYPE_Meta, &addedMetaNodes, &removedMetaNodes);
   }

   return true;

err_release_mgmt:
   return false;
}

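/**
 * Log how many nodes of the given type were added or removed by a preceding syncNodes() call.
 */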
void InternodeSyncer::printSyncNodesResults(NodeType nodeType, NumNodeIDList* addedNodes,
   NumNodeIDList* removedNodes)
{
   const char* logContext = "Sync results";

   if (!addedNodes->empty())
      LogContext(logContext).log(LogTopic_STATES, Log_WARNING,
         std::string("Nodes added: ") +
         StringTk::uintToStr(addedNodes->size() ) +
         " (Type: " + boost::lexical_cast<std::string>(nodeType) + ")");

   if (!removedNodes->empty())
      LogContext(logContext).log(LogTopic_STATES, Log_WARNING,
         std::string("Nodes removed: ") +
         StringTk::uintToStr(removedNodes->size() ) +
         " (Type: " + boost::lexical_cast<std::string>(nodeType) + ")");
}

/**
 * @return false on error
 */
bool InternodeSyncer::downloadAndSyncTargetMappings()
{
   LogContext("Download target mappings").log(LogTopic_STATES, Log_DEBUG,
      "Syncing target mappings.");

   App* app = Program::getApp();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();
   TargetMapper* targetMapper = app->getTargetMapper();

   bool retVal = true;

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;

   auto mappings = NodesTk::downloadTargetMappings(*mgmtNode, true);
   if (mappings.first)
      targetMapper->syncTargets(std::move(mappings.second));
   else
      retVal = false;

   return retVal;
}

/**
 * @return false on error
 */
bool InternodeSyncer::downloadAndSyncMirrorBuddyGroups()
{
   LogContext("Download mirror groups").log(LogTopic_STATES, Log_DEBUG,
      "Syncing mirror groups.");
   App* app = Program::getApp();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();
   MirrorBuddyGroupMapper* buddyGroupMapper = app->getMirrorBuddyGroupMapper();

   bool retVal = true;

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;

   UInt16List buddyGroupIDs;
   UInt16List primaryTargetIDs;
   UInt16List secondaryTargetIDs;

   bool downloadRes = NodesTk::downloadMirrorBuddyGroups(*mgmtNode, NODETYPE_Storage,
      &buddyGroupIDs, &primaryTargetIDs, &secondaryTargetIDs, true);

   if(downloadRes)
   {
      buddyGroupMapper->syncGroupsFromLists(buddyGroupIDs, primaryTargetIDs, secondaryTargetIDs,
         app->getLocalNode().getNumID());
   }
   else
      retVal = false;

   return retVal;
}

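/**
 * Download the storage pool definitions from mgmt and sync them into the local storage pool
 * store.
 *
 * @return false on error
 */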
bool InternodeSyncer::downloadAndSyncStoragePools()
{
   App* app = Program::getApp();
   NodeStore* mgmtNodes = app->getMgmtNodes();
   StoragePoolStore* storagePoolStore = app->getStoragePoolStore();

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;

   StoragePoolPtrVec storagePools;

   bool downloadPoolsRes = NodesTk::downloadStoragePools(*mgmtNode, storagePools, true);
   if(downloadPoolsRes)
      storagePoolStore->syncFromVector(storagePools);

   return downloadPoolsRes;
}

/**
 * Synchronize local client sessions with registered clients from mgmt to release orphaned sessions.
 *
 * @param clientsList must be ordered; contained nodes will be removed and may no longer be
 *    accessed after calling this method.
 */
void InternodeSyncer::syncClientSessions(const std::vector<NodeHandle>& clientsList)
{
   const char* logContext = "Client sessions sync";
   LogContext(logContext).log(LogTopic_STATES, Log_DEBUG, "Client session sync started.");

   App* app = Program::getApp();
   SessionStore* sessions = app->getSessions();

   auto removedSessions = sessions->syncSessions(clientsList);

   // print sessions removal results (upfront)
   if (!removedSessions.empty())
   {
      std::ostringstream logMsgStream;
      logMsgStream << "Removing " << removedSessions.size() << " client sessions. ";
      LogContext(logContext).log(LogTopic_STATES, Log_DEBUG, logMsgStream.str() );
   }


   // remove each file of each session
   auto sessionIter = removedSessions.begin();
   for( ; sessionIter != removedSessions.end(); sessionIter++) // CLIENT SESSIONS LOOP
   { // walk over all client sessions: cleanup each session
      auto& session = *sessionIter;
      NumNodeID sessionID = session->getSessionID();
      SessionLocalFileStore* sessionFiles = session->getLocalFiles();

      auto removed = sessionFiles->removeAllSessions();

      // print sessionFiles results (upfront)
      if (removed)
      {
         std::ostringstream logMsgStream;
         logMsgStream << sessionID << ": Removing " << removed << " file sessions.";
         LogContext(logContext).log(LogTopic_STATES, Log_NOTICE, logMsgStream.str() );
      }
   } // end of client sessions loop
}

/**
 * @return false on error
 */
bool InternodeSyncer::downloadExceededQuotaList(uint16_t targetId, QuotaDataType idType,
   QuotaLimitType exType, UIntList* outIDList, FhgfsOpsErr& error)
{
   App* app = Program::getApp();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();

   bool retVal = false;

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if(!mgmtNode)
      return false;

   RequestExceededQuotaMsg msg(idType, exType, targetId);

   RequestExceededQuotaRespMsg* respMsgCast = NULL;

   const auto respMsg = MessagingTk::requestResponse(*mgmtNode, msg,
      NETMSGTYPE_RequestExceededQuotaResp);
   if (!respMsg)
      goto err_exit;

   // handle result
   respMsgCast = (RequestExceededQuotaRespMsg*)respMsg.get();

   respMsgCast->getExceededQuotaIDs()->swap(*outIDList);
   error = respMsgCast->getError();

   retVal = true;

err_exit:
   return retVal;
}

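/**
 * Download all exceeded quota ID lists for each of the given storage targets.
 *
 * @return false if the download failed for at least one target
 */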
bool InternodeSyncer::downloadAllExceededQuotaLists(
   const std::map<uint16_t, std::unique_ptr<StorageTarget>>& targets)
{
   bool retVal = true;

   // note: this is fairly inefficient, but it is done only once on startup
   for (const auto& mapping : targets)
   {
      if (!downloadAllExceededQuotaLists(mapping.first))
         retVal = false;
   }

   return retVal;
}

/**
 * @return false on error
 */
bool InternodeSyncer::downloadAllExceededQuotaLists(uint16_t targetId)
{
   const char* logContext = "Exceeded quota sync";

   App* app = Program::getApp();
   Config* cfg = app->getConfig();
   ExceededQuotaStorePtr exceededQuotaStore = app->getExceededQuotaStores()->get(targetId);

   if (!exceededQuotaStore)
   {
      LOG(STORAGEPOOLS, ERR, "Could not access exceeded quota store.", targetId);
      return false;
   }

   bool retVal = true;

   UIntList tmpExceededUIDsSize;
   UIntList tmpExceededGIDsSize;
   UIntList tmpExceededUIDsInode;
   UIntList tmpExceededGIDsInode;

   FhgfsOpsErr error;

   if (downloadExceededQuotaList(targetId, QuotaDataType_USER, QuotaLimitType_SIZE,
      &tmpExceededUIDsSize, error) )
   {
      exceededQuotaStore->updateExceededQuota(&tmpExceededUIDsSize, QuotaDataType_USER,
         QuotaLimitType_SIZE);

      // enable or disable quota enforcement
      if(error == FhgfsOpsErr_NOTSUPP)
      {
         if(cfg->getQuotaEnableEnforcement() )
         {
            LogContext(logContext).log(Log_DEBUG,
               "Quota enforcement is enabled in the configuration of this storage server, "
               "but not on the management daemon. "
               "The configuration from the management daemon overrides the local setting.");
         }
         else
         {
            LogContext(logContext).log(Log_DEBUG, "Quota enforcement disabled by management daemon.");
         }

         cfg->setQuotaEnableEnforcement(false);
         return true;
      }
      else
      {
         if(!cfg->getQuotaEnableEnforcement() )
         {
            LogContext(logContext).log(Log_DEBUG,
               "Quota enforcement is enabled on the management daemon, "
               "but not in the configuration of this storage server. "
               "The configuration from the management daemon overrides the local setting.");
         }
         else
         {
            LogContext(logContext).log(Log_DEBUG, "Quota enforcement enabled by management daemon.");
         }

         cfg->setQuotaEnableEnforcement(true);
      }
   }
   else
   { // error
      LogContext(logContext).logErr("Unable to download exceeded file size quota for users.");
      retVal = false;
   }

   if (downloadExceededQuotaList(targetId, QuotaDataType_GROUP, QuotaLimitType_SIZE,
      &tmpExceededGIDsSize, error))
   {
      exceededQuotaStore->updateExceededQuota(&tmpExceededGIDsSize, QuotaDataType_GROUP,
         QuotaLimitType_SIZE);
   }
   else
   { // error
      LogContext(logContext).logErr("Unable to download exceeded file size quota for groups.");
      retVal = false;
   }

   if (downloadExceededQuotaList(targetId, QuotaDataType_USER, QuotaLimitType_INODE,
      &tmpExceededUIDsInode, error))
   {
      exceededQuotaStore->updateExceededQuota(&tmpExceededUIDsInode, QuotaDataType_USER,
         QuotaLimitType_INODE);
   }
   else
   { // error
      LogContext(logContext).logErr("Unable to download exceeded file number quota for users.");
      retVal = false;
   }

   if (downloadExceededQuotaList(targetId, QuotaDataType_GROUP, QuotaLimitType_INODE,
      &tmpExceededGIDsInode, error))
   {
      exceededQuotaStore->updateExceededQuota(&tmpExceededGIDsInode, QuotaDataType_GROUP,
         QuotaLimitType_INODE);
   }
   else
   { // error
      LogContext(logContext).logErr("Unable to download exceeded file number quota for groups.");
      retVal = false;
   }

   return retVal;
}

/**
 * Tell mgmtd to update its capacity pools.
 */
void InternodeSyncer::forceMgmtdPoolsRefresh()
{
   App* app = Program::getApp();
   DatagramListener* dgramLis = app->getDatagramListener();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();

   auto mgmtNode = mgmtNodes->referenceFirstNode();
   if (!mgmtNode)
   {
      log.log(Log_DEBUG, "Management node not defined.");
      return;
   }

   RefreshCapacityPoolsMsg msg;

   bool ackReceived = dgramLis->sendToNodeUDPwithAck(mgmtNode, &msg);

   if (!ackReceived)
      log.log(Log_DEBUG, "Management node did not accept pools refresh request.");
}