New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

View File

@@ -0,0 +1,67 @@
#include "CleanUp.h"
#include <app/App.h>
CleanUp::CleanUp(App* app) :
PThread("CleanUp"), app(app)
{}
// Thread body: registers the signal handler, runs loop() until it returns and logs
// start/stop. Any std::exception raised on the way is handed to the App.
void CleanUp::run()
{
   try
   {
      LOG(GENERAL, DEBUG, "Component started.");

      registerSignalHandler();
      loop();

      LOG(GENERAL, DEBUG, "Component stopped.");
   }
   catch (std::exception& componentError)
   {
      // report to the App instead of letting the exception leave run()
      app->handleComponentException(componentError);
   }
}
// Main loop of the component: waits 30 minutes (expressed in milliseconds) per round
// and drops idle connections after each wait. The loop ends as soon as
// waitForSelfTerminateOrder() returns true.
void CleanUp::loop()
{
   const std::chrono::minutes idleDisconnectInterval(30);
   const auto waitMS = std::chrono::milliseconds(idleDisconnectInterval).count();

   for (;;)
   {
      if (waitForSelfTerminateOrder(waitMS))
         break;

      dropIdleConns();
   }
}
// Drops idle connections to all mgmt, meta and storage nodes (in that order) and writes
// a debug log line if at least one connection was dropped.
void CleanUp::dropIdleConns()
{
   unsigned numDroppedConns = dropIdleConnsByStore(app->getMgmtNodes());
   numDroppedConns += dropIdleConnsByStore(app->getMetaNodes());
   numDroppedConns += dropIdleConnsByStore(app->getStorageNodes());

   if (numDroppedConns == 0)
      return; // nothing to report

   // NOTE(review): the variable is handed to LOG as-is; presumably its name shows up in
   // the log output, so it should not be renamed - confirm against the LOG macro.
   LOG(GENERAL, DEBUG, "Idle connections dropped", numDroppedConns);
}
// Walks over all nodes of the given store and resets the idle streams of each node's
// connection pool.
//
// @param nodes the node store to process
// @return sum of the values returned by disconnectAndResetIdleStreams() for all
//    processed nodes
unsigned CleanUp::dropIdleConnsByStore(NodeStoreServers* nodes)
{
   unsigned droppedTotal = 0;

   const auto referencedNodes = nodes->referenceAllNodes();

   for (const auto& currentNode : referencedNodes)
   {
      // don't do any idle disconnect stuff with local node
      // (the LocalNodeConnPool doesn't support and doesn't need this kind of treatment)
      const bool isRemoteNode = (currentNode != app->getLocalNode());
      if (!isRemoteNode)
         continue;

      droppedTotal += currentNode->getConnPool()->disconnectAndResetIdleStreams();
   }

   return droppedTotal;
}

View File

@@ -0,0 +1,24 @@
#ifndef CLEANUP_H_
#define CLEANUP_H_
#include <common/threading/PThread.h>
#include <common/nodes/NodeStoreServers.h>
class App;
// Background component (thread) that periodically drops idle connections to the
// mgmt, meta and storage nodes known to the App.
class CleanUp : public PThread
{
   public:
      // @param app application object; the pointer is stored and used by the thread
      //    (explicit: an App* must never convert to a CleanUp implicitly)
      explicit CleanUp(App* app);

   private:
      App* const app;

      // thread entry point; runs loop() and forwards exceptions to the App
      virtual void run() override;

      // waits for the idle-disconnect interval and calls dropIdleConns() until the
      // thread is told to terminate
      void loop();

      // drops idle connections of all node stores and logs the total
      void dropIdleConns();

      // drops idle connections of all nodes in the given store (the local node is
      // skipped); returns the accumulated count
      unsigned dropIdleConnsByStore(NodeStoreServers* nodes);
};
#endif /* CLEANUP_H_ */

View File

@@ -0,0 +1,91 @@
#include "NodeListRequestor.h"
#include <common/toolkit/NodesTk.h>
#include <components/worker/GetNodesWork.h>
#include <app/App.h>
// Maximum number of attempts per call when downloading the management node info.
static constexpr unsigned MGMT_NUM_TRIES = 3;

// Used both as the timeout handed to the node info download and as the pause between
// two attempts.
static constexpr std::chrono::milliseconds MGMT_TIMEOUT{1000};
NodeListRequestor::NodeListRequestor(App* app) :
PThread("NodeListReq"), app(app)
{}
// Thread body: registers the signal handler, runs requestLoop() until it returns and
// logs start/stop. Any std::exception raised on the way is handed to the App.
void NodeListRequestor::run()
{
   try
   {
      LOG(GENERAL, DEBUG, "Component started.");

      registerSignalHandler();
      requestLoop();

      LOG(GENERAL, DEBUG, "Component stopped.");
   }
   catch (std::exception& componentError)
   {
      // report to the App instead of letting the exception leave run()
      app->handleComponentException(componentError);
   }
}
// Periodically queues work items that download the meta and storage node lists.
// One round is executed immediately; afterwards the loop waits for the configured
// node list request interval between rounds and ends when
// waitForSelfTerminateOrder() returns true.
void NodeListRequestor::requestLoop()
{
   do
   {
      // Get management node. Do this every time before updating node lists to check if
      // management is online to prevent log spam from NodesTk::downloadNodes when it is
      // not reachable
      if (getMgmtNodeInfo())
      {
         // try to reference first mgmt node (which is at the moment the only one)
         std::shared_ptr<Node> mgmtNode = app->getMgmtNodes()->referenceFirstNode();

         if (!mgmtNode)
         {
            LOG(GENERAL, DEBUG, "Unable to reference management node for node list request.");
         }
         else
         {
            LOG(GENERAL, DEBUG, "Requesting node lists...");

            // one work item per node type; each is handed over to the work queue
            auto* metaNodesWork = new GetNodesWork(mgmtNode, app->getMetaNodes(),
               NODETYPE_Meta, app->getMetaBuddyGroupMapper(), app->getLocalNode());
            app->getWorkQueue()->addIndirectWork(metaNodesWork);

            auto* storageNodesWork = new GetNodesWork(mgmtNode, app->getStorageNodes(),
               NODETYPE_Storage, app->getStorageBuddyGroupMapper(), app->getLocalNode());
            app->getWorkQueue()->addIndirectWork(storageNodesWork);
         }
      }
      else
      {
         // skip this round; the next attempt happens after the regular interval
         LOG(GENERAL, NOTICE, "Did not receive a response from management node!");
      }
   }
   while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
      app->getConfig()->getNodelistRequestInterval()).count()));
}
// Tries up to MGMT_NUM_TRIES times to download the management node info and to
// add/update it in the App's mgmt node store.
//
// @return true if the node info was received and stored; false if all attempts failed
//    or the wait between two attempts reported a self-terminate order
bool NodeListRequestor::getMgmtNodeInfo()
{
   for (unsigned attempt = 0; attempt < MGMT_NUM_TRIES; ++attempt)
   {
      LOG(GENERAL, DEBUG, "Waiting for management node...");

      // get mgmtd node using NodesTk
      auto downloadedNode = NodesTk::downloadNodeInfo(app->getConfig()->getSysMgmtdHost(),
         app->getConfig()->getConnMgmtdPort(), app->getConfig()->getConnAuthHash(),
         app->getNetMessageFactory(),
         NODETYPE_Mgmt, MGMT_TIMEOUT.count());

      if (!downloadedNode)
      {
         // pause before the next attempt; give up early on termination
         if (PThread::waitForSelfTerminateOrder(
               std::chrono::milliseconds(MGMT_TIMEOUT).count()))
            return false;

         continue;
      }

      app->getMgmtNodes()->addOrUpdateNodeEx(std::move(downloadedNode), nullptr);
      return true;
   }

   return false;
}

View File

@@ -0,0 +1,20 @@
#ifndef NODELISTREQUESTOR_H_
#define NODELISTREQUESTOR_H_
#include <common/threading/PThread.h>
class App;
// Background component (thread) that periodically contacts the management node and
// queues work items to refresh the meta and storage node lists.
class NodeListRequestor : public PThread
{
   public:
      // @param app application object; the pointer is stored and used by the thread
      //    (explicit: an App* must never convert to a NodeListRequestor implicitly)
      explicit NodeListRequestor(App* app);

   private:
      App* const app;

      // thread entry point; runs requestLoop() and forwards exceptions to the App
      virtual void run() override;

      // queues the node list downloads once per request interval until termination
      void requestLoop();

      // downloads the management node info and stores it; returns true on success
      bool getMgmtNodeInfo();
};
#endif /*NODELISTREQUESTOR_H_*/

View File

@@ -0,0 +1,206 @@
#include "StatsCollector.h"
#include <common/toolkit/SocketTk.h>
#include <common/nodes/OpCounterTypes.h>
#include <app/App.h>
StatsCollector::StatsCollector(App* app) :
PThread("StatsCollector"), app(app)
{}
// Thread body: registers the signal handler, runs requestLoop() until it returns and
// logs start/stop. Any std::exception raised on the way is handed to the App.
void StatsCollector::run()
{
   try
   {
      LOG(GENERAL, DEBUG, "Component started.");

      registerSignalHandler();
      requestLoop();

      LOG(GENERAL, DEBUG, "Component stopped.");
   }
   catch (std::exception& componentError)
   {
      // report to the App instead of letting the exception leave run()
      app->handleComponentException(componentError);
   }
}
void StatsCollector::requestLoop()
{
bool collectClientOpsByNode = app->getConfig()->getCollectClientOpsByNode();
bool collectClientOpsByUser = app->getConfig()->getCollectClientOpsByUser();
// intially wait one query interval before requesting stats to give NodeListRequestor the time
// to retrieve the node lists
while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
app->getConfig()->getStatsRequestInterval()).count()))
{
{
LOG(GENERAL, DEBUG, "Requesting Stats...");
std::unique_lock<std::mutex> lock(mutex);
workItemCounter = 0;
metaResults.clear();
storageResults.clear();
// collect data
const auto& metaNodes = app->getMetaNodes()->referenceAllNodes();
for (auto node = metaNodes.begin(); node != metaNodes.end(); node++)
{
workItemCounter++;
app->getWorkQueue()->addIndirectWork(
new RequestMetaDataWork(std::static_pointer_cast<MetaNodeEx>(*node),
this, collectClientOpsByNode, collectClientOpsByUser));
}
const auto& storageNodes = app->getStorageNodes()->referenceAllNodes();
for (auto node = storageNodes.begin(); node != storageNodes.end(); node++)
{
workItemCounter++;
app->getWorkQueue()->addIndirectWork(
new RequestStorageDataWork(std::static_pointer_cast<StorageNodeEx>(*node),
this, collectClientOpsByNode, collectClientOpsByUser));
}
while (workItemCounter > 0)
condVar.wait(lock);
// write data
for (auto iter = metaResults.begin(); iter != metaResults.end(); iter++)
{
app->getTSDB()->insertMetaNodeData(iter->node, iter->data);
for (auto listIter = iter->highResStatsList.begin();
listIter != iter->highResStatsList.end(); listIter++)
{
app->getTSDB()->insertHighResMetaNodeData(iter->node, *listIter);
}
if (collectClientOpsByNode)
{
for (auto mapIter = iter->ipOpsUnorderedMap.begin();
mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
{
ipMetaClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
if (collectClientOpsByUser)
{
for (auto mapIter = iter->userOpsUnorderedMap.begin();
mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
{
userMetaClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
}
for (auto iter = storageResults.begin(); iter != storageResults.end(); iter++)
{
app->getTSDB()->insertStorageNodeData(iter->node, iter->data);
for (auto listIter = iter->highResStatsList.begin();
listIter != iter->highResStatsList.end(); listIter++)
{
app->getTSDB()->insertHighResStorageNodeData(iter->node, *listIter);
}
for (auto listIter = iter->storageTargetList.begin();
listIter != iter->storageTargetList.end();
listIter++)
{
app->getTSDB()->insertStorageTargetsData(iter->node, *listIter);
}
if (collectClientOpsByNode)
{
for (auto mapIter = iter->ipOpsUnorderedMap.begin();
mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
{
ipStorageClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
if (collectClientOpsByUser)
{
for (auto mapIter = iter->userOpsUnorderedMap.begin();
mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
{
userStorageClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
}
if (collectClientOpsByNode)
{
processClientOps(ipMetaClientOps, NODETYPE_Meta, false);
processClientOps(ipStorageClientOps, NODETYPE_Storage, false);
}
if (collectClientOpsByUser)
{
processClientOps(userMetaClientOps, NODETYPE_Meta, true);
processClientOps(userStorageClientOps, NODETYPE_Storage, true);
}
app->getTSDB()->write();
}
}
}
// Converts the diff ops of the given accumulator into TSDB client entries (one
// insertClientNodeData() call per id) and clears the accumulator afterwards.
//
// @param clientOps accumulator to read from; cleared before returning
// @param nodeType selects the op number -> op name mapping (meta or storage); for any
//    other type the op name stays empty
// @param perUser true: ids are formatted as unsigned numbers ("undefined" for ~0U);
//    false: ids are formatted as IP address strings
void StatsCollector::processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser)
{
   ClientOps::IdOpsMap diffOpsMap = clientOps.getDiffOpsMap();

   // NOTE(review): the sum list is fetched but never read in this function; the call
   // is kept because it cannot be told from here whether it has side effects.
   ClientOps::OpsList sumOpsList = clientOps.getDiffSumOpsList();

   for (auto& idOps : diffOpsMap)
   {
      std::string id;

      if (!perUser)
      {
         struct in_addr inAddr = { static_cast<in_addr_t>(idOps.first) };
         id = Socket::ipaddrToStr(inAddr);
      }
      else if (idOps.first == ~0U)
      {
         id = "undefined";
      }
      else
      {
         id = StringTk::uintToStr(idOps.first);
      }

      std::map<std::string, uint64_t> stringOpMap;

      // the position of a value inside the ops list is its op number
      unsigned opCounter = 0;

      for (auto& opValue : idOps.second)
      {
         std::string opName;

         switch (nodeType)
         {
            case NODETYPE_Meta:
               opName = OpToStringMapping::mapMetaOpNum(opCounter);
               break;

            case NODETYPE_Storage:
               opName = OpToStringMapping::mapStorageOpNum(opCounter);
               break;

            default:
               break;
         }

         stringOpMap[opName] = opValue;
         ++opCounter;
      }

      app->getTSDB()->insertClientNodeData(id, nodeType, stringOpMap, perUser);
   }

   clientOps.clear();
}

View File

@@ -0,0 +1,56 @@
#ifndef STATSCOLLECTOR_H_
#define STATSCOLLECTOR_H_
#include <common/threading/PThread.h>
#include <components/worker/RequestMetaDataWork.h>
#include <components/worker/RequestStorageDataWork.h>
#include <common/nodes/ClientOps.h>
#include <mutex>
#include <condition_variable>
class App;
// Background component (thread) that periodically requests statistics from all meta
// and storage nodes via work items, waits for their results and stores them in the
// TSDB.
class StatsCollector : public PThread
{
   // the work items deliver their results through the private insert*Data() methods
   friend class RequestMetaDataWork;
   friend class RequestStorageDataWork;

   public:
      // @param app application object; the pointer is stored and used by the thread
      //    (explicit: an App* must never convert to a StatsCollector implicitly)
      explicit StatsCollector(App* app);

   private:
      App* const app;

      // client ops accumulators, per IP / per user and per node type
      ClientOps ipMetaClientOps;
      ClientOps ipStorageClientOps;
      ClientOps userMetaClientOps;
      ClientOps userStorageClientOps;

      // protects workItemCounter, metaResults and storageResults
      mutable std::mutex mutex;

      // number of queued work items that have not delivered a result yet; initialized
      // here so that it never holds an indeterminate value
      int workItemCounter = 0;

      std::list<RequestMetaDataWork::Result> metaResults;
      std::list<RequestStorageDataWork::Result> storageResults;

      // signalled whenever a work item has delivered its result
      std::condition_variable condVar;

      // thread entry point; runs requestLoop() and forwards exceptions to the App
      virtual void run() override;

      void requestLoop();
      void processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser);

      // Stores the result of a meta node request, marks one work item as done and
      // wakes up the waiting collector thread.
      void insertMetaData(RequestMetaDataWork::Result result)
      {
         const std::unique_lock<std::mutex> lock(mutex);

         metaResults.push_back(std::move(result));
         workItemCounter--;
         condVar.notify_one();
      }

      // Stores the result of a storage node request, marks one work item as done and
      // wakes up the waiting collector thread.
      void insertStorageData(RequestStorageDataWork::Result result)
      {
         const std::unique_lock<std::mutex> lock(mutex);

         storageResults.push_back(std::move(result));
         workItemCounter--;
         condVar.notify_one();
      }
};
#endif /*STATSCOLLECTOR_H_*/

View File

@@ -0,0 +1,40 @@
#include "GetNodesWork.h"
#include <common/toolkit/NodesTk.h>
// Downloads the node list of this work item's node type from the management node and
// syncs it into the node store; afterwards downloads the mirror buddy groups of the
// same node type and syncs them into the buddy group mapper. The buffer parameters
// are not used.
void GetNodesWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
   std::vector<std::shared_ptr<Node>> nodesList;

   if (!NodesTk::downloadNodes(*mgmtdNode, nodeType, nodesList, false))
   {
      LOG(GENERAL, ERR, "Couldn't download server list from management daemon.", nodeType);
   }
   else
   {
      std::list<NumNodeID> addedNodes;
      std::list<NumNodeID> removedNodes;

      // sync the downloaded list with the node store
      nodes->syncNodes(nodesList, &addedNodes, &removedNodes, localNode.get());

      if (!addedNodes.empty())
         LOG(GENERAL, WARNING, "Nodes added.", ("addedNodes", addedNodes.size()), nodeType);

      if (!removedNodes.empty())
         LOG(GENERAL, WARNING, "Nodes removed.", ("removedNodes", removedNodes.size()), nodeType);
   }

   std::list<uint16_t> buddyGroupIDList;
   std::list<uint16_t> primaryTargetIDList;
   std::list<uint16_t> secondaryTargetIDList;

   // update the buddy groups of this node type (attempted even if the node list
   // download above failed)
   if (!NodesTk::downloadMirrorBuddyGroups(*mgmtdNode, nodeType, &buddyGroupIDList,
         &primaryTargetIDList, &secondaryTargetIDList, false) )
      return;

   buddyGroupMapper->syncGroupsFromLists(buddyGroupIDList, primaryTargetIDList,
      secondaryTargetIDList, NumNodeID());
}

View File

@@ -0,0 +1,32 @@
#ifndef GETNODESWORK_H_
#define GETNODESWORK_H_
#include <common/components/worker/Work.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/NodeType.h>
#include <common/nodes/NodeStoreServers.h>
// Work item that downloads the node list and the mirror buddy groups of one node type
// from the management node and syncs them into the given store / mapper.
class GetNodesWork : public Work
{
   public:
      // @param mgmtdNode management node to download from
      // @param nodes node store that receives the downloaded node list
      // @param nodeType type of the nodes (and buddy groups) to download
      // @param buddyGroupMapper mapper that receives the downloaded buddy groups
      // @param localNode local node; passed on to the node store sync
      GetNodesWork(std::shared_ptr<Node> mgmtdNode, NodeStoreServers *nodes, NodeType nodeType,
            MirrorBuddyGroupMapper* buddyGroupMapper, std::shared_ptr<Node> localNode)
            : mgmtdNode(std::move(mgmtdNode)),
              nodes(nodes),
              nodeType(nodeType),
              buddyGroupMapper(buddyGroupMapper),
              // by-value sink parameter: move it like mgmtdNode instead of copying
              localNode(std::move(localNode))
      {}

      virtual void process(char* bufIn, unsigned bufInLen,
            char* bufOut, unsigned bufOutLen) override;

   private:
      std::shared_ptr<Node> mgmtdNode;
      NodeStoreServers* nodes;
      NodeType nodeType;
      MirrorBuddyGroupMapper* buddyGroupMapper;
      std::shared_ptr<Node> localNode;
};
#endif /*GETNODESWORK_H_*/

View File

@@ -0,0 +1,69 @@
#include "RequestMetaDataWork.h"
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
#include <common/net/message/mon/RequestMetaDataMsg.h>
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
#include <components/StatsCollector.h>
// Requests the current statistics from the meta node of this work item and hands the
// result to the stats collector. A result is always delivered, even if the node does
// not respond (isResponding is false in that case). The buffer parameters are not
// used.
void RequestMetaDataWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
   // a node marked as not responding gets a heartbeat request first; only a reply
   // flips it back to responding
   if (!node->getIsResponding())
   {
      HeartbeatRequestMsg heartbeatRequestMsg;

      if (MessagingTk::requestResponse(*node, heartbeatRequestMsg, NETMSGTYPE_Heartbeat))
      {
         LOG(GENERAL, DEBUG, "Node is responding again.",
            ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(true);
      }
   }

   Result result = {};
   result.data.isResponding = false;

   if (node->getIsResponding())
   {
      // generate the RequestDataMsg with the lastStatsTime
      RequestMetaDataMsg requestDataMsg(node->getLastStatRequestTime().count());

      auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
         NETMSGTYPE_RequestMetaDataResp);

      if (respMsg)
      {
         // get response and process it
         auto* reply = static_cast<RequestMetaDataRespMsg*>(respMsg.get());

         result.highResStatsList = std::move(reply->getStatsList());

         result.data.isResponding = true;
         result.data.indirectWorkListSize = reply->getIndirectWorkListSize();
         result.data.directWorkListSize = reply->getDirectWorkListSize();
         result.data.sessionCount = reply->getSessionCount();
         result.data.hostnameid = reply->gethostnameid();

         // remember the time of the first received stats entry for the next request
         if (!result.highResStatsList.empty())
         {
            node->setLastStatRequestTime(std::chrono::milliseconds(
               result.highResStatsList.front().rawVals.statsTimeMS));
         }

         if (collectClientOpsByNode)
            result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);

         if (collectClientOpsByUser)
            result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
      }
      else
      {
         LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(false);
      }
   }

   // the node reference travels with the result; this work item must not use it
   // afterwards
   result.node = std::move(node);

   statsCollector->insertMetaData(std::move(result));
}

View File

@@ -0,0 +1,42 @@
#ifndef REQUESTMETADATAWORK_H_
#define REQUESTMETADATAWORK_H_
#include <common/components/worker/Work.h>
#include <common/nodes/ClientOps.h>
#include <misc/TSDatabase.h>
#include <nodes/MetaNodeEx.h>
class StatsCollector;
class RequestMetaDataWork : public Work
{
public:
struct Result
{
std::shared_ptr<MetaNodeEx> node;
MetaNodeDataContent data;
HighResStatsList highResStatsList;
ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;
ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap;
};
RequestMetaDataWork(std::shared_ptr<MetaNodeEx> node,
StatsCollector* statsCollector,
bool collectClientOpsByNode, bool collectClientOpsByUser) :
node(std::move(node)),
statsCollector(statsCollector),
collectClientOpsByNode(collectClientOpsByNode),
collectClientOpsByUser(collectClientOpsByUser)
{}
virtual void process(char* bufIn, unsigned bufInLen,
char* bufOut, unsigned bufOutLen) override;
private:
std::shared_ptr<MetaNodeEx> node;
StatsCollector* statsCollector;
bool collectClientOpsByNode;
bool collectClientOpsByUser;
};
#endif /*REQUESTMETADATAWORK_H_*/

View File

@@ -0,0 +1,74 @@
#include "RequestStorageDataWork.h"
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
#include <common/net/message/mon/RequestStorageDataMsg.h>
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
#include <components/StatsCollector.h>
// Requests the current statistics from the storage node of this work item and hands
// the result to the stats collector. A result is always delivered, even if the node
// does not respond (isResponding is false in that case). The buffer parameters are
// not used.
void RequestStorageDataWork::process(char* bufIn, unsigned bufInLen,
   char* bufOut, unsigned bufOutLen)
{
   // a node marked as not responding gets a heartbeat request first; only a reply
   // flips it back to responding
   if (!node->getIsResponding())
   {
      HeartbeatRequestMsg heartbeatRequestMsg;

      if (MessagingTk::requestResponse(*node, heartbeatRequestMsg, NETMSGTYPE_Heartbeat))
      {
         LOG(GENERAL, DEBUG, "Node is responding again.",
            ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(true);
      }
   }

   Result result = {};
   result.data.isResponding = false;

   if (node->getIsResponding())
   {
      // generate the RequestStorageDataMsg with the lastStatsTime
      RequestStorageDataMsg requestDataMsg(node->getLastStatRequestTime().count());

      auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
         NETMSGTYPE_RequestStorageDataResp);

      if (respMsg)
      {
         // get response and process it
         auto* reply = static_cast<RequestStorageDataRespMsg*>(respMsg.get());

         result.highResStatsList = std::move(reply->getStatsList());
         result.storageTargetList = std::move(reply->getStorageTargets());

         result.data.isResponding = true;
         result.data.indirectWorkListSize = reply->getIndirectWorkListSize();
         result.data.directWorkListSize = reply->getDirectWorkListSize();
         result.data.diskSpaceTotal = reply->getDiskSpaceTotalMiB();
         result.data.diskSpaceFree = reply->getDiskSpaceFreeMiB();
         result.data.sessionCount = reply->getSessionCount();
         result.data.hostnameid = reply->gethostnameid();

         // remember the time of the first received stats entry for the next request
         if (!result.highResStatsList.empty())
         {
            node->setLastStatRequestTime(std::chrono::milliseconds(
               result.highResStatsList.front().rawVals.statsTimeMS));
         }

         if (collectClientOpsByNode)
            result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);

         if (collectClientOpsByUser)
            result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
      }
      else
      {
         LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(false);
      }
   }

   // the node reference travels with the result; this work item must not use it
   // afterwards
   result.node = std::move(node);

   statsCollector->insertStorageData(std::move(result));
}

View File

@@ -0,0 +1,44 @@
#ifndef REQUESTSTORAGEDATAWORK_H_
#define REQUESTSTORAGEDATAWORK_H_
#include <common/components/worker/Work.h>
#include <common/nodes/ClientOps.h>
#include <common/storage/StorageTargetInfo.h>
#include <misc/TSDatabase.h>
#include <nodes/StorageNodeEx.h>
class StatsCollector;
class RequestStorageDataWork : public Work
{
public:
struct Result
{
std::shared_ptr<StorageNodeEx> node;
StorageNodeDataContent data;
HighResStatsList highResStatsList;
StorageTargetInfoList storageTargetList;
ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;
ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap;
};
RequestStorageDataWork(std::shared_ptr<StorageNodeEx> node,
StatsCollector* statsCollector, bool collectClientOpsByNode,
bool collectClientOpsByUser) :
node(std::move(node)),
statsCollector(statsCollector),
collectClientOpsByNode(collectClientOpsByNode),
collectClientOpsByUser(collectClientOpsByUser)
{}
void process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen);
private:
std::shared_ptr<StorageNodeEx> node;
StatsCollector* statsCollector;
bool collectClientOpsByNode;
bool collectClientOpsByUser;
};
#endif /*REQUESTSTORAGEDATAWORK_H_*/