New upstream version 8.1.0
This commit is contained in:
324
mon/source/app/App.cpp
Normal file
324
mon/source/app/App.cpp
Normal file
@@ -0,0 +1,324 @@
|
||||
#include "App.h"
|
||||
|
||||
#include <app/SignalHandler.h>
|
||||
#include <common/components/ComponentInitException.h>
|
||||
#include <common/components/worker/DummyWork.h>
|
||||
#include <misc/Cassandra.h>
|
||||
#include <misc/InfluxDB.h>
|
||||
|
||||
|
||||
/**
 * Stores the command line arguments for later parsing in run().
 * Note: argv is borrowed, not copied, and must outlive this App instance.
 */
App::App(int argc, char** argv) :
   argc(argc), argv(argv)
{}
|
||||
|
||||
/**
 * Main entry point of the application object.
 *
 * Parses the configuration and runs the daemon via runNormal(); translates all
 * known exception types into an AppCode stored in appResult, which main()
 * returns as the process exit code.
 */
void App::run()
{
   try
   {
      cfg = boost::make_unique<Config>(argc,argv);

      runNormal();

      appResult = AppCode::NO_ERROR;
   }
   catch (const InvalidConfigException& e)
   {
      // config errors get an extra hint block pointing the user at the
      // default config file and documentation
      std::ostringstream err;
      err << "Config error: " << e.what() << std::endl
         << "[BeeGFS Mon Version: " << BEEGFS_VERSION << std::endl
         << "Refer to the default config file (/etc/beegfs/beegfs-mon.conf)" << std::endl
         << "or visit http://www.beegfs.com to find out about configuration options.]";

      printOrLogError(err.str());
      appResult = AppCode::INVALID_CONFIG;
   }
   catch (const ComponentInitException& e)
   {
      printOrLogError("Component initialization error: " + std::string(e.what()));
      appResult = AppCode::INITIALIZATION_ERROR;
   }
   catch (const std::runtime_error& e)
   {
      // catch-all for runtime_errors not matched by the handlers above
      printOrLogError("Runtime error: " + std::string(e.what()));
      appResult = AppCode::RUNTIME_ERROR;
   }
   catch (const std::exception& e)
   {
      printOrLogError("Generic error: " + std::string(e.what()));
      appResult = AppCode::RUNTIME_ERROR;
   }
}
|
||||
|
||||
void App::printOrLogError(const std::string& text) const
|
||||
{
|
||||
if (Logger::isInitialized())
|
||||
LOG(GENERAL, ERR, text);
|
||||
else
|
||||
std::cerr << std::endl << text << std::endl << std::endl;
|
||||
}
|
||||
|
||||
/**
 * Normal (non-error) program flow: initializes logging, data objects, workers
 * and components, optionally daemonizes, then starts all threads and blocks
 * until they have terminated.
 *
 * Note: the initialization order below matters (e.g. logger before anything
 * that logs, signal handler after data objects); keep it intact.
 */
void App::runNormal()
{
   Logger::createLogger(cfg->getLogLevel(), cfg->getLogType(), cfg->getLogNoDate(),
      cfg->getLogStdFile(), cfg->getLogNumLines(), cfg->getLogNumRotatedFiles());

   pidFileLockFD = createAndLockPIDFile(cfg->getPIDFile());
   initDataObjects();
   SignalHandler::registerSignalHandler(this);
   initLocalNodeInfo();
   initWorkers();
   initComponents();

   // must happen before threads are started / before daemonizing forks
   RDMASocket::rdmaForkInitOnce();

   if (cfg->getRunDaemonized())
      daemonize();

   logInfos();

   // make sure components don't receive SIGINT/SIGTERM (blocked signals are inherited)
   PThread::blockInterruptSignals();
   startWorkers();
   startComponents();
   PThread::unblockInterruptSignals();

   // block until shutdown (triggered by signal or component failure)
   joinComponents();
   joinWorkers();
}
|
||||
|
||||
/**
 * Discovers the usable network interfaces and builds the LocalNode object.
 *
 * @throws InvalidConfigException if no usable NIC is found or the
 *    connNoDefaultRoute setting cannot be parsed.
 */
void App::initLocalNodeInfo()
{
   bool useRDMA = cfg->getConnUseRDMA();
   unsigned portUDP = cfg->getConnMonPort();

   // optional whitelist of NIC names; empty list means "use all"
   StringList allowedInterfaces;
   std::string interfacesFilename = cfg->getConnInterfacesFile();
   if (interfacesFilename.length() )
      cfg->loadStringListFile(interfacesFilename.c_str(), allowedInterfaces);

   NetworkInterfaceCard::findAll(&allowedInterfaces, useRDMA, &localNicList);

   if (localNicList.empty() )
      throw InvalidConfigException("Couldn't find any usable NIC");

   // order NICs by the user-given preference list
   localNicList.sort(NetworkInterfaceCard::NicAddrComp{&allowedInterfaces});
   NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);

   noDefaultRouteNets = std::make_shared<NetVector>();
   if(!initNoDefaultRouteList(noDefaultRouteNets.get()))
      throw InvalidConfigException("Failed to parse connNoDefaultRoute");

   initRoutingTable();
   updateRoutingTable();

   // hostname doubles as the node ID
   std::string nodeID = System::getHostname();

   // TODO add a Mon nodetype at some point
   localNode = std::make_shared<LocalNode>(NODETYPE_Client, nodeID, NumNodeID(1), portUDP, 0, localNicList);
}
|
||||
|
||||
/**
 * Creates the basic data objects (filters, queues, node stores, mappers) and
 * the time series database connector selected by the dbType config option.
 */
void App::initDataObjects()
{
   netFilter = boost::make_unique<NetFilter>(cfg->getConnNetFilterFile());
   tcpOnlyFilter = boost::make_unique<NetFilter>(cfg->getConnTcpOnlyFilterFile());
   netMessageFactory = boost::make_unique<NetMessageFactory>();
   workQueue = boost::make_unique<MultiWorkQueue>();

   targetMapper = boost::make_unique<TargetMapper>();

   metaNodes = boost::make_unique<NodeStoreMetaEx>();
   storageNodes = boost::make_unique<NodeStoreStorageEx>();
   mgmtNodes = boost::make_unique<NodeStoreMgmtEx>();

   metaBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
   storageBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();

   // instantiate the tsdb backend; only the config fields relevant to the
   // chosen backend are read
   if (cfg->getDbType() == Config::DbTypes::CASSANDRA)
   {
      Cassandra::Config cassandraConfig;
      cassandraConfig.host = cfg->getDbHostName();
      cassandraConfig.port = cfg->getDbHostPort();
      cassandraConfig.database = cfg->getDbDatabase();
      cassandraConfig.maxInsertsPerBatch = cfg->getCassandraMaxInsertsPerBatch();
      cassandraConfig.TTLSecs = cfg->getCassandraTTLSecs();

      tsdb = boost::make_unique<Cassandra>(std::move(cassandraConfig));
   }
   else // Config::DbTypes::INFLUXDB OR Config::DbTypes::INFLUXDB2
   {
      InfluxDB::Config influxdbConfig;
      influxdbConfig.host = cfg->getDbHostName();
      influxdbConfig.port = cfg->getDbHostPort();
      influxdbConfig.maxPointsPerRequest = cfg->getInfluxdbMaxPointsPerRequest();
      influxdbConfig.httpTimeout = cfg->getHttpTimeout();
      influxdbConfig.curlCheckSSLCertificates = cfg->getCurlCheckSSLCertificates();
      if (cfg->getDbType() == Config::DbTypes::INFLUXDB2)
      {
         // v2.x authenticates via organization + token and writes to a bucket
         influxdbConfig.bucket = cfg->getDbBucket();
         influxdbConfig.organization = cfg->getDbAuthOrg();
         influxdbConfig.token = cfg->getDbAuthToken();
         influxdbConfig.dbVersion = INFLUXDB2;
      }
      else
      {
         // v1.x authenticates via username + password and writes to a database
         influxdbConfig.database = cfg->getDbDatabase();
         influxdbConfig.setRetentionPolicy = cfg->getInfluxDbSetRetentionPolicy();
         influxdbConfig.retentionDuration = cfg->getInfluxDbRetentionDuration();
         influxdbConfig.username = cfg->getDbAuthUsername();
         influxdbConfig.password = cfg->getDbAuthPassword();
         influxdbConfig.dbVersion = INFLUXDB;
      }
      tsdb = boost::make_unique<InfluxDB>(std::move(influxdbConfig));
   }
}
|
||||
|
||||
/**
 * Creates (but does not start) the component threads.
 */
void App::initComponents()
{
   nodeListRequestor = boost::make_unique<NodeListRequestor>(this);
   statsCollector = boost::make_unique<StatsCollector>(this);
   cleanUp = boost::make_unique<CleanUp>(this);
}
|
||||
|
||||
/**
 * Starts the component threads created in initComponents().
 */
void App::startComponents()
{
   LOG(GENERAL, DEBUG, "Starting components...");
   nodeListRequestor->start();
   statsCollector->start();
   cleanUp->start();
   LOG(GENERAL, DEBUG, "Components running.");
}
|
||||
|
||||
/**
 * Asks all component and worker threads (and the app itself) to terminate.
 *
 * Also called from the signal handler and from handleComponentException(), so
 * each component pointer is checked: components may not exist yet if an error
 * occurred during initialization.
 */
void App::stopComponents()
{
   if (nodeListRequestor)
      nodeListRequestor->selfTerminate();
   if (statsCollector)
      statsCollector->selfTerminate();
   if (cleanUp)
      cleanUp->selfTerminate();

   stopWorkers();
   selfTerminate();
}
|
||||
|
||||
/**
 * Blocks until all component threads have terminated.
 */
void App::joinComponents()
{
   LOG(GENERAL, DEBUG, "Joining Component threads...");
   nodeListRequestor->join();
   statsCollector->join();
   cleanUp->join();
   LOG(GENERAL, CRITICAL, "All components stopped. Exiting now.");
}
|
||||
|
||||
void App::initWorkers()
|
||||
{
|
||||
const unsigned numDirectWorkers = 1;
|
||||
const unsigned workersBufSize = 1024*1024;
|
||||
|
||||
unsigned numWorkers = cfg->getTuneNumWorkers();
|
||||
|
||||
for (unsigned i=0; i < numWorkers; i++)
|
||||
{
|
||||
auto worker = boost::make_unique<Worker>("Worker" + StringTk::intToStr(i+1),
|
||||
workQueue.get(), QueueWorkType_INDIRECT);
|
||||
|
||||
worker->setBufLens(workersBufSize, workersBufSize);
|
||||
workerList.push_back(std::move(worker));
|
||||
}
|
||||
|
||||
for (unsigned i=0; i < numDirectWorkers; i++)
|
||||
{
|
||||
auto worker = boost::make_unique<Worker>("DirectWorker" + StringTk::intToStr(i+1),
|
||||
workQueue.get(), QueueWorkType_DIRECT);
|
||||
|
||||
worker->setBufLens(workersBufSize, workersBufSize);
|
||||
workerList.push_back(std::move(worker));
|
||||
}
|
||||
}
|
||||
|
||||
void App::startWorkers()
|
||||
{
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
(*worker)->start();
|
||||
}
|
||||
}
|
||||
|
||||
void App::stopWorkers()
|
||||
{
|
||||
// need two loops because we don't know if the worker that handles the work will be the same that
|
||||
// received the self-terminate-request
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
(*worker)->selfTerminate();
|
||||
|
||||
// add dummy work to wake up the worker immediately for faster self termination
|
||||
PersonalWorkQueue* personalQ = (*worker)->getPersonalWorkQueue();
|
||||
workQueue->addPersonalWork(new DummyWork(), personalQ);
|
||||
}
|
||||
}
|
||||
|
||||
void App::joinWorkers()
|
||||
{
|
||||
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
waitForComponentTermination((*worker).get());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Logs basic runtime information after startup: version, usable NICs and the
 * number of configured connection filters (if any).
 */
void App::logInfos()
{
   LOG(GENERAL, CRITICAL, std::string("Version: ") + BEEGFS_VERSION);
#ifdef BEEGFS_DEBUG
   LOG(GENERAL, DEBUG, "--DEBUG VERSION--");
#endif

   // list usable network interfaces
   NicAddressList nicList = getLocalNicList();
   logUsableNICs(NULL, nicList);

   // print net filters
   if (netFilter->getNumFilterEntries() )
   {
      LOG(GENERAL, WARNING, std::string("Net filters: ")
         + StringTk::uintToStr(netFilter->getNumFilterEntries() ) );
   }

   if (tcpOnlyFilter->getNumFilterEntries() )
   {
      LOG(GENERAL, WARNING, std::string("TCP-only filters: ")
         + StringTk::uintToStr(tcpOnlyFilter->getNumFilterEntries() ) );
   }
}
|
||||
|
||||
/**
 * Detaches the process from the controlling terminal via daemon(3).
 *
 * Keeps the working directory, closes stdin/stdout/stderr, and rewrites the
 * PID file afterwards (the PID changes because daemon() forks).
 *
 * @throws std::runtime_error if detaching fails.
 */
void App::daemonize()
{
   int nochdir = 1; // 1 to keep working directory
   int noclose = 0; // 1 to keep stdin/-out/-err open

   LOG(GENERAL, CRITICAL, "Detaching process...");

   int detachRes = daemon(nochdir, noclose);
   if (detachRes == -1)
      throw std::runtime_error(std::string("Unable to detach process: ")
         + System::getErrString());

   updateLockedPIDFile(pidFileLockFD); // ignored if pidFileFD is -1
}
|
||||
|
||||
/**
 * Called by a component thread when it hits an unrecoverable error; logs the
 * exception and triggers an orderly application shutdown.
 *
 * @param e the exception that terminated the component.
 */
void App::handleComponentException(std::exception& e)
{
   LOG(GENERAL, CRITICAL, "This component encountered an unrecoverable error.", sysErr,
      ("Exception", e.what()));

   LOG(GENERAL, WARNING, "Shutting down...");
   stopComponents();
}
|
||||
|
||||
/**
 * Called when a local network interface fails; mon only logs the event.
 *
 * @param devname name of the failed network device.
 */
void App::handleNetworkInterfaceFailure(const std::string& devname)
{
   // Nothing to do. This App has no internodeSyncer that would rescan the
   // netdevs.
   LOG(GENERAL, ERR, "Network interface failure.",
      ("Device", devname));
}
|
||||
184
mon/source/app/App.h
Normal file
184
mon/source/app/App.h
Normal file
@@ -0,0 +1,184 @@
|
||||
#ifndef APP_H_
|
||||
#define APP_H_
|
||||
|
||||
#include <app/Config.h>
|
||||
#include <common/app/AbstractApp.h>
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/components/worker/Worker.h>
|
||||
#include <common/nodes/LocalNode.h>
|
||||
#include <common/nodes/NodeStoreClients.h>
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/NetFilter.h>
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
#include <components/CleanUp.h>
|
||||
#include <components/StatsCollector.h>
|
||||
#include <components/NodeListRequestor.h>
|
||||
#include <net/message/NetMessageFactory.h>
|
||||
#include <nodes/NodeStoreMetaEx.h>
|
||||
#include <nodes/NodeStoreStorageEx.h>
|
||||
#include <nodes/NodeStoreMgmtEx.h>
|
||||
|
||||
/**
 * The mon daemon's application object.
 *
 * Owns the configuration, node stores, the time series database connector and
 * the component/worker threads. The lifecycle is driven by run() (App.cpp);
 * the process exit code is retrieved via getAppResult().
 */
class App : public AbstractApp
{
   public:
      // process exit codes (returned by main() via getAppResult())
      enum AppCode
      {
         NO_ERROR = 0,
         INVALID_CONFIG = 1,
         INITIALIZATION_ERROR = 2,
         RUNTIME_ERROR = 3
      };

      App(int argc, char** argv);

      virtual void run() override;
      virtual void stopComponents() override;
      virtual void handleComponentException(std::exception& e) override;
      virtual void handleNetworkInterfaceFailure(const std::string& devname) override;

   private:
      int appResult;       // one of AppCode, set by run()
      int argc;            // borrowed command line (see constructor)
      char** argv;
      LockFD pidFileLockFD; // lock on the PID file; updated after daemonize()

      std::unique_ptr<TargetMapper> targetMapper;

      std::unique_ptr<Config> cfg;
      std::unique_ptr<NetFilter> netFilter;     // allowed networks
      std::unique_ptr<NetFilter> tcpOnlyFilter; // networks restricted to TCP
      std::unique_ptr<NetMessageFactory> netMessageFactory;
      NicListCapabilities localNicCaps;
      std::shared_ptr<Node> localNode;
      std::unique_ptr<TSDatabase> tsdb; // InfluxDB or Cassandra backend
      std::unique_ptr<MultiWorkQueue> workQueue;

      // stores for the different server node types
      std::unique_ptr<NodeStoreMgmtEx> mgmtNodes;
      std::unique_ptr<NodeStoreMetaEx> metaNodes;
      std::unique_ptr<NodeStoreStorageEx> storageNodes;
      std::unique_ptr<MirrorBuddyGroupMapper> metaBuddyGroupMapper;
      std::unique_ptr<MirrorBuddyGroupMapper> storageBuddyGroupMapper;

      // component threads (created in initComponents())
      std::unique_ptr<NodeListRequestor> nodeListRequestor;
      std::unique_ptr<StatsCollector> statsCollector;
      std::unique_ptr<CleanUp> cleanUp;

      std::list<std::unique_ptr<Worker>> workerList;

      void printOrLogError(const std::string& text) const;

      void runNormal();
      void initDataObjects();
      void initComponents();
      void startComponents();
      void joinComponents();
      void initWorkers();
      void startWorkers();
      void stopWorkers();
      void joinWorkers();
      void initLocalNodeInfo();
      void logInfos();
      void daemonize();

   public:
      // getters & setters

      /**
       * @return the node store for the given server node type, or nullptr if
       *    nodeType is not a server type.
       */
      NodeStoreServers* getServerStoreFromType(NodeType nodeType)
      {
         switch (nodeType)
         {
            case NODETYPE_Meta:
               return metaNodes.get();

            case NODETYPE_Storage:
               return storageNodes.get();

            case NODETYPE_Mgmt:
               return mgmtNodes.get();

            default:
               return nullptr;
         }
      }

      virtual ICommonConfig* getCommonConfig() const override { return cfg.get(); }

      virtual NetFilter* getNetFilter() const override { return netFilter.get(); }

      virtual NetFilter* getTcpOnlyFilter() const override { return tcpOnlyFilter.get(); }

      virtual AbstractNetMessageFactory* getNetMessageFactory() const override
      {
         return netMessageFactory.get();
      }

      std::shared_ptr<Node> getLocalNode() { return localNode; }

      Config* getConfig() { return cfg.get(); }

      MultiWorkQueue *getWorkQueue() { return workQueue.get(); }

      NodeStoreMetaEx *getMetaNodes() { return metaNodes.get(); }

      NodeStoreStorageEx *getStorageNodes() { return storageNodes.get(); }

      NodeStoreMgmtEx *getMgmtNodes() { return mgmtNodes.get(); }

      TSDatabase *getTSDB() { return tsdb.get(); }

      TargetMapper* getTargetMapper() { return targetMapper.get(); }

      MirrorBuddyGroupMapper* getMetaBuddyGroupMapper() { return metaBuddyGroupMapper.get(); }

      MirrorBuddyGroupMapper* getStorageBuddyGroupMapper() { return storageBuddyGroupMapper.get(); }

      // process exit code (one of AppCode)
      int getAppResult() { return appResult; }
};
|
||||
|
||||
|
||||
#endif /*APP_H_*/
|
||||
210
mon/source/app/Config.cpp
Normal file
210
mon/source/app/Config.cpp
Normal file
@@ -0,0 +1,210 @@
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include "Config.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#define CONFIG_DEFAULT_CFGFILENAME "/etc/beegfs/beegfs-mon.conf"
|
||||
|
||||
/**
 * Parses the config file and command line, validates mandatory settings, and
 * loads database credentials from the optional dbAuthFile.
 *
 * @throws InvalidConfigException if sysMgmtdHost is unset, the auth file
 *    cannot be opened, or the auth file contains unknown keys.
 */
Config::Config(int argc, char** argv): AbstractConfig(argc, argv)
{
   initConfig(argc, argv, true);

   // check mandatory value
   if(getSysMgmtdHost().empty())
      throw InvalidConfigException("Management host undefined.");

   // Load auth config file
   if (!dbAuthFile.empty())
   {
      // readability probe only; the actual parsing happens via MapTk below
      std::ifstream authConfig(dbAuthFile);

      if (!authConfig.good())
         throw InvalidConfigException("Could not open InfluxDB authentication file");

      StringMap authMap;
      MapTk::loadStringMapFromFile(dbAuthFile.c_str(), &authMap);

      // username/password apply to influxdb v1.x, organization/token to v2.x
      for (const auto& e : authMap) {
         if (e.first == "password") {
            dbAuthPassword = e.second;
         } else if (e.first == "username") {
            dbAuthUsername = e.second;
         } else if (e.first == "organization") {
            dbAuthOrg = e.second;
         } else if (e.first == "token") {
            dbAuthToken = e.second;
         } else {
            throw InvalidConfigException("The InfluxDB authentication file may only contain "
               "the options username and password for influxdb version 1.x "
               "organization and token for influxdb version 2.x" );
         }
      }
   }
}
|
||||
|
||||
/**
 * Registers the default value for every supported config key. Keys defined
 * here (plus the inherited ones) are the complete set accepted by
 * applyConfigMap().
 */
void Config::loadDefaults(bool addDashes)
{
   AbstractConfig::loadDefaults();

   // re-definitions
   configMapRedefine("cfgFile", "");
   configMapRedefine("connUseRDMA", "false");

   // own definitions
   configMapRedefine("connInterfacesFile", "");
   configMapRedefine("tuneNumWorkers", "4");
   configMapRedefine("runDaemonized", "false");
   configMapRedefine("pidFile", "");

   configMapRedefine("dbType", "influxdb");
   configMapRedefine("dbHostName", "localhost");
   configMapRedefine("dbHostPort", "8086");
   configMapRedefine("dbDatabase", "beegfs_mon");
   configMapRedefine("dbAuthFile", "");

   // those are used by influxdb only but are kept like this for compatibility
   configMapRedefine("dbMaxPointsPerRequest", "5000");
   configMapRedefine("dbSetRetentionPolicy", "true");
   configMapRedefine("dbRetentionDuration", "1d");

   // influxdb v2.x only
   configMapRedefine("dbBucket", "");

   configMapRedefine("cassandraMaxInsertsPerBatch","25");
   configMapRedefine("cassandraTTLSecs", "86400");

   configMapRedefine("collectClientOpsByNode", "true");
   configMapRedefine("collectClientOpsByUser", "true");

   configMapRedefine("httpTimeoutMSecs", "1000");
   configMapRedefine("statsRequestIntervalSecs", "5");
   configMapRedefine("nodelistRequestIntervalSecs","30");

   configMapRedefine("curlCheckSSLCertificates", "true");
}
|
||||
|
||||
void Config::applyConfigMap(bool enableException, bool addDashes)
|
||||
{
|
||||
AbstractConfig::applyConfigMap(false);
|
||||
|
||||
for (StringMapIter iter = configMap.begin(); iter != configMap.end(); )
|
||||
{
|
||||
bool unknownElement = false;
|
||||
|
||||
if (iter->first == std::string("logType"))
|
||||
{
|
||||
if (iter->second == "syslog")
|
||||
{
|
||||
logType = LogType_SYSLOG;
|
||||
}
|
||||
else if (iter->second == "logfile")
|
||||
{
|
||||
logType = LogType_LOGFILE;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw InvalidConfigException("The value of config argument logType is invalid:"
|
||||
" Must be syslog or logfile.");
|
||||
}
|
||||
}
|
||||
else if (iter->first == std::string("connInterfacesFile"))
|
||||
connInterfacesFile = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("tuneNumWorkers"))
|
||||
tuneNumWorkers = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("runDaemonized"))
|
||||
runDaemonized = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("pidFile"))
|
||||
pidFile = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("dbType"))
|
||||
{
|
||||
if (iter->second == "influxdb")
|
||||
dbType = DbTypes::INFLUXDB;
|
||||
else if (iter->second == "influxdb2")
|
||||
dbType = DbTypes::INFLUXDB2;
|
||||
else if (iter->second == "cassandra")
|
||||
dbType = DbTypes::CASSANDRA;
|
||||
else
|
||||
throw InvalidConfigException("The value of config argument dbType is invalid:"
|
||||
" Must be influxdb or cassandra.");
|
||||
}
|
||||
else
|
||||
if (iter->first == std::string("dbHostName"))
|
||||
dbHostName = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("dbHostPort"))
|
||||
dbHostPort = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("dbDatabase"))
|
||||
dbDatabase = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("dbAuthFile"))
|
||||
dbAuthFile = iter->second;
|
||||
else
|
||||
// those are used by influxdb only but are kept like this for compatibility
|
||||
if (iter->first == std::string("dbMaxPointsPerRequest"))
|
||||
influxdbMaxPointsPerRequest = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("dbSetRetentionPolicy"))
|
||||
influxdbSetRetentionPolicy = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("dbRetentionDuration"))
|
||||
influxdbRetentionDuration = iter->second;
|
||||
else
|
||||
// those are used by influxdb2
|
||||
if (iter->first == std::string("dbBucket"))
|
||||
dbBucket = iter->second;
|
||||
else
|
||||
|
||||
if (iter->first == std::string("cassandraMaxInsertsPerBatch"))
|
||||
cassandraMaxInsertsPerBatch = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("cassandraTTLSecs"))
|
||||
cassandraTTLSecs = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("collectClientOpsByNode"))
|
||||
collectClientOpsByNode = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("collectClientOpsByUser"))
|
||||
collectClientOpsByUser = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("httpTimeoutMSecs"))
|
||||
httpTimeout = std::chrono::milliseconds(StringTk::strToUInt(iter->second));
|
||||
else
|
||||
if (iter->first == std::string("statsRequestIntervalSecs"))
|
||||
statsRequestInterval = std::chrono::seconds(StringTk::strToUInt(iter->second));
|
||||
else
|
||||
if (iter->first == std::string("nodelistRequestIntervalSecs"))
|
||||
nodelistRequestInterval = std::chrono::seconds(StringTk::strToUInt(iter->second));
|
||||
else
|
||||
if (iter->first == std::string("curlCheckSSLCertificates"))
|
||||
curlCheckSSLCertificates = StringTk::strToBool(iter->second);
|
||||
else
|
||||
{
|
||||
unknownElement = true;
|
||||
|
||||
if (enableException)
|
||||
{
|
||||
throw InvalidConfigException(std::string("The config argument '")
|
||||
+ iter->first + std::string("' is invalid.") );
|
||||
}
|
||||
}
|
||||
|
||||
if (unknownElement)
|
||||
{
|
||||
iter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
iter = eraseFromConfigMap(iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Derives values not directly given in the config: computes the connection
 * authentication hash from the configured connAuthFile.
 */
void Config::initImplicitVals()
{
   AbstractConfig::initConnAuthHash(connAuthFile, &connAuthHash);
}
|
||||
179
mon/source/app/Config.h
Normal file
179
mon/source/app/Config.h
Normal file
@@ -0,0 +1,179 @@
|
||||
#ifndef CONFIG_H_
|
||||
#define CONFIG_H_
|
||||
|
||||
#include <common/app/config/AbstractConfig.h>
|
||||
|
||||
|
||||
/**
 * Configuration of the mon daemon: parses /etc/beegfs/beegfs-mon.conf plus
 * command line arguments (see Config.cpp for defaults and key handling).
 */
class Config : public AbstractConfig
{
   public:
      Config(int argc, char** argv);

      // supported time series database backends (config key "dbType")
      enum DbTypes
      {
         INFLUXDB,
         INFLUXDB2,
         CASSANDRA
      };

   private:
      // configurables
      std::string connInterfacesFile; // optional whitelist of NIC names
      unsigned tuneNumWorkers;        // number of indirect worker threads
      bool runDaemonized;
      std::string pidFile;

      // mon-specific configurables
      DbTypes dbType;
      std::string dbHostName;
      unsigned dbHostPort;
      std::string dbDatabase; // influxdb v1.x / cassandra database name
      std::string dbBucket;   // influxdb v2.x bucket
      std::string dbAuthFile; // optional file with db credentials
      unsigned influxdbMaxPointsPerRequest;
      bool influxdbSetRetentionPolicy;
      std::string influxdbRetentionDuration;
      unsigned cassandraMaxInsertsPerBatch;
      unsigned cassandraTTLSecs;
      bool collectClientOpsByNode;
      bool collectClientOpsByUser;
      std::chrono::milliseconds httpTimeout;
      std::chrono::seconds statsRequestInterval;
      std::chrono::seconds nodelistRequestInterval;
      bool curlCheckSSLCertificates;

      // credentials loaded from dbAuthFile (see constructor)
      std::string dbAuthUsername; // influxdb v1.x
      std::string dbAuthPassword; // influxdb v1.x
      std::string dbAuthOrg;      // influxdb v2.x
      std::string dbAuthToken;    // influxdb v2.x

      virtual void loadDefaults(bool addDashes) override;
      virtual void applyConfigMap(bool enableException, bool addDashes) override;
      virtual void initImplicitVals() override;

   public:
      // getters & setters

      const std::string& getConnInterfacesFile() const { return connInterfacesFile; }

      unsigned getTuneNumWorkers() const { return tuneNumWorkers; }

      bool getRunDaemonized() const { return runDaemonized; }

      const std::string& getPIDFile() const { return pidFile; }

      DbTypes getDbType() const { return dbType; }

      const std::string& getDbHostName() const { return dbHostName; }

      unsigned getDbHostPort() const { return dbHostPort; }

      const std::string& getDbDatabase() const { return dbDatabase; }

      const std::string& getDbBucket() const { return dbBucket; }

      unsigned getInfluxdbMaxPointsPerRequest() const { return influxdbMaxPointsPerRequest; }

      bool getInfluxDbSetRetentionPolicy() const { return influxdbSetRetentionPolicy; }

      const std::string& getInfluxDbRetentionDuration() const { return influxdbRetentionDuration; }

      unsigned getCassandraMaxInsertsPerBatch() const { return cassandraMaxInsertsPerBatch; }

      unsigned getCassandraTTLSecs() const { return cassandraTTLSecs; }

      bool getCollectClientOpsByNode() const { return collectClientOpsByNode; }

      bool getCollectClientOpsByUser() const { return collectClientOpsByUser; }

      const std::chrono::milliseconds& getHttpTimeout() const { return httpTimeout; }

      const std::chrono::seconds& getStatsRequestInterval() const { return statsRequestInterval; }

      const std::chrono::seconds& getNodelistRequestInterval() const { return nodelistRequestInterval; }

      const std::string& getDbAuthUsername() const { return dbAuthUsername; }

      const std::string& getDbAuthPassword() const { return dbAuthPassword; }

      const std::string& getDbAuthOrg() const { return dbAuthOrg; }

      const std::string& getDbAuthToken() const { return dbAuthToken; }

      bool getCurlCheckSSLCertificates() const { return curlCheckSSLCertificates; }
};
|
||||
|
||||
#endif /*CONFIG_H_*/
|
||||
14
mon/source/app/Main.cpp
Normal file
14
mon/source/app/Main.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
#include <common/toolkit/BuildTypeTk.h>
|
||||
#include <app/SignalHandler.h>
|
||||
#include <app/App.h>
|
||||
|
||||
/**
 * Program entry point: runs build sanity checks, then runs the App in the
 * current (main) thread and returns its result code to the OS.
 */
int main(int argc, char** argv)
{
   BuildTypeTk::checkDebugBuildTypes();
   AbstractApp::runTimeInitsAndChecks();

   App app(argc, argv);
   app.startInCurrentThread();

   return app.getAppResult();
}
|
||||
49
mon/source/app/SignalHandler.cpp
Normal file
49
mon/source/app/SignalHandler.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include "SignalHandler.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <app/App.h>
|
||||
|
||||
#include <csignal>
|
||||
|
||||
// the App instance to shut down on SIGINT/SIGTERM (set via registerSignalHandler)
App* SignalHandler::app = nullptr;

/**
 * Installs handlers for SIGINT and SIGTERM that trigger an orderly shutdown
 * of the given App.
 *
 * @param app borrowed; must outlive the installed handlers.
 */
void SignalHandler::registerSignalHandler(App* app)
{
   SignalHandler::app = app;
   signal(SIGINT, SignalHandler::handle);
   signal(SIGTERM, SignalHandler::handle);
}
|
||||
|
||||
|
||||
/**
 * Signal handler for SIGINT/SIGTERM: logs the event and asks the App to stop
 * all components.
 *
 * NOTE(review): Logger/LOG and App::stopComponents are not guaranteed to be
 * async-signal-safe; this follows the existing design — confirm before
 * changing shutdown behavior.
 *
 * @param sig the received signal number.
 */
void SignalHandler::handle(int sig)
{
   // reset signal handling to default, so a second signal kills the process
   signal(sig, SIG_DFL);

   if (Logger::isInitialized())
   {
      switch(sig)
      {
         case SIGINT:
         {
            LOG(GENERAL, CRITICAL, "Received a SIGINT. Shutting down...");
         } break;

         case SIGTERM:
         {
            LOG(GENERAL, CRITICAL, "Received a SIGTERM. Shutting down...");
         } break;

         default:
         {
            // shouldn't happen
            LOG(GENERAL, CRITICAL, "Received an unknown signal. Shutting down...");
         } break;
      }
   }

   if (app != nullptr)
   {
      app->stopComponents();
   }
}
|
||||
16
mon/source/app/SignalHandler.h
Normal file
16
mon/source/app/SignalHandler.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef SIGNAL_HANDLER_H_
|
||||
#define SIGNAL_HANDLER_H_
|
||||
|
||||
class App;
|
||||
|
||||
/**
 * Static helper that installs SIGINT/SIGTERM handlers which trigger an
 * orderly shutdown of the registered App.
 */
class SignalHandler
{
   public:
      static void registerSignalHandler(App* app);
      static void handle(int sig);

   private:
      // the App to shut down when a signal arrives (borrowed, may be nullptr)
      static App* app;
};
|
||||
|
||||
#endif
|
||||
67
mon/source/components/CleanUp.cpp
Normal file
67
mon/source/components/CleanUp.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#include "CleanUp.h"
|
||||
|
||||
#include <app/App.h>
|
||||
|
||||
/**
 * @param app borrowed back-reference to the application; must outlive this
 *    component thread.
 */
CleanUp::CleanUp(App* app) :
   PThread("CleanUp"), app(app)
{}
|
||||
|
||||
/**
 * Thread entry point: runs the periodic cleanup loop until self-terminate is
 * requested; forwards any escaping exception to the App for shutdown.
 */
void CleanUp::run()
{
   try
   {
      LOG(GENERAL, DEBUG, "Component started.");
      registerSignalHandler();
      loop();
      LOG(GENERAL, DEBUG, "Component stopped.");
   }
   catch (std::exception& e)
   {
      app->handleComponentException(e);
   }
}
|
||||
|
||||
void CleanUp::loop()
|
||||
{
|
||||
const std::chrono::minutes idleDisconnectInterval(30);
|
||||
|
||||
while (!waitForSelfTerminateOrder(std::chrono::milliseconds(idleDisconnectInterval).count()))
|
||||
{
|
||||
dropIdleConns();
|
||||
}
|
||||
}
|
||||
|
||||
void CleanUp::dropIdleConns()
|
||||
{
|
||||
unsigned numDroppedConns = 0;
|
||||
|
||||
numDroppedConns += dropIdleConnsByStore(app->getMgmtNodes());
|
||||
numDroppedConns += dropIdleConnsByStore(app->getMetaNodes());
|
||||
numDroppedConns += dropIdleConnsByStore(app->getStorageNodes());
|
||||
|
||||
if (numDroppedConns)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Idle connections dropped", numDroppedConns);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Drops idle connections to all nodes of the given store.
 *
 * @param nodes the node store to walk.
 * @return number of connections that were dropped.
 */
unsigned CleanUp::dropIdleConnsByStore(NodeStoreServers* nodes)
{
   unsigned numDroppedConns = 0;

   const auto referencedNodes = nodes->referenceAllNodes();
   for (const auto& node : referencedNodes)
   {
      // don't do any idle disconnect stuff with local node
      // (the LocalNodeConnPool doesn't support and doesn't need this kind of treatment)
      if (node == app->getLocalNode())
         continue;

      numDroppedConns += node->getConnPool()->disconnectAndResetIdleStreams();
   }

   return numDroppedConns;
}
|
||||
24
mon/source/components/CleanUp.h
Normal file
24
mon/source/components/CleanUp.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef CLEANUP_H_
#define CLEANUP_H_

#include <common/threading/PThread.h>
#include <common/nodes/NodeStoreServers.h>

class App;

/**
 * Background component that periodically drops idle connections to the
 * known server nodes (mgmt, meta, storage).
 */
class CleanUp : public PThread
{
   public:
      // explicit: prevent accidental App* -> CleanUp implicit conversion
      explicit CleanUp(App* app);

   private:
      App* const app; // owning App; must outlive this thread

      virtual void run() override;
      // main wait/sweep loop; runs until self-terminate order
      void loop();
      // one sweep over all node stores
      void dropIdleConns();
      // drops idle conns of all nodes in one store; returns dropped count
      unsigned dropIdleConnsByStore(NodeStoreServers* nodes);

};


#endif /* CLEANUP_H_ */
|
||||
91
mon/source/components/NodeListRequestor.cpp
Normal file
91
mon/source/components/NodeListRequestor.cpp
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "NodeListRequestor.h"
|
||||
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
#include <components/worker/GetNodesWork.h>
|
||||
|
||||
#include <app/App.h>
|
||||
|
||||
// max download attempts per getMgmtNodeInfo() call before reporting failure
static const unsigned MGMT_NUM_TRIES = 3;
// per-attempt communication timeout, also used as back-off between attempts
static const std::chrono::milliseconds MGMT_TIMEOUT{1000};

/**
 * @param app the owning App instance; must outlive this component thread.
 */
NodeListRequestor::NodeListRequestor(App* app) :
   PThread("NodeListReq"), app(app)
{}
|
||||
|
||||
/**
 * Thread entry point: periodically requests fresh node lists from mgmtd
 * until self-terminate is ordered; any escaping exception is forwarded to
 * the App for central handling.
 */
void NodeListRequestor::run()
{
   try
   {
      LOG(GENERAL, DEBUG, "Component started.");
      registerSignalHandler();

      requestLoop();

      LOG(GENERAL, DEBUG, "Component stopped.");
   }
   catch (std::exception& e)
   {
      // never let an exception escape the thread; app decides how to react
      app->handleComponentException(e);
   }
}
|
||||
|
||||
/**
 * Main loop: once per configured interval, verifies mgmtd is reachable and
 * enqueues GetNodesWork items to refresh the meta and storage node lists.
 * Runs immediately on first pass (do/while), then waits the configured
 * nodelist request interval between passes.
 */
void NodeListRequestor::requestLoop()
{
   do
   {
      // Get management node. Do this every time before updating node lists to check if
      // management is online to prevent log spam from NodesTk::downloadNodes when it is
      // not reachable
      if (!getMgmtNodeInfo())
      {
         LOG(GENERAL, NOTICE, "Did not receive a response from management node!");
         // 'continue' in a do/while jumps to the condition, so we still wait
         // one full interval before retrying
         continue;
      }

      // try to reference first mgmt node (which is at the moment the only one)
      std::shared_ptr<Node> mgmtNode = app->getMgmtNodes()->referenceFirstNode();

      if (mgmtNode)
      {
         LOG(GENERAL, DEBUG, "Requesting node lists...");

         // one async work item per node type; results land in the node stores
         app->getWorkQueue()->addIndirectWork(new GetNodesWork(mgmtNode, app->getMetaNodes(),
               NODETYPE_Meta, app->getMetaBuddyGroupMapper(), app->getLocalNode()));
         app->getWorkQueue()->addIndirectWork(new GetNodesWork(mgmtNode,
               app->getStorageNodes(), NODETYPE_Storage, app->getStorageBuddyGroupMapper(),
               app->getLocalNode()));
      }
      else
      {
         LOG(GENERAL, DEBUG, "Unable to reference management node for node list request.");
      }
   }
   while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
         app->getConfig()->getNodelistRequestInterval()).count()));
}
|
||||
|
||||
bool NodeListRequestor::getMgmtNodeInfo()
|
||||
{
|
||||
for (unsigned i = 0; i < MGMT_NUM_TRIES; i++)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Waiting for management node...");
|
||||
|
||||
// get mgmtd node using NodesTk
|
||||
auto mgmtNode = NodesTk::downloadNodeInfo(app->getConfig()->getSysMgmtdHost(),
|
||||
app->getConfig()->getConnMgmtdPort(), app->getConfig()->getConnAuthHash(),
|
||||
app->getNetMessageFactory(),
|
||||
NODETYPE_Mgmt, MGMT_TIMEOUT.count());
|
||||
|
||||
if(mgmtNode)
|
||||
{
|
||||
app->getMgmtNodes()->addOrUpdateNodeEx(std::move(mgmtNode), nullptr);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (PThread::waitForSelfTerminateOrder(std::chrono::milliseconds(MGMT_TIMEOUT).count()))
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
20
mon/source/components/NodeListRequestor.h
Normal file
20
mon/source/components/NodeListRequestor.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef NODELISTREQUESTOR_H_
#define NODELISTREQUESTOR_H_

#include <common/threading/PThread.h>

class App;

/**
 * Background component that periodically downloads the meta and storage
 * node lists from the management daemon and syncs them into the App's
 * node stores (via GetNodesWork items on the work queue).
 */
class NodeListRequestor : public PThread
{
   public:
      NodeListRequestor(App* app);

   private:
      App* const app; // owning App; must outlive this thread

      virtual void run() override;
      // main wait/request loop; runs until self-terminate order
      void requestLoop();
      // downloads mgmtd node info with retries; false if unreachable
      bool getMgmtNodeInfo();
};

#endif /*NODELISTREQUESTOR_H_*/
|
||||
206
mon/source/components/StatsCollector.cpp
Normal file
206
mon/source/components/StatsCollector.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include "StatsCollector.h"
|
||||
|
||||
#include <common/toolkit/SocketTk.h>
|
||||
#include <common/nodes/OpCounterTypes.h>
|
||||
|
||||
#include <app/App.h>
|
||||
|
||||
|
||||
/**
 * @param app the owning App instance; must outlive this component thread.
 */
StatsCollector::StatsCollector(App* app) :
   PThread("StatsCollector"), app(app)
{}
|
||||
|
||||
/**
 * Thread entry point: runs the periodic stats collection loop until
 * self-terminate is ordered; any escaping exception is forwarded to the App
 * for central handling.
 */
void StatsCollector::run()
{
   try
   {
      LOG(GENERAL, DEBUG, "Component started.");
      registerSignalHandler();
      requestLoop();
      LOG(GENERAL, DEBUG, "Component stopped.");
   }
   catch (std::exception& e)
   {
      // never let an exception escape the thread; app decides how to react
      app->handleComponentException(e);
   }
}
|
||||
|
||||
/**
 * Main collection loop, one pass per stats request interval:
 *  1) fan out one Request{Meta,Storage}DataWork item per known node,
 *  2) block on condVar until all work items reported back via
 *     insertMetaData()/insertStorageData() (workItemCounter reaches 0),
 *  3) push the gathered results into the time series database,
 *  4) aggregate and flush per-node / per-user client op counters.
 */
void StatsCollector::requestLoop()
{
   bool collectClientOpsByNode = app->getConfig()->getCollectClientOpsByNode();
   bool collectClientOpsByUser = app->getConfig()->getCollectClientOpsByUser();

   // initially wait one query interval before requesting stats to give
   // NodeListRequestor the time to retrieve the node lists
   while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
         app->getConfig()->getStatsRequestInterval()).count()))
   {
      {
         LOG(GENERAL, DEBUG, "Requesting Stats...");

         // lock guards workItemCounter and the result lists for the whole
         // pass; workers acquire it briefly in insert{Meta,Storage}Data()
         std::unique_lock<std::mutex> lock(mutex);

         workItemCounter = 0;
         metaResults.clear();
         storageResults.clear();

         // collect data

         const auto& metaNodes = app->getMetaNodes()->referenceAllNodes();

         for (auto node = metaNodes.begin(); node != metaNodes.end(); node++)
         {
            workItemCounter++;
            app->getWorkQueue()->addIndirectWork(
                  new RequestMetaDataWork(std::static_pointer_cast<MetaNodeEx>(*node),
                  this, collectClientOpsByNode, collectClientOpsByUser));
         }

         const auto& storageNodes = app->getStorageNodes()->referenceAllNodes();

         for (auto node = storageNodes.begin(); node != storageNodes.end(); node++)
         {
            workItemCounter++;
            app->getWorkQueue()->addIndirectWork(
                  new RequestStorageDataWork(std::static_pointer_cast<StorageNodeEx>(*node),
                  this, collectClientOpsByNode, collectClientOpsByUser));
         }

         // wait until every outstanding work item has delivered its result;
         // the while-loop guards against spurious condvar wakeups
         while (workItemCounter > 0)
            condVar.wait(lock);

         // write data

         for (auto iter = metaResults.begin(); iter != metaResults.end(); iter++)
         {
            app->getTSDB()->insertMetaNodeData(iter->node, iter->data);

            for (auto listIter = iter->highResStatsList.begin();
                 listIter != iter->highResStatsList.end(); listIter++)
            {
               app->getTSDB()->insertHighResMetaNodeData(iter->node, *listIter);
            }

            if (collectClientOpsByNode)
            {
               // accumulate per-client-IP op counters across all meta nodes
               for (auto mapIter = iter->ipOpsUnorderedMap.begin();
                    mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
               {
                  ipMetaClientOps.addOpsList(mapIter->first, mapIter->second);
               }
            }

            if (collectClientOpsByUser)
            {
               // accumulate per-user op counters across all meta nodes
               for (auto mapIter = iter->userOpsUnorderedMap.begin();
                    mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
               {
                  userMetaClientOps.addOpsList(mapIter->first, mapIter->second);
               }
            }
         }

         for (auto iter = storageResults.begin(); iter != storageResults.end(); iter++)
         {
            app->getTSDB()->insertStorageNodeData(iter->node, iter->data);

            for (auto listIter = iter->highResStatsList.begin();
                 listIter != iter->highResStatsList.end(); listIter++)
            {
               app->getTSDB()->insertHighResStorageNodeData(iter->node, *listIter);
            }

            // storage nodes additionally report per-target capacity info
            for (auto listIter = iter->storageTargetList.begin();
                 listIter != iter->storageTargetList.end();
                 listIter++)
            {
               app->getTSDB()->insertStorageTargetsData(iter->node, *listIter);
            }

            if (collectClientOpsByNode)
            {
               for (auto mapIter = iter->ipOpsUnorderedMap.begin();
                    mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
               {
                  ipStorageClientOps.addOpsList(mapIter->first, mapIter->second);
               }
            }

            if (collectClientOpsByUser)
            {
               for (auto mapIter = iter->userOpsUnorderedMap.begin();
                    mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
               {
                  userStorageClientOps.addOpsList(mapIter->first, mapIter->second);
               }
            }
         }

         // convert the accumulated op counters to diffs and store them
         if (collectClientOpsByNode)
         {
            processClientOps(ipMetaClientOps, NODETYPE_Meta, false);
            processClientOps(ipStorageClientOps, NODETYPE_Storage, false);
         }

         if (collectClientOpsByUser)
         {
            processClientOps(userMetaClientOps, NODETYPE_Meta, true);
            processClientOps(userStorageClientOps, NODETYPE_Storage, true);
         }

         // flush all buffered points to the time series database
         app->getTSDB()->write();
      }
   }
}
|
||||
|
||||
/**
 * Converts accumulated client op counters to per-interval diffs and inserts
 * them into the TSDB, one record per client id.
 *
 * @param clientOps accumulated counters; cleared at the end of this call
 * @param nodeType NODETYPE_Meta or NODETYPE_Storage (selects op-name mapping)
 * @param perUser true: keys are numeric user IDs; false: keys are IPv4
 *    addresses (stored as the raw uint)
 */
void StatsCollector::processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser)
{
   ClientOps::IdOpsMap diffOpsMap;
   ClientOps::OpsList sumOpsList;

   diffOpsMap = clientOps.getDiffOpsMap();
   // NOTE(review): sumOpsList is fetched but never used below — presumably
   // leftover; confirm getDiffSumOpsList() has no required side effect before
   // removing
   sumOpsList = clientOps.getDiffSumOpsList();

   if (!diffOpsMap.empty())
   {
      for (auto opsMapIter = diffOpsMap.begin();
           opsMapIter != diffOpsMap.end();
           opsMapIter++)
      {
         std::string id;

         if (perUser)
         {
            // ~0U is the sentinel for "unknown user"
            if (opsMapIter->first == ~0U)
               id = "undefined";
            else
               id = StringTk::uintToStr(opsMapIter->first);
         }
         else
         {
            // key is an IPv4 address packed into the uint; render dotted-quad
            struct in_addr inAddr = { (in_addr_t)opsMapIter->first };
            id = Socket::ipaddrToStr(inAddr);
         }

         // map positional op counters to their op names; position in the
         // list corresponds to the op number
         std::map<std::string, uint64_t> stringOpMap;
         unsigned opCounter = 0;
         for (auto opsListIter = opsMapIter->second.begin();
              opsListIter != opsMapIter->second.end();
              opsListIter++)
         {
            std::string opName;
            if (nodeType == NODETYPE_Meta)
               opName = OpToStringMapping::mapMetaOpNum(opCounter);
            else if (nodeType == NODETYPE_Storage)
               opName = OpToStringMapping::mapStorageOpNum(opCounter);

            stringOpMap[opName] = *opsListIter;
            opCounter++;
         }

         app->getTSDB()->insertClientNodeData(id, nodeType, stringOpMap, perUser);
      }
   }

   // reset accumulators for the next interval
   clientOps.clear();
}
|
||||
56
mon/source/components/StatsCollector.h
Normal file
56
mon/source/components/StatsCollector.h
Normal file
@@ -0,0 +1,56 @@
|
||||
#ifndef STATSCOLLECTOR_H_
#define STATSCOLLECTOR_H_

#include <common/threading/PThread.h>
#include <components/worker/RequestMetaDataWork.h>
#include <components/worker/RequestStorageDataWork.h>
#include <common/nodes/ClientOps.h>

#include <mutex>
#include <condition_variable>

class App;

/**
 * Background component that periodically requests stats from all meta and
 * storage nodes (via work items), collects the results, and writes them to
 * the time series database.
 *
 * Thread-safety: the Request*DataWork friends call insertMetaData()/
 * insertStorageData() from worker threads; mutex guards workItemCounter and
 * the result lists, condVar wakes the collector when all items are done.
 */
class StatsCollector : public PThread
{
   friend class RequestMetaDataWork;
   friend class RequestStorageDataWork;

   public:
      // explicit: prevent accidental App* -> StatsCollector implicit conversion
      explicit StatsCollector(App* app);

   private:
      App* const app; // owning App; must outlive this thread

      // per-interval accumulators for client op counters
      ClientOps ipMetaClientOps;
      ClientOps ipStorageClientOps;
      ClientOps userMetaClientOps;
      ClientOps userStorageClientOps;

      mutable std::mutex mutex; // guards workItemCounter and result lists
      // outstanding work items this pass; was uninitialized — value-init to 0
      // so the header never exposes an indeterminate value (requestLoop()
      // also resets it at the start of every pass)
      int workItemCounter = 0;
      std::list<RequestMetaDataWork::Result> metaResults;
      std::list<RequestStorageDataWork::Result> storageResults;
      std::condition_variable condVar; // signaled whenever a result arrives

      virtual void run() override;
      void requestLoop();
      void processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser);

      // called by RequestMetaDataWork from a worker thread
      void insertMetaData(RequestMetaDataWork::Result result)
      {
         const std::unique_lock<std::mutex> lock(mutex);
         metaResults.push_back(std::move(result));
         workItemCounter--;
         condVar.notify_one();
      }

      // called by RequestStorageDataWork from a worker thread
      void insertStorageData(RequestStorageDataWork::Result result)
      {
         const std::unique_lock<std::mutex> lock(mutex);
         storageResults.push_back(std::move(result));
         workItemCounter--;
         condVar.notify_one();
      }
};

#endif /*STATSCOLLECTOR_H_*/
|
||||
40
mon/source/components/worker/GetNodesWork.cpp
Normal file
40
mon/source/components/worker/GetNodesWork.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#include "GetNodesWork.h"
|
||||
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
|
||||
/**
 * Downloads the node list of this work item's node type from mgmtd, syncs it
 * into the node store, and refreshes the mirror buddy group mapping.
 * buf/len parameters are part of the generic Work interface and unused here.
 */
void GetNodesWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
   std::vector<std::shared_ptr<Node>> nodesList;
   std::list<NumNodeID> addedNodes;
   std::list<NumNodeID> removedNodes;

   if (NodesTk::downloadNodes(*mgmtdNode, nodeType, nodesList, false))
   {
      // sync the downloaded list with the node store
      nodes->syncNodes(nodesList, &addedNodes, &removedNodes, localNode.get());

      if (!addedNodes.empty())
         LOG(GENERAL, WARNING, "Nodes added.", ("addedNodes", addedNodes.size()), nodeType);

      if (!removedNodes.empty())
         LOG(GENERAL, WARNING, "Nodes removed.", ("removedNodes", removedNodes.size()), nodeType);
   }
   else
   {
      // node store is left untouched on download failure
      LOG(GENERAL, ERR, "Couldn't download server list from management daemon.", nodeType);
   }

   std::list<uint16_t> buddyGroupIDList;
   std::list<uint16_t> primaryTargetIDList;
   std::list<uint16_t> secondaryTargetIDList;

   // update the storage buddy groups (download failure is silently ignored
   // here; the existing mapping stays in place)
   if (NodesTk::downloadMirrorBuddyGroups(*mgmtdNode, nodeType, &buddyGroupIDList,
       &primaryTargetIDList, &secondaryTargetIDList, false) )
   {
      buddyGroupMapper->syncGroupsFromLists(buddyGroupIDList, primaryTargetIDList,
            secondaryTargetIDList, NumNodeID());
   }
}
|
||||
32
mon/source/components/worker/GetNodesWork.h
Normal file
32
mon/source/components/worker/GetNodesWork.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef GETNODESWORK_H_
#define GETNODESWORK_H_

#include <common/components/worker/Work.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/NodeType.h>
#include <common/nodes/NodeStoreServers.h>

/**
 * Work item that downloads the node list of one node type from the
 * management daemon and syncs it into the given node store and buddy group
 * mapper. Queued periodically by NodeListRequestor.
 */
class GetNodesWork : public Work
{
   public:
      /**
       * @param mgmtdNode management node to download from (shared ownership)
       * @param nodes target store to sync the downloaded list into (not owned)
       * @param nodeType which node type's list to download
       * @param buddyGroupMapper mapper to refresh with downloaded groups (not owned)
       * @param localNode this daemon's own node, excluded from sync removal
       */
      GetNodesWork(std::shared_ptr<Node> mgmtdNode, NodeStoreServers *nodes, NodeType nodeType,
            MirrorBuddyGroupMapper* buddyGroupMapper, std::shared_ptr<Node> localNode)
         : mgmtdNode(std::move(mgmtdNode)),
           nodes(nodes),
           nodeType(nodeType),
           buddyGroupMapper(buddyGroupMapper),
           localNode(localNode)
      {}

      virtual void process(char* bufIn, unsigned bufInLen,
            char* bufOut, unsigned bufOutLen) override;

   private:
      std::shared_ptr<Node> mgmtdNode;
      NodeStoreServers* nodes;
      NodeType nodeType;
      MirrorBuddyGroupMapper* buddyGroupMapper;
      std::shared_ptr<Node> localNode;
};

#endif /*GETNODESWORK_H_*/
|
||||
69
mon/source/components/worker/RequestMetaDataWork.cpp
Normal file
69
mon/source/components/worker/RequestMetaDataWork.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
#include "RequestMetaDataWork.h"
|
||||
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
|
||||
#include <common/net/message/mon/RequestMetaDataMsg.h>
|
||||
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
|
||||
#include <components/StatsCollector.h>
|
||||
|
||||
/**
 * Requests current stats from one meta node and hands the Result to the
 * StatsCollector (always, even on failure — the collector counts down its
 * outstanding work items on every insert). If the node was previously marked
 * unresponsive, a heartbeat probe is sent first to possibly re-enable it.
 * buf/len parameters are part of the generic Work interface and unused here.
 */
void RequestMetaDataWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
   if (!node->getIsResponding())
   {
      // cheap probe before attempting a full stats request
      HeartbeatRequestMsg heartbeatRequestMsg;
      if(MessagingTk::requestResponse(*node, heartbeatRequestMsg,
            NETMSGTYPE_Heartbeat))
      {
         LOG(GENERAL, DEBUG, "Node is responding again.",
               ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(true);
      }
   }

   Result result = {};
   result.data.isResponding = false;

   if (node->getIsResponding())
   {
      // generate the RequestDataMsg with the lastStatsTime
      RequestMetaDataMsg requestDataMsg(node->getLastStatRequestTime().count());
      auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
            NETMSGTYPE_RequestMetaDataResp);

      if (!respMsg)
      {
         LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(false);
      }
      else
      {
         // get response and process it
         auto metaRspMsg = static_cast<RequestMetaDataRespMsg*>(respMsg.get());
         result.highResStatsList = std::move(metaRspMsg->getStatsList());

         result.data.isResponding = true;
         result.data.indirectWorkListSize = metaRspMsg->getIndirectWorkListSize();
         result.data.directWorkListSize = metaRspMsg->getDirectWorkListSize();
         result.data.sessionCount = metaRspMsg->getSessionCount();
         result.data.hostnameid = metaRspMsg->gethostnameid();

         if (!result.highResStatsList.empty())
         {
            // remember newest stats timestamp so the next request only
            // fetches stats since then (front() is presumably the newest
            // entry — determined by the responder's list order)
            auto lastStatsRequestTime = std::chrono::milliseconds(
                  result.highResStatsList.front().rawVals.statsTimeMS);
            node->setLastStatRequestTime(lastStatsRequestTime);
         }

         if (collectClientOpsByNode)
            result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);

         if (collectClientOpsByUser)
            result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
      }
   }

   // node is moved into the result; this work item must not touch it afterwards
   result.node = std::move(node);

   statsCollector->insertMetaData(std::move(result));
}
|
||||
42
mon/source/components/worker/RequestMetaDataWork.h
Normal file
42
mon/source/components/worker/RequestMetaDataWork.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef REQUESTMETADATAWORK_H_
#define REQUESTMETADATAWORK_H_

#include <common/components/worker/Work.h>
#include <common/nodes/ClientOps.h>
#include <misc/TSDatabase.h>
#include <nodes/MetaNodeEx.h>

class StatsCollector;

/**
 * Work item that requests current stats (and optionally client op counters)
 * from one meta node and delivers a Result to the StatsCollector.
 */
class RequestMetaDataWork : public Work
{
   public:
      // everything gathered from one meta node in one collection pass
      struct Result
      {
         std::shared_ptr<MetaNodeEx> node;   // the queried node (ownership moved in)
         MetaNodeDataContent data;           // general stats; isResponding=false on failure
         HighResStatsList highResStatsList;  // high-resolution samples since last request
         ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;   // per-client-IP op counters
         ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap; // per-user op counters
      };

      /**
       * @param node meta node to query
       * @param statsCollector receives the Result; must outlive this work item
       * @param collectClientOpsByNode also request per-client-IP op counters
       * @param collectClientOpsByUser also request per-user op counters
       */
      RequestMetaDataWork(std::shared_ptr<MetaNodeEx> node,
            StatsCollector* statsCollector,
            bool collectClientOpsByNode, bool collectClientOpsByUser) :
         node(std::move(node)),
         statsCollector(statsCollector),
         collectClientOpsByNode(collectClientOpsByNode),
         collectClientOpsByUser(collectClientOpsByUser)
      {}

      virtual void process(char* bufIn, unsigned bufInLen,
            char* bufOut, unsigned bufOutLen) override;

   private:
      std::shared_ptr<MetaNodeEx> node;
      StatsCollector* statsCollector;
      bool collectClientOpsByNode;
      bool collectClientOpsByUser;
};

#endif /*REQUESTMETADATAWORK_H_*/
|
||||
74
mon/source/components/worker/RequestStorageDataWork.cpp
Normal file
74
mon/source/components/worker/RequestStorageDataWork.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
#include "RequestStorageDataWork.h"
|
||||
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
|
||||
#include <common/net/message/mon/RequestStorageDataMsg.h>
|
||||
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
|
||||
#include <components/StatsCollector.h>
|
||||
|
||||
/**
 * Requests current stats from one storage node and hands the Result to the
 * StatsCollector (always, even on failure — the collector counts down its
 * outstanding work items on every insert). If the node was previously marked
 * unresponsive, a heartbeat probe is sent first to possibly re-enable it.
 * buf/len parameters are part of the generic Work interface and unused here.
 */
void RequestStorageDataWork::process(char* bufIn, unsigned bufInLen,
   char* bufOut, unsigned bufOutLen)
{
   if (!node->getIsResponding())
   {
      // cheap probe before attempting a full stats request
      HeartbeatRequestMsg heartbeatRequestMsg;

      if(MessagingTk::requestResponse(*node, heartbeatRequestMsg,
            NETMSGTYPE_Heartbeat))
      {
         LOG(GENERAL, DEBUG, "Node is responding again.",
               ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(true);
      }
   }

   Result result = {};
   result.data.isResponding = false;

   if (node->getIsResponding())
   {
      // generate the RequestStorageDataMsg with the lastStatsTime
      RequestStorageDataMsg requestDataMsg(node->getLastStatRequestTime().count());
      auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
            NETMSGTYPE_RequestStorageDataResp);

      if (!respMsg)
      {
         LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
         node->setIsResponding(false);
      }
      else
      {
         // get response and process it
         auto storageRspMsg = static_cast<RequestStorageDataRespMsg*>(respMsg.get());
         result.highResStatsList = std::move(storageRspMsg->getStatsList());
         result.storageTargetList = std::move(storageRspMsg->getStorageTargets());

         result.data.isResponding = true;
         result.data.indirectWorkListSize = storageRspMsg->getIndirectWorkListSize();
         result.data.directWorkListSize = storageRspMsg->getDirectWorkListSize();
         result.data.diskSpaceTotal = storageRspMsg->getDiskSpaceTotalMiB();
         result.data.diskSpaceFree = storageRspMsg->getDiskSpaceFreeMiB();
         result.data.sessionCount = storageRspMsg->getSessionCount();
         result.data.hostnameid = storageRspMsg->gethostnameid();

         if (!result.highResStatsList.empty())
         {
            // remember newest stats timestamp so the next request only
            // fetches stats since then (front() is presumably the newest
            // entry — determined by the responder's list order)
            auto lastStatsRequestTime = std::chrono::milliseconds(
                  result.highResStatsList.front().rawVals.statsTimeMS);
            node->setLastStatRequestTime(lastStatsRequestTime);
         }

         if (collectClientOpsByNode)
            result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);

         if (collectClientOpsByUser)
            result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
      }
   }

   // node is moved into the result; this work item must not touch it afterwards
   result.node = std::move(node);

   statsCollector->insertStorageData(std::move(result));
}
|
||||
44
mon/source/components/worker/RequestStorageDataWork.h
Normal file
44
mon/source/components/worker/RequestStorageDataWork.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#ifndef REQUESTSTORAGEDATAWORK_H_
#define REQUESTSTORAGEDATAWORK_H_

#include <common/components/worker/Work.h>
#include <common/nodes/ClientOps.h>
#include <common/storage/StorageTargetInfo.h>
#include <misc/TSDatabase.h>
#include <nodes/StorageNodeEx.h>

class StatsCollector;

/**
 * Work item that requests current stats (and optionally client op counters)
 * from one storage node and delivers a Result to the StatsCollector.
 */
class RequestStorageDataWork : public Work
{
   public:
      // everything gathered from one storage node in one collection pass
      struct Result
      {
         std::shared_ptr<StorageNodeEx> node; // the queried node (ownership moved in)
         StorageNodeDataContent data;         // general stats; isResponding=false on failure
         HighResStatsList highResStatsList;   // high-resolution samples since last request
         StorageTargetInfoList storageTargetList; // per-target capacity info
         ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;   // per-client-IP op counters
         ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap; // per-user op counters
      };

      /**
       * @param node storage node to query
       * @param statsCollector receives the Result; must outlive this work item
       * @param collectClientOpsByNode also request per-client-IP op counters
       * @param collectClientOpsByUser also request per-user op counters
       */
      RequestStorageDataWork(std::shared_ptr<StorageNodeEx> node,
            StatsCollector* statsCollector, bool collectClientOpsByNode,
            bool collectClientOpsByUser) :
         node(std::move(node)),
         statsCollector(statsCollector),
         collectClientOpsByNode(collectClientOpsByNode),
         collectClientOpsByUser(collectClientOpsByUser)
      {}

      // 'virtual ... override' added for consistency with RequestMetaDataWork.h
      // and to let the compiler verify this actually overrides Work::process
      virtual void process(char* bufIn, unsigned bufInLen, char* bufOut,
            unsigned bufOutLen) override;

   private:
      std::shared_ptr<StorageNodeEx> node;
      StatsCollector* statsCollector;
      bool collectClientOpsByNode;
      bool collectClientOpsByUser;
};

#endif /*REQUESTSTORAGEDATAWORK_H_*/
|
||||
8
mon/source/exception/CurlException.h
Normal file
8
mon/source/exception/CurlException.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef CURLEXCEPTION_H_
#define CURLEXCEPTION_H_

#include <common/toolkit/NamedException.h>

// exception type for libcurl/HTTP errors (thrown by the InfluxDB plugin's
// HTTP layer — confirm usage sites)
DECLARE_NAMEDEXCEPTION(CurlException, "CurlException")

#endif /*CURLEXCEPTION_H_*/
|
||||
8
mon/source/exception/DatabaseException.h
Normal file
8
mon/source/exception/DatabaseException.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef DATABASEEXCEPTION_H_
#define DATABASEEXCEPTION_H_

#include <common/toolkit/NamedException.h>

// exception type for time-series database errors (connection/query failures)
DECLARE_NAMEDEXCEPTION(DatabaseException, "DatabaseException")

#endif /*DATABASEEXCEPTION_H_*/
|
||||
348
mon/source/misc/Cassandra.cpp
Normal file
348
mon/source/misc/Cassandra.cpp
Normal file
@@ -0,0 +1,348 @@
|
||||
#include "Cassandra.h"
|
||||
|
||||
#include <common/storage/StorageTargetInfo.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <exception/DatabaseException.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
static const std::string libVersion = "2.9";
|
||||
|
||||
/**
 * Resolves one symbol from a dlopen()ed library handle.
 *
 * @tparam T the function type of the symbol, e.g. decltype(cass_cluster_new)
 * @param libHandle handle returned by dlopen(); not owned here
 * @param name symbol name to resolve
 * @return the symbol wrapped in a std::function<T>
 * @throws std::runtime_error if dlerror() reports a resolution failure
 */
template<typename T>
std::function<T> loadSymbol(void* libHandle, const char* name)
{
   dlerror(); // clear any stale error state so the check below is reliable
   auto f = dlsym(libHandle, name);
   const char* error = dlerror();
   if (error != NULL)
      throw std::runtime_error("Couldn't load symbol: " + std::string(error)
            + "\nThe cassandra plugin requires the datastax client library version " + libVersion
            + ".");
   // cast the void* returned by dlsym to a function pointer of type T*
   // (object-to-function pointer conversion; conditionally supported but
   // required to work on POSIX platforms)
   return reinterpret_cast<T(*)>(f);
}
|
||||
|
||||
/**
 * Opens the cassandra plugin: dynamically loads the datastax client library,
 * resolves all required symbols, connects to the cluster (with retries), and
 * creates the keyspace and tables used by beegfs-mon.
 *
 * @param config connection settings (host, port, database/keyspace, TTL)
 * @throws std::runtime_error if the library or a symbol can't be loaded
 * @throws DatabaseException if the connection fails connectionRetries times
 */
Cassandra::Cassandra(Config config) :
   cluster(nullptr, [this](CassCluster* c){cluster_free(c);}),
   session(nullptr, [this](CassSession* s){session_free(s);}),
   batch(nullptr, [this](CassBatch* b){batch_free(b);}),
   config(std::move(config)),
   libHandle(nullptr, dlclose),
   numQueries(0)
{
   // Load datastax cassandra library
   dlerror();
   libHandle.reset(dlopen("libcassandra.so", RTLD_NOW));
   const char* error = dlerror();
   if (libHandle == NULL || error != NULL)
   {
      // NOTE(review): if dlopen fails while dlerror() returns NULL,
      // std::string(error) below constructs from a null pointer (UB) — verify
      // dlerror() is guaranteed non-NULL here or guard it
      throw std::runtime_error("Couldn't load cassandra client library (libcassandra.so): "
            + std::string(error) + "\nThe cassandra plugin requires the datastax client library"
            + " version " + libVersion + ".");
   }

   // load used symbols (each throws std::runtime_error on failure)
   cluster_new = loadSymbol<decltype(cass_cluster_new)>(
         libHandle.get(), "cass_cluster_new");
   cluster_free = loadSymbol<decltype(cass_cluster_free)>(
         libHandle.get(), "cass_cluster_free");
   session_new = loadSymbol<decltype(cass_session_new)>(
         libHandle.get(), "cass_session_new");
   session_free = loadSymbol<decltype(cass_session_free)>(
         libHandle.get(), "cass_session_free");
   batch_new = loadSymbol<decltype(cass_batch_new)>(
         libHandle.get(), "cass_batch_new");
   batch_free = loadSymbol<decltype(cass_batch_free)>(
         libHandle.get(), "cass_batch_free");
   batch_add_statement = loadSymbol<decltype(cass_batch_add_statement)>(
         libHandle.get(), "cass_batch_add_statement");
   cluster_set_contact_points = loadSymbol<decltype(cass_cluster_set_contact_points)>(
         libHandle.get(), "cass_cluster_set_contact_points");
   cluster_set_port = loadSymbol<decltype(cass_cluster_set_port)>(
         libHandle.get(), "cass_cluster_set_port");
   session_connect = loadSymbol<decltype(cass_session_connect)>(
         libHandle.get(), "cass_session_connect");
   session_execute = loadSymbol<decltype(cass_session_execute)>(
         libHandle.get(), "cass_session_execute");
   session_execute_batch = loadSymbol<decltype(cass_session_execute_batch)>(
         libHandle.get(), "cass_session_execute_batch");
   future_error_code = loadSymbol<decltype(cass_future_error_code)>(
         libHandle.get(), "cass_future_error_code");
   future_error_message = loadSymbol<decltype(cass_future_error_message)>(
         libHandle.get(), "cass_future_error_message");
   future_free = loadSymbol<decltype(cass_future_free)>(
         libHandle.get(), "cass_future_free");
   statement_new = loadSymbol<decltype(cass_statement_new)>(
         libHandle.get(), "cass_statement_new");
   statement_free = loadSymbol<decltype(cass_statement_free)>(
         libHandle.get(), "cass_statement_free");

   // create the client objects now that their free functions are resolved
   cluster.reset(cluster_new());
   session.reset(session_new());
   batch.reset(batch_new(CASS_BATCH_TYPE_LOGGED));

   cluster_set_contact_points(cluster.get(), this->config.host.c_str());
   cluster_set_port(cluster.get(), this->config.port);

   // connect with retries; future_error_code() blocks until the connect
   // attempt completes
   unsigned tries = 0;
   while (true)
   {
      auto connectFuture = std::unique_ptr<CassFuture, decltype(future_free)>(
            session_connect(session.get(), cluster.get()), future_free);

      CassError err = future_error_code(connectFuture.get());
      if (err == CASS_OK)
         break;

      const char* message;
      size_t length;
      future_error_message(connectFuture.get(), &message, &length);

      LOG(DATABASE, ERR, "Couldn't connect to cassandra database: " + std::string(message));
      tries++;
      if (tries >= connectionRetries)
         throw DatabaseException("Connection to cassandra database failed.");
      else
         LOG(DATABASE, WARNING, "Retrying in 10 seconds.");

      std::this_thread::sleep_for(std::chrono::seconds(10));
   }

   // Create and switch to keyspace
   query("CREATE KEYSPACE IF NOT EXISTS " + this->config.database + " WITH "
         + "replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};");
   query("USE " + this->config.database + ";");

   // Create tables (idempotent; one per data series written by the collector)
   query("CREATE TABLE IF NOT EXISTS meta ("
         "time timestamp, nodeNumID int, nodeID varchar, isResponding boolean, "
         "indirectWorkListSize int, directWorkListSize int, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS highResMeta ("
         "time timestamp, nodeNumID int, nodeID varchar, workRequests int, "
         "queuedRequests int, netSendBytes int, netRecvBytes int, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS storage ("
         "time timestamp, nodeNumID int, nodeID varchar, isResponding boolean, "
         "indirectWorkListSize int, directWorkListSize int, "
         "diskSpaceTotal bigint, diskSpaceFree bigint, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS highResStorage ("
         "time timestamp, nodeNumID int, nodeID varchar, workRequests int, "
         "queuedRequests int, diskWriteBytes int, diskReadBytes int, "
         "netSendBytes int, netRecvBytes int, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS storageTargetData ("
         "time timestamp, nodeNumID int, nodeID varchar, storageTargetID int, "
         "diskSpaceTotal bigint, diskSpaceFree bigint, inodesTotal int, inodesFree int, "
         "PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS metaClientOpsByNode ("
         "time timestamp, node varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, node));");
   query("CREATE TABLE IF NOT EXISTS storageClientOpsByNode ("
         "time timestamp, node varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, node));");
   query("CREATE TABLE IF NOT EXISTS metaClientOpsByUser ("
         "time timestamp, user varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, user));");
   query("CREATE TABLE IF NOT EXISTS storageClientOpsByUser ("
         "time timestamp, user varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, user));");
}
|
||||
|
||||
/**
 * Executes a single CQL statement on the session.
 *
 * @param query the complete CQL string to execute (no bound parameters are used)
 * @param waitForResult if true, block until the server replies and throw on error;
 *        if false, the statement is fired asynchronously and errors are silently dropped
 * @throws DatabaseException if waitForResult is set and the server reports an error
 */
void Cassandra::query(const std::string& query, bool waitForResult)
{
   // parameter count 0: the query string is already fully formed
   CassStatement* statement = statement_new(query.c_str(), 0);
   // unique_ptr releases the future on every path, including the throw below
   auto queryFuture = std::unique_ptr<CassFuture, decltype(future_free)>(
         session_execute(session.get(), statement), future_free);
   // the driver copies the statement on execute, so it can be freed right away
   statement_free(statement);

   if (waitForResult)
   {
      // future_error_code() blocks until the operation completes
      CassError result = future_error_code(queryFuture.get());

      if (result != CASS_OK)
      {
         const char* message;
         size_t length;
         // message points into the future and stays valid until it is freed
         future_error_message(queryFuture.get(), &message, &length);
         throw DatabaseException("Query '" + query + "' failed: " + std::string(message));
      }
   }
}
|
||||
|
||||
void Cassandra::insertMetaNodeData(std::shared_ptr<Node> node, const MetaNodeDataContent& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO meta ";
|
||||
statement << "(time, nodeNumID, nodeID, isResponding";
|
||||
if (data.isResponding)
|
||||
statement << ", indirectWorkListSize, directWorkListSize) ";
|
||||
else
|
||||
statement << ") ";
|
||||
statement << "VALUES (";
|
||||
statement << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << std::boolalpha << data.isResponding;
|
||||
if (data.isResponding)
|
||||
statement << ", " << data.indirectWorkListSize << ", " << data.directWorkListSize << ") ";
|
||||
else
|
||||
statement << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
/**
 * Queues an INSERT for one low-resolution storage node sample.
 *
 * Work-list and disk-space columns are only written while the node is responding;
 * otherwise only the key columns and isResponding=false are stored.
 */
void Cassandra::insertStorageNodeData(std::shared_ptr<Node> node,
   const StorageNodeDataContent& data)
{
   std::ostringstream statement;
   statement << "INSERT INTO storage ";
   statement << "(time, nodeNumID, nodeID, isResponding";
   if (data.isResponding)
      statement << ", indirectWorkListSize, directWorkListSize, diskSpaceTotal, diskSpaceFree) ";
   else
      statement << ") ";
   statement << "VALUES (";
   // NOTE(review): the alias goes into the CQL string unescaped; a single quote in a
   // node alias would break the statement — confirm aliases are restricted upstream
   statement << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', ";
   statement << std::boolalpha << data.isResponding;
   if (data.isResponding)
      statement << ", " << data.indirectWorkListSize << ", " << data.directWorkListSize << ", "
            << data.diskSpaceTotal << ", " << data.diskSpaceFree << ") ";
   else
      statement << ") ";
   // in CQL the USING TTL clause follows the VALUES list; rows expire automatically
   statement << "USING TTL " << config.TTLSecs << ";";

   appendQuery(statement.str());

}
|
||||
|
||||
void Cassandra::insertHighResMetaNodeData(std::shared_ptr<Node> node,
|
||||
const HighResolutionStats& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO highResMeta ";
|
||||
statement << "(time, nodeNumID, nodeID, workRequests, ";
|
||||
statement << "queuedRequests, netSendBytes, netRecvBytes) VALUES (";
|
||||
statement << data.rawVals.statsTimeMS << ", " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << data.incVals.workRequests << ", " << data.rawVals.queuedRequests << ", ";
|
||||
statement << data.incVals.netSendBytes << ", " << data.incVals.netRecvBytes << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
/**
 * Queues an INSERT for one high-resolution storage node sample. The row time is
 * the sample's own statsTimeMS value.
 */
void Cassandra::insertHighResStorageNodeData(std::shared_ptr<Node> node,
   const HighResolutionStats& data)
{
   std::ostringstream statement;
   statement << "INSERT INTO highResStorage ";
   statement << "(time, nodeNumID, nodeID, workRequests, ";
   statement << "queuedRequests, diskWriteBytes, diskReadBytes, netSendBytes, netRecvBytes) VALUES (";
   // incVals carry deltas since the previous sample, rawVals absolute values
   statement << data.rawVals.statsTimeMS << ", " << node->getNumID() << ", '" << node->getAlias() << "', ";
   statement << data.incVals.workRequests << ", " << data.rawVals.queuedRequests << ", ";
   statement << data.incVals.diskWriteBytes << ", " << data.incVals.diskReadBytes << ", ";
   statement << data.incVals.netSendBytes << ", " << data.incVals.netRecvBytes << ") ";
   statement << "USING TTL " << config.TTLSecs << ";";

   appendQuery(statement.str());
}
|
||||
|
||||
/**
 * Queues an INSERT with capacity/inode counters for one storage target.
 *
 * NOTE(review): the storageTargetData table is created with
 * PRIMARY KEY(time, nodeNumID) only — storageTargetID is not part of the key, so
 * two targets of the same node written with the same timestamp would overwrite
 * each other. Verify the schema is intended.
 */
void Cassandra::insertStorageTargetsData(std::shared_ptr<Node> node,
   const StorageTargetInfo& data)
{
   std::ostringstream statement;
   statement << "INSERT INTO storageTargetData ";
   statement << "(time, nodeNumID, nodeID, storageTargetID, ";
   statement << "diskSpaceTotal, diskSpaceFree, inodesTotal, inodesFree) VALUES (";
   statement << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', ";
   statement << data.getTargetID() << ", ";
   statement << data.getDiskSpaceTotal() << ", " << data.getDiskSpaceFree() << ", ";
   statement << data.getInodesTotal() << ", " << data.getInodesFree() << ") ";
   statement << "USING TTL " << config.TTLSecs << ";";

   appendQuery(statement.str());
}
|
||||
|
||||
void Cassandra::insertClientNodeData(const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO ";
|
||||
if (perUser)
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
statement << "metaClientOpsByUser";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
statement << "storageClientOpsByUser";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
|
||||
statement << " (time, user, ops) VALUES (";
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
statement << "metaClientOpsByNode";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
statement << "storageClientOpsByNode";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
|
||||
statement << " (time, node, ops) VALUES (";
|
||||
}
|
||||
|
||||
statement << "TOTIMESTAMP(NOW()), '" << id << "', {";
|
||||
|
||||
bool first = true;
|
||||
|
||||
for (auto iter = opMap.begin(); iter != opMap.end(); iter++)
|
||||
{
|
||||
if (iter->second == 0)
|
||||
continue;
|
||||
|
||||
statement << (first ? "" : ",") << "'" << iter->first << "':" << iter->second;
|
||||
first = false;
|
||||
}
|
||||
|
||||
statement << "}) USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
// if no fields are != 0, dont write anything
|
||||
if (!first)
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
/**
 * Adds a statement to the current batch; flushes the batch once
 * config.maxInsertsPerBatch statements have accumulated.
 * Thread-safe: batch and numQueries are guarded by queryMutex.
 */
void Cassandra::appendQuery(const std::string& query)
{
   const std::lock_guard<Mutex> lock(queryMutex);

   CassStatement* statement = statement_new(query.c_str(), 0);
   // the batch keeps its own copy, so the statement can be freed immediately
   batch_add_statement(batch.get(), statement);
   statement_free(statement);

   numQueries++;

   if (numQueries >= config.maxInsertsPerBatch)
   {
      // lock is already held, so call the unlocked flavor directly
      writeUnlocked();
   }
}
|
||||
|
||||
void Cassandra::write()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(queryMutex);
|
||||
|
||||
if(numQueries)
|
||||
writeUnlocked();
|
||||
}
|
||||
|
||||
/**
 * Sends the current batch to Cassandra and starts a fresh one.
 * Caller must hold queryMutex.
 *
 * NOTE(review): the execute future is freed without being waited on, so
 * server-side errors of the batch are silently dropped — confirm that
 * fire-and-forget delivery is intended here.
 */
void Cassandra::writeUnlocked()
{
   CassFuture* batchFuture = session_execute_batch(session.get(), batch.get());
   // the driver holds its own reference to the submitted batch, so a fresh
   // batch object can replace it right away
   batch.reset(batch_new(CASS_BATCH_TYPE_LOGGED));
   future_free(batchFuture);

   LOG(DATABASE, DEBUG, "Sent queries to Cassandra.", numQueries);
   numQueries = 0;
}
|
||||
|
||||
80
mon/source/misc/Cassandra.h
Normal file
80
mon/source/misc/Cassandra.h
Normal file
@@ -0,0 +1,80 @@
|
||||
#ifndef CASSANDRA_H_
|
||||
#define CASSANDRA_H_
|
||||
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
|
||||
#include <cassandra.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
/**
 * TSDatabase backend writing to an Apache Cassandra cluster.
 *
 * The driver entry points are held as std::function members (note the <dlfcn.h>
 * include — the library is presumably loaded at runtime via libHandle; confirm in
 * the constructor). Statements are collected into a logged batch, guarded by
 * queryMutex, and flushed once config.maxInsertsPerBatch entries accumulate or
 * write() is called.
 */
class Cassandra : public TSDatabase
{
   public:

      // connection and batching settings, filled from the beegfs-mon config
      struct Config
      {
         std::string host;                // contact point
         int port;
         std::string database;            // keyspace name
         unsigned maxInsertsPerBatch;     // batch flush threshold
         unsigned TTLSecs;                // per-row TTL used in every INSERT
      };

      Cassandra(Config config);
      virtual ~Cassandra() {};

      virtual void insertMetaNodeData(
            std::shared_ptr<Node> node, const MetaNodeDataContent& data) override;
      virtual void insertStorageNodeData(
            std::shared_ptr<Node> node, const StorageNodeDataContent& data) override;
      virtual void insertHighResMetaNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) override;
      virtual void insertHighResStorageNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) override;
      virtual void insertStorageTargetsData(
            std::shared_ptr<Node> node, const StorageTargetInfo& data) override;
      virtual void insertClientNodeData(
            const std::string& id, const NodeType nodeType,
            const std::map<std::string, uint64_t>& opMap, bool perUser) override;
      virtual void write() override;

   private:
      // driver function wrappers; these members must stay declared BEFORE the
      // unique_ptrs below, whose deleter types are decltype()s of them
      std::function<decltype(cass_cluster_new)> cluster_new;
      std::function<decltype(cass_cluster_free)> cluster_free;
      std::function<decltype(cass_session_new)> session_new;
      std::function<decltype(cass_session_free)> session_free;
      std::function<decltype(cass_batch_new)> batch_new;
      std::function<decltype(cass_batch_free)> batch_free;
      std::function<decltype(cass_batch_add_statement)> batch_add_statement;
      std::function<decltype(cass_cluster_set_contact_points)> cluster_set_contact_points;
      std::function<decltype(cass_cluster_set_port)> cluster_set_port;
      std::function<decltype(cass_session_connect)> session_connect;
      std::function<decltype(cass_session_execute)> session_execute;
      std::function<decltype(cass_session_execute_batch)> session_execute_batch;
      std::function<decltype(cass_future_error_code)> future_error_code;
      std::function<decltype(cass_future_error_message)> future_error_message;
      std::function<decltype(cass_future_free)> future_free;
      std::function<decltype(cass_statement_new)> statement_new;
      std::function<decltype(cass_statement_free)> statement_free;

      std::unique_ptr<CassCluster, decltype(cluster_free)> cluster;
      std::unique_ptr<CassSession, decltype(session_free)> session;
      std::unique_ptr<CassBatch, decltype(batch_free)> batch;     // current pending batch

      const Config config;
      // keeps the driver library handle alive for the object's lifetime
      std::unique_ptr<void, int(*)(void*)> libHandle;

      // NOTE(review): queryBuffer appears unused in the visible implementation
      std::string queryBuffer;
      unsigned numQueries;          // statements currently in the batch

      mutable Mutex queryMutex;     // guards batch and numQueries

      void appendQuery(const std::string& query);
      void query(const std::string& query, bool waitForResult = true);
      // flush helper; caller must hold queryMutex
      void writeUnlocked();
};
|
||||
|
||||
#endif
|
||||
153
mon/source/misc/CurlWrapper.cpp
Normal file
153
mon/source/misc/CurlWrapper.cpp
Normal file
@@ -0,0 +1,153 @@
|
||||
#include "CurlWrapper.h"
|
||||
|
||||
#include <exception/CurlException.h>
|
||||
|
||||
/**
 * Initializes a reusable curl easy handle: error buffer, timeouts, the write
 * callback (which delivers response bodies into this instance) and, optionally,
 * disabled SSL certificate checks.
 *
 * @param timeout used for both the total transfer and the connect timeout
 * @param checkSSLCertificates if false, peer and host verification are disabled
 * @throws CurlException if initialization or any option setting fails
 */
CurlWrapper::CurlWrapper(std::chrono::milliseconds timeout, bool checkSSLCertificates) :
   curlHandle(curl_easy_init(), &curl_easy_cleanup)
{
   if (curlHandle.get() == NULL)
      throw CurlException("Curl init failed.");

   // make the buffer a valid (empty) C string: exceptions below construct their
   // message from it before curl ever writes to it
   errorBuffer[0] = '\0';

   // pass the decayed char* — CURLOPT_ERRORBUFFER expects char*, not char(*)[N]
   if (curl_easy_setopt(curlHandle.get(), CURLOPT_ERRORBUFFER, errorBuffer) != CURLE_OK)
      throw CurlException("Setting Curl error buffer failed.");

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_NOSIGNAL, 1L) != CURLE_OK)
      throw CurlException(errorBuffer);

   // curl reads a long from the vararg list, so cast the chrono rep explicitly
   if (curl_easy_setopt(curlHandle.get(), CURLOPT_TIMEOUT_MS,
         static_cast<long>(timeout.count())) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEFUNCTION, writeCallback) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEDATA, static_cast<void*>(this)) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_CONNECTTIMEOUT_MS,
         static_cast<long>(timeout.count())) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (!checkSSLCertificates)
   {
      // explicit long literals: these options are documented to take a long
      if (curl_easy_setopt(curlHandle.get(), CURLOPT_SSL_VERIFYPEER, 0L) != CURLE_OK)
         throw CurlException(errorBuffer);

      if (curl_easy_setopt(curlHandle.get(), CURLOPT_SSL_VERIFYHOST, 0L) != CURLE_OK)
         throw CurlException(errorBuffer);
   }
}
|
||||
|
||||
void CurlWrapper::enableHttpAuth(const std::string& user, const std::string& password)
|
||||
{
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPAUTH, CURLAUTH_ANY))
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_USERNAME, user.c_str()))
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_PASSWORD, password.c_str()))
|
||||
throw CurlException(errorBuffer);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Performs a blocking HTTP GET on url with the given query parameters appended.
 * The response body is stored in this instance (see getResponse()).
 *
 * @return the HTTP response status code
 * @throws CurlException on any curl-level failure (connect error, timeout, ...)
 */
unsigned short CurlWrapper::sendGetRequest(const std::string& url, const ParameterMap& parameters)
{
   std::string parameterStr = makeParameterStr(parameters);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_URL, (url + parameterStr).c_str()) != CURLE_OK)
      throw CurlException(errorBuffer);

   // the handle is reused for POSTs too, so explicitly switch it back to GET mode
   if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPGET, 1L) != CURLE_OK)
      throw CurlException(errorBuffer);

   // replace with curl_multi_perform?
   if (curl_easy_perform(curlHandle.get()) != CURLE_OK)
      throw CurlException(errorBuffer);

   long responseCode;
   if (curl_easy_getinfo(curlHandle.get(), CURLINFO_RESPONSE_CODE, &responseCode) != CURLE_OK)
      throw CurlException(errorBuffer);

   // HTTP status codes fit easily into unsigned short
   return responseCode;
}
|
||||
|
||||
unsigned short CurlWrapper::sendPostRequest(const std::string& url, const char* data,
|
||||
const ParameterMap& parameters, const std::vector<std::string>& headers)
|
||||
{
|
||||
std::string parameterStr = makeParameterStr(parameters);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_URL, (url + parameterStr).c_str()) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_POSTFIELDS, data) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
struct curl_slist* headerList = nullptr;
|
||||
for (const auto& header : headers) {
|
||||
headerList = curl_slist_append(headerList, header.c_str());
|
||||
}
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPHEADER, headerList) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
// replace with curl_multi_perform?
|
||||
if (curl_easy_perform(curlHandle.get()) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
long responseCode;
|
||||
if (curl_easy_getinfo(curlHandle.get(), CURLINFO_RESPONSE_CODE, &responseCode) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
return responseCode;
|
||||
}
|
||||
|
||||
/**
 * Builds a URL query string ("?k1=v1&k2=v2") from the given parameter map,
 * URL-escaping both keys and values. Returns an empty string for an empty map.
 *
 * @throws CurlException if curl fails to escape a key or value
 */
std::string CurlWrapper::makeParameterStr(const ParameterMap& parameters) const
{
   if (!parameters.empty())
   {
      std::string parameterStr = "?";

      for (auto iter = parameters.begin(); iter != parameters.end(); iter++)
      {
         {
            // length 0 tells curl_easy_escape to use strlen on the input
            auto escaped = std::unique_ptr<char, void(*)(void*)> (
                  curl_easy_escape(curlHandle.get(), (iter->first).c_str(),0),
                  &curl_free);

            if (!escaped)
               throw CurlException(errorBuffer);

            parameterStr += escaped.get();
         }

         {
            auto escaped = std::unique_ptr<char, void(*)(void*)> (
                  curl_easy_escape(curlHandle.get(), (iter->second).c_str(),0),
                  &curl_free);

            if (!escaped)
               throw CurlException(errorBuffer);

            parameterStr += "=";
            parameterStr += escaped.get();
            parameterStr += "&";
         }
      }

      // drop the trailing '&' left by the loop
      parameterStr.resize(parameterStr.size() - 1);

      return parameterStr;
   }

   return {};
}
|
||||
|
||||
/**
 * libcurl CURLOPT_WRITEFUNCTION callback; userdata is the owning CurlWrapper
 * (registered via CURLOPT_WRITEDATA in the constructor).
 *
 * NOTE(review): curl may invoke this callback multiple times for one response
 * (once per received chunk); setResponse() overwrites rather than appends, so
 * only the last chunk of a multi-chunk response would be kept — verify whether
 * responses can exceed a single delivery, and clear/append accordingly.
 */
size_t CurlWrapper::writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata)
{
   auto instance = static_cast<CurlWrapper*>(userdata);
   instance->setResponse(std::string(ptr, size*nmemb));

   // Always signal success
   return size*nmemb;
}
|
||||
57
mon/source/misc/CurlWrapper.h
Normal file
57
mon/source/misc/CurlWrapper.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef CURL_WRAPPER_H_
|
||||
#define CURL_WRAPPER_H_
|
||||
|
||||
#include <common/threading/Mutex.h>
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
class CurlWrapper
|
||||
{
|
||||
public:
|
||||
CurlWrapper(std::chrono::milliseconds timeout, bool checkSSLCertificates);
|
||||
|
||||
CurlWrapper(const CurlWrapper&) = delete;
|
||||
CurlWrapper& operator=(const CurlWrapper&) = delete;
|
||||
CurlWrapper(CurlWrapper&&) = delete;
|
||||
CurlWrapper& operator=(CurlWrapper&&) = delete;
|
||||
|
||||
~CurlWrapper() = default;
|
||||
|
||||
void enableHttpAuth(const std::string& user, const std::string& password);
|
||||
|
||||
typedef std::unordered_map<std::string, std::string> ParameterMap;
|
||||
|
||||
unsigned short sendGetRequest(const std::string& url,
|
||||
const ParameterMap& parameters);
|
||||
unsigned short sendPostRequest(const std::string& url, const char* data,
|
||||
const ParameterMap& parameters, const std::vector<std::string>& headers);
|
||||
|
||||
static size_t writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CURL, void(*)(void*)> curlHandle;
|
||||
std::string response;
|
||||
|
||||
char errorBuffer[CURL_ERROR_SIZE];
|
||||
|
||||
std::string makeParameterStr(const ParameterMap& parameters) const;
|
||||
|
||||
void setResponse(const std::string& response)
|
||||
{
|
||||
this->response = response;
|
||||
}
|
||||
|
||||
public:
|
||||
const std::string& getResponse() const
|
||||
{
|
||||
return response;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
344
mon/source/misc/InfluxDB.cpp
Normal file
344
mon/source/misc/InfluxDB.cpp
Normal file
@@ -0,0 +1,344 @@
|
||||
#include "InfluxDB.h"
|
||||
|
||||
#include <common/storage/StorageTargetInfo.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <exception/DatabaseException.h>
|
||||
#include <exception/CurlException.h>
|
||||
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
|
||||
static const std::string retentionPolicyName = "auto";
|
||||
|
||||
/**
 * Sets up the HTTP client and, for InfluxDB v1 only, optional basic auth plus the
 * database/retention-policy bootstrap. For v2 no bootstrap is performed; org,
 * bucket and token are passed per request in sendWrite() instead.
 */
InfluxDB::InfluxDB(Config cfg) :
   config(std::move(cfg))
{
   curlWrapper = boost::make_unique<CurlWrapper>(config.httpTimeout, config.curlCheckSSLCertificates);
   if (config.dbVersion == INFLUXDB)
   {
      // v1 uses user/password auth; empty username means auth is disabled
      if (!config.username.empty())
         curlWrapper->enableHttpAuth(config.username, config.password);

      setupDatabase();
   }
}
|
||||
|
||||
void InfluxDB::setupDatabase() const
|
||||
{
|
||||
// Wait for InfluxDB service being available
|
||||
unsigned tries = 0;
|
||||
while(!sendPing())
|
||||
{
|
||||
tries++;
|
||||
LOG(DATABASE, ERR, "Coudn't reach InfluxDB service.");
|
||||
if (tries >= connectionRetries)
|
||||
throw DatabaseException("Connection to InfluxDB failed.");
|
||||
else
|
||||
LOG(DATABASE, WARNING, "Retrying in 10 seconds.");
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(10));
|
||||
}
|
||||
|
||||
// these are called every time the service starts but is being ignored by influxdb if
|
||||
// the db and rp already exist
|
||||
sendQuery("create database " + config.database);
|
||||
if (config.setRetentionPolicy)
|
||||
{
|
||||
sendQuery("create retention policy " + retentionPolicyName + " on " + config.database
|
||||
+ " duration " + config.retentionDuration
|
||||
+ " replication 1 default");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Appends one line-protocol point with low-resolution meta node statistics.
 * Tags: nodeID (escaped), nodeNumID. Work-list fields and hostnameid are only
 * written while the node is responding. No explicit timestamp is appended, so
 * the server assigns its receive time.
 */
void InfluxDB::insertMetaNodeData(std::shared_ptr<Node> node, const MetaNodeDataContent& data)
{
   std::ostringstream point;
   point << "meta";
   point << ",nodeID=" << escapeStringForWrite(node->getAlias());
   point << ",nodeNumID=" << node->getNumID();

   if(data.isResponding)
   {
      // the leading space separates the tag set from the field set
      point << " isResponding=" << std::boolalpha << true;
      point << ",indirectWorkListSize=" << data.indirectWorkListSize;
      point << ",directWorkListSize=" << data.directWorkListSize;
      // string field values must be double-quoted in the line protocol
      point << ",hostnameid=\"" << data.hostnameid << "\"";

   }
   else
   {
      point << " isResponding=" << std::boolalpha << false;
   }

   appendPoint(point.str());
}
|
||||
|
||||
/**
 * Appends one line-protocol point with low-resolution storage node statistics.
 * Work-list, disk-space and hostnameid fields are only written while the node is
 * responding. No explicit timestamp: the server assigns its receive time.
 */
void InfluxDB::insertStorageNodeData(std::shared_ptr<Node> node,
   const StorageNodeDataContent& data)
{
   std::ostringstream point;
   point << "storage";
   point << ",nodeID=" << escapeStringForWrite(node->getAlias());
   point << ",nodeNumID=" << node->getNumID();

   if(data.isResponding)
   {
      // leading space separates the tag set from the field set
      point << " isResponding=" << std::boolalpha << true;
      point << ",indirectWorkListSize=" << data.indirectWorkListSize;
      point << ",directWorkListSize=" << data.directWorkListSize;
      point << ",diskSpaceTotal=" << data.diskSpaceTotal;
      point << ",diskSpaceFree=" << data.diskSpaceFree;
      // string field values must be double-quoted in the line protocol
      point << ",hostnameid=\"" << data.hostnameid << "\"";

   }
   else
   {
      point << " isResponding=" << std::boolalpha << false;
   }

   appendPoint(point.str());
}
|
||||
|
||||
void InfluxDB::insertHighResMetaNodeData(std::shared_ptr<Node> node,
|
||||
const HighResolutionStats& data)
|
||||
{
|
||||
std::ostringstream point;
|
||||
point << "highResMeta";
|
||||
point << ",nodeID=" << escapeStringForWrite(node->getAlias());
|
||||
point << ",nodeNumID=" << node->getNumID();
|
||||
|
||||
point << " workRequests=" << data.incVals.workRequests;
|
||||
point << ",queuedRequests=" << data.rawVals.queuedRequests;
|
||||
point << ",netSendBytes=" << data.incVals.netSendBytes;
|
||||
point << ",netRecvBytes=" << data.incVals.netRecvBytes;
|
||||
|
||||
// timestamp in ns
|
||||
point << " " << std::chrono::nanoseconds(
|
||||
std::chrono::milliseconds(data.rawVals.statsTimeMS)).count();
|
||||
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
/**
 * Appends one line-protocol point with high-resolution storage node statistics,
 * carrying the sample's own timestamp (statsTimeMS converted to nanoseconds).
 */
void InfluxDB::insertHighResStorageNodeData(std::shared_ptr<Node> node,
   const HighResolutionStats& data)
{
   std::ostringstream point;
   point << "highResStorage";
   point << ",nodeID=" << escapeStringForWrite(node->getAlias());
   point << ",nodeNumID=" << node->getNumID();

   // incVals carry deltas since the previous sample, rawVals absolute values
   point << " workRequests=" << data.incVals.workRequests;
   point << ",queuedRequests=" << data.rawVals.queuedRequests;
   point << ",diskWriteBytes=" << data.incVals.diskWriteBytes;
   point << ",diskReadBytes=" << data.incVals.diskReadBytes;
   point << ",netSendBytes=" << data.incVals.netSendBytes;
   point << ",netRecvBytes=" << data.incVals.netRecvBytes;

   // timestamp in ns
   point << " " << std::chrono::nanoseconds(
         std::chrono::milliseconds(data.rawVals.statsTimeMS)).count();

   appendPoint(point.str());
}
|
||||
|
||||
/**
 * Appends one line-protocol point with capacity/inode counters and the
 * consistency state of one storage target.
 */
void InfluxDB::insertStorageTargetsData(std::shared_ptr<Node> node,
   const StorageTargetInfo& data)
{
   std::ostringstream point;
   point << "storageTargets";
   point << ",nodeID=" << escapeStringForWrite(node->getAlias());
   point << ",nodeNumID=" << node->getNumID();
   point << ",storageTargetID=" << data.getTargetID();

   point << " diskSpaceTotal=" << data.getDiskSpaceTotal();
   point << ",diskSpaceFree=" << data.getDiskSpaceFree();
   point << ",inodesTotal=" << data.getInodesTotal();
   point << ",inodesFree=" << data.getInodesFree();

   // map the consistency state to a string field; note that every state other
   // than GOOD and NEEDS_RESYNC is reported as "BAD"
   std::string t;
   if (data.getState() == TargetConsistencyState::TargetConsistencyState_GOOD)
      t = "GOOD";
   else if (data.getState() == TargetConsistencyState::TargetConsistencyState_NEEDS_RESYNC)
      t = "NEEDS_RESYNC";
   else
      t = "BAD";

   point << ",targetConsistencyState=\"" << t << "\"";

   appendPoint(point.str());
}
|
||||
|
||||
void InfluxDB::insertClientNodeData(const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser)
|
||||
{
|
||||
std::ostringstream point;
|
||||
if (perUser)
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
point << "metaClientOpsByUser";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
point << "storageClientOpsByUser";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
point << "metaClientOpsByNode";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
point << "storageClientOpsByNode";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
}
|
||||
|
||||
point << (perUser ? ",user=" : ",node=") << id;
|
||||
|
||||
bool first = true;
|
||||
|
||||
for (auto iter = opMap.begin(); iter != opMap.end(); iter++)
|
||||
{
|
||||
if (iter->second == 0)
|
||||
continue;
|
||||
|
||||
point << (first ? " " : ",") << iter->first << "=" << iter->second;
|
||||
first = false;
|
||||
}
|
||||
|
||||
// if no fields are != 0, dont write anything
|
||||
if (!first)
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Buffers one line-protocol point; flushes the buffer via HTTP once
 * config.maxPointsPerRequest points have accumulated.
 * Thread-safe: points and numPoints are guarded by pointsMutex.
 */
void InfluxDB::appendPoint(const std::string& point)
{
   const std::lock_guard<Mutex> mutexLock(pointsMutex);

   // line protocol: one point per line
   points += point + "\n";
   numPoints++;

   // test also for size? make it an option?
   if (numPoints >= config.maxPointsPerRequest)
   {
      // lock is already held, so call the unlocked flavor directly
      writePointsUnlocked();
   }
}
|
||||
void InfluxDB::write()
|
||||
{
|
||||
const std::lock_guard<Mutex> mutexLock(pointsMutex);
|
||||
writePointsUnlocked();
|
||||
}
|
||||
|
||||
/**
 * Sends the buffered points and resets the buffer. Caller must hold pointsMutex.
 * Send errors are logged inside sendWrite(); the buffer is cleared regardless,
 * so points of a failed request are dropped rather than retried.
 */
void InfluxDB::writePointsUnlocked()
{
   sendWrite(points);
   points.clear();
   LOG(DATABASE, DEBUG, "Sent data to InfluxDB.", numPoints);
   numPoints = 0;
}
|
||||
|
||||
/**
 * POSTs a line-protocol payload to the configured server, using the v1 endpoint
 * (/write with db parameter) or the v2 endpoint (/api/v2/write with org/bucket
 * parameters and token header). Errors are logged, not thrown, so a temporarily
 * unreachable database doesn't kill the caller; the affected points are lost.
 */
void InfluxDB::sendWrite(const std::string& data) const
{
   unsigned short responseCode = 0;
   CurlWrapper::ParameterMap params;
   std::string url;
   std::vector<std::string> headers;
   if (config.dbVersion == INFLUXDB)
   {
      params["db"] = config.database;
      url = config.host + ":" + StringTk::intToStr(config.port) + "/write";
   }
   else
   {
      params["org"] = config.organization;
      params["bucket"] = config.bucket;
      url = config.host + ":" + StringTk::intToStr(config.port) + "/api/v2/write";
      headers.push_back("Authorization: Token " + config.token);
   }

   // the CurlWrapper instance is shared, so requests must be serialized
   const std::lock_guard<Mutex> mutexLock(curlMutex);

   try
   {
      responseCode = curlWrapper->sendPostRequest(url, data.c_str(), params, headers);
   }
   catch (const CurlException& e)
   {
      LOG(DATABASE, ERR, "Writing to InfluxDB failed due to Curl error.", ("Error", e.what()));
      return;
   }

   // anything outside 2xx is a server-side rejection of the write
   if (responseCode < 200 || responseCode >= 300)
   {
      LOG(DATABASE, ERR, "Writing to InfluxDB failed.", responseCode,
            ("responseMessage", curlWrapper->getResponse()));
   }
}
|
||||
|
||||
/**
 * POSTs an InfluxQL management statement to the v1 /query endpoint (used by
 * setupDatabase() to create the database and retention policy).
 * Errors are logged, not thrown.
 */
void InfluxDB::sendQuery(const std::string& data) const
{
   unsigned short responseCode = 0;
   CurlWrapper::ParameterMap params;
   params["db"] = config.database;
   // the statement itself travels in the URL-encoded "q" parameter; the body is empty
   params["q"] = data;

   // the CurlWrapper instance is shared, so requests must be serialized
   const std::lock_guard<Mutex> mutexLock(curlMutex);

   try
   {
      responseCode = curlWrapper->sendPostRequest(config.host + ":"
            + StringTk::intToStr(config.port)
            + "/query", "", params, {});
   }
   catch (const CurlException& e)
   {
      LOG(DATABASE, ERR, "Querying InfluxDB failed due to Curl error.", ("Error", e.what()));
      return;
   }

   if (responseCode < 200 || responseCode >= 300)
   {
      LOG(DATABASE, ERR, "Querying InfluxDB failed.", responseCode,
            ("responseMessage", curlWrapper->getResponse()));
   }
}
|
||||
|
||||
/**
 * Probes the server's /ping endpoint.
 *
 * @return true if the server answered with a 2xx status, false on transport
 *         errors or any other status (failures are also logged)
 */
bool InfluxDB::sendPing() const
{
   unsigned short responseCode = 0;

   // the CurlWrapper instance is shared, so requests must be serialized
   const std::lock_guard<Mutex> mutexLock(curlMutex);

   try
   {
      responseCode = curlWrapper->sendGetRequest(config.host + ":"
            + StringTk::intToStr(config.port) + "/ping", CurlWrapper::ParameterMap());
   }
   catch (const CurlException& e)
   {
      LOG(DATABASE, ERR, "Pinging InfluxDB failed due to Curl error.", ("Error", e.what()));
      return false;
   }

   if (responseCode < 200 || responseCode >= 300)
   {
      LOG(DATABASE, ERR, "Pinging InfluxDB failed.", responseCode,
            ("responseMessage", curlWrapper->getResponse()));
      return false;
   }

   return true;
}
|
||||
|
||||
/*
|
||||
* According to InfluxDB documentation, spaces, "=" and "," need to be escaped for write.
|
||||
*/
|
||||
std::string InfluxDB::escapeStringForWrite(const std::string& str)
|
||||
{
|
||||
std::string result = str;
|
||||
boost::replace_all(result, " ", "\\ ");
|
||||
boost::replace_all(result, "=", "\\=");
|
||||
boost::replace_all(result, ",", "\\,");
|
||||
return result;
|
||||
}
|
||||
84
mon/source/misc/InfluxDB.h
Normal file
84
mon/source/misc/InfluxDB.h
Normal file
@@ -0,0 +1,84 @@
|
||||
#ifndef INFLUXDB_H_
|
||||
#define INFLUXDB_H_
|
||||
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
#include <misc/CurlWrapper.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
#include <app/Config.h>
|
||||
|
||||
// Protocol generation of the target InfluxDB server: v1 uses /write + /query with
// optional user/password auth; v2 uses /api/v2/write with org, bucket and token.
enum InfluxDBVersion
{
   INFLUXDB,    // InfluxDB 1.x
   INFLUXDB2,   // InfluxDB 2.x
};
|
||||
|
||||
class App;
|
||||
|
||||
/**
 * TSDatabase backend writing line-protocol points to an InfluxDB server over
 * HTTP. Points are buffered (guarded by pointsMutex) and flushed once
 * config.maxPointsPerRequest points accumulate or write() is called.
 */
class InfluxDB : public TSDatabase
{
   public:

      // connection, auth and batching settings, filled from the beegfs-mon config
      struct Config
      {
         std::string host;
         int port;
         std::string database;                  // v1 database name
         std::chrono::milliseconds httpTimeout;
         unsigned maxPointsPerRequest;          // buffer flush threshold
         bool setRetentionPolicy;               // v1: create default RP on startup
         std::string retentionDuration;
         bool curlCheckSSLCertificates;
         std::string username;                  // v1 auth (optional)
         std::string password;
         std::string bucket;                    // v2 only
         std::string organization;              // v2 only
         std::string token;                     // v2 only
         InfluxDBVersion dbVersion;

      };

      InfluxDB(Config cfg);
      virtual ~InfluxDB() {};

      virtual void insertMetaNodeData(
            std::shared_ptr<Node> node, const MetaNodeDataContent& data) override;
      virtual void insertStorageNodeData(
            std::shared_ptr<Node> node, const StorageNodeDataContent& data) override;
      virtual void insertHighResMetaNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) override;
      virtual void insertHighResStorageNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) override;
      virtual void insertStorageTargetsData(
            std::shared_ptr<Node> node, const StorageTargetInfo& data) override;
      virtual void insertClientNodeData(
            const std::string& id, const NodeType nodeType,
            const std::map<std::string, uint64_t>& opMap, bool perUser) override;
      virtual void write() override;

      // escapes space, '=' and ',' for use as line-protocol tag/measurement text
      static std::string escapeStringForWrite(const std::string& str);

   private:
      const Config config;

      std::unique_ptr<CurlWrapper> curlWrapper;

      std::string points;        // buffered line-protocol lines, newline-separated
      unsigned numPoints = 0;    // number of lines currently buffered

      mutable Mutex pointsMutex; // guards points and numPoints
      mutable Mutex curlMutex;   // serializes requests on the shared curlWrapper

      void setupDatabase() const;
      void appendPoint(const std::string& point);
      // flush helper; caller must hold pointsMutex
      void writePointsUnlocked();
      void sendWrite(const std::string& data) const;
      void sendQuery(const std::string& data) const;
      bool sendPing() const;


};
|
||||
|
||||
#endif
|
||||
34
mon/source/misc/TSDatabase.h
Normal file
34
mon/source/misc/TSDatabase.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef TS_DATABASE_H_
|
||||
#define TS_DATABASE_H_
|
||||
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
#include <app/Config.h>
|
||||
|
||||
// Abstract interface for a time series database backend (implemented by
// InfluxDB and Cassandra). Implementations buffer inserted measurements and
// flush them when write() is called.
class TSDatabase
{
   public:
      // retry count for database operations; read by the implementations
      static const unsigned connectionRetries = 3;

      TSDatabase() {};
      virtual ~TSDatabase() {};

      // one insert method per monitored data source
      virtual void insertMetaNodeData(
            std::shared_ptr<Node> node, const MetaNodeDataContent& data) = 0;
      virtual void insertStorageNodeData(
            std::shared_ptr<Node> node, const StorageNodeDataContent& data) = 0;
      virtual void insertHighResMetaNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) = 0;
      virtual void insertHighResStorageNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) = 0;
      virtual void insertStorageTargetsData(
            std::shared_ptr<Node> node, const StorageTargetInfo& data) = 0;
      // client ops are keyed by operation name; perUser selects per-user
      // instead of per-client aggregation
      virtual void insertClientNodeData(
            const std::string& id, const NodeType nodeType,
            const std::map<std::string, uint64_t>& opMap, bool perUser) = 0;

      // flushes all buffered measurements to the database
      virtual void write() = 0;
};
|
||||
|
||||
#endif
|
||||
49
mon/source/net/message/NetMessageFactory.cpp
Normal file
49
mon/source/net/message/NetMessageFactory.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <common/net/message/SimpleMsg.h>
|
||||
#include <common/net/message/NetMessageTypes.h>
|
||||
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
|
||||
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
|
||||
#include <common/net/message/control/DummyMsg.h>
|
||||
#include <common/net/message/control/GenericResponseMsg.h>
|
||||
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetNodesRespMsg.h>
|
||||
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
|
||||
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
|
||||
|
||||
#include <net/message/nodes/HeartbeatMsgEx.h>
|
||||
|
||||
#include "NetMessageFactory.h"
|
||||
|
||||
/**
|
||||
* @return NetMessage that must be deleted by the caller
|
||||
* (msg->msgType is NETMSGTYPE_Invalid on error)
|
||||
*/
|
||||
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
|
||||
{
|
||||
NetMessage* msg;
|
||||
|
||||
switch(msgType)
|
||||
{
|
||||
// The following lines shoudle be grouped by "type of the message" and ordered alphabetically
|
||||
// inside the groups. There should always be one message per line to keep a clear layout
|
||||
// (although this might lead to lines that are longer than usual)
|
||||
|
||||
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
|
||||
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
|
||||
case NETMSGTYPE_GetClientStatsResp: { msg = new GetClientStatsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
|
||||
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
|
||||
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
|
||||
case NETMSGTYPE_RequestMetaDataResp: { msg = new RequestMetaDataRespMsg(); } break;
|
||||
case NETMSGTYPE_RequestStorageDataResp: { msg = new RequestStorageDataRespMsg(); } break;
|
||||
|
||||
default:
|
||||
{
|
||||
msg = new SimpleMsg(NETMSGTYPE_Invalid);
|
||||
} break;
|
||||
}
|
||||
|
||||
return std::unique_ptr<NetMessage>(msg);
|
||||
}
|
||||
|
||||
13
mon/source/net/message/NetMessageFactory.h
Normal file
13
mon/source/net/message/NetMessageFactory.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef NETMESSAGEFACTORY_H_
|
||||
#define NETMESSAGEFACTORY_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/net/message/AbstractNetMessageFactory.h>
|
||||
|
||||
class NetMessageFactory : public AbstractNetMessageFactory
|
||||
{
|
||||
protected:
|
||||
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
|
||||
} ;
|
||||
|
||||
#endif /*NETMESSAGEFACTORY_H_*/
|
||||
11
mon/source/net/message/nodes/HeartbeatMsgEx.h
Normal file
11
mon/source/net/message/nodes/HeartbeatMsgEx.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef HEARTBEATMSGEX_H_
|
||||
#define HEARTBEATMSGEX_H_
|
||||
|
||||
#include <common/net/message/nodes/HeartbeatMsg.h>
|
||||
|
||||
// Dummy subclass: mon does not process heartbeats itself, but the type must
// be instantiable by the message factory so the mgmt download doesn't fail.
class HeartbeatMsgEx : public HeartbeatMsg
{};
|
||||
|
||||
#endif /*HEARTBEATMSGEX_H_*/
|
||||
17
mon/source/nodes/MetaNodeEx.cpp
Normal file
17
mon/source/nodes/MetaNodeEx.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "MetaNodeEx.h"
|
||||
|
||||
/**
 * Creates a fresh mon-side meta node from a node object received over the
 * network; the new node starts out as responding.
 */
MetaNodeEx::MetaNodeEx(std::shared_ptr<Node> receivedNode) :
   Node(NODETYPE_Meta, receivedNode->getAlias(), receivedNode->getNumID(),
         receivedNode->getPortUDP(), receivedNode->getPortTCP(),
         receivedNode->getConnPool()->getNicList()),
   isResponding(true)
{}
|
||||
|
||||
/**
 * Re-creates a meta node from updated node data received over the network,
 * carrying over the mon-internal state (last stat request time and
 * responsiveness flag) from the previously stored node.
 */
MetaNodeEx::MetaNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<MetaNodeEx> oldNode) :
   Node(NODETYPE_Meta, receivedNode->getAlias(), receivedNode->getNumID(),
         receivedNode->getPortUDP(), receivedNode->getPortTCP(),
         receivedNode->getConnPool()->getNicList())
{
   // copy internal bookkeeping via the locking setters
   setLastStatRequestTime(oldNode->getLastStatRequestTime());
   setIsResponding(oldNode->getIsResponding());
}
|
||||
55
mon/source/nodes/MetaNodeEx.h
Normal file
55
mon/source/nodes/MetaNodeEx.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#ifndef METANODEEX_H_
|
||||
#define METANODEEX_H_
|
||||
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/threading/RWLockGuard.h>
|
||||
|
||||
// One snapshot of a meta server's monitoring data, as inserted into the
// time series database (see TSDatabase::insertMetaNodeData).
struct MetaNodeDataContent
{
   bool isResponding;             // node currently considered responding
   unsigned indirectWorkListSize;
   unsigned directWorkListSize;
   unsigned sessionCount;
   std::string hostnameid;
};
|
||||
|
||||
// Mon-side representation of a meta server node. Adds mon-internal
// bookkeeping (responsiveness flag, time of the last stat request) on top of
// the common Node class; the extra state is guarded by an RWLock
// (read lock for getters, write lock for setters).
class MetaNodeEx: public Node
{
   public:
      // creates a fresh node from received node data (starts as responding)
      MetaNodeEx(std::shared_ptr<Node> receivedNode);
      // creates a node from received data, keeping oldNode's internal state
      MetaNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<MetaNodeEx> oldNode);

   private:
      mutable RWLock lock; // guards isResponding and lastStatRequestTime
      bool isResponding;
      std::chrono::milliseconds lastStatRequestTime{0};

   public:
      std::chrono::milliseconds getLastStatRequestTime() const
      {
         RWLockGuard safeLock(lock, SafeRWLock_READ);
         return lastStatRequestTime;
      }

      void setLastStatRequestTime(const std::chrono::milliseconds& time)
      {
         RWLockGuard safeLock(lock, SafeRWLock_WRITE);
         lastStatRequestTime = time;
      }

      bool getIsResponding() const
      {
         RWLockGuard safeLock(lock, SafeRWLock_READ);
         return isResponding;
      }

      void setIsResponding(bool isResponding)
      {
         RWLockGuard safeLock(lock, SafeRWLock_WRITE);
         this->isResponding = isResponding;
      }

};
|
||||
|
||||
#endif /*METANODEEX_H_*/
|
||||
6
mon/source/nodes/MgmtNodeEx.cpp
Normal file
6
mon/source/nodes/MgmtNodeEx.cpp
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "MgmtNodeEx.h"
|
||||
|
||||
/**
 * Creates a mon-side management node.
 *
 * @param nodeID string alias of the management node
 * @param nodeNumID numeric node ID
 * @param portUDP / portTCP ports the node listens on
 * @param nicList network interfaces of the node
 */
MgmtNodeEx::MgmtNodeEx(std::string nodeID, NumNodeID nodeNumID, unsigned short portUDP,
   unsigned short portTCP, NicAddressList& nicList) :
   Node(NODETYPE_Mgmt, nodeID, nodeNumID, portUDP, portTCP, nicList)
{}
|
||||
37
mon/source/nodes/MgmtNodeEx.h
Normal file
37
mon/source/nodes/MgmtNodeEx.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef MGMTNODEEX_H_
|
||||
#define MGMTNODEEX_H_
|
||||
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
// Monitoring data snapshot for the management node.
struct MgmtdNodeDataContent
{
   bool isResponding; // node currently considered responding
};
|
||||
|
||||
// Mon-side representation of the management node. Wraps the common Node
// class and adds a small, mutex-protected monitoring data snapshot.
class MgmtNodeEx : public Node
{
   public:
      MgmtNodeEx(std::string nodeID, NumNodeID nodeNumID, unsigned short portUDP,
            unsigned short portTCP, NicAddressList& nicList);

   private:
      // guarded by 'mutex', which is not declared here — presumably
      // inherited from Node (TODO confirm)
      MgmtdNodeDataContent data;

   public:
      // returns a copy of the current data snapshot
      MgmtdNodeDataContent getContent()
      {
         const std::lock_guard<Mutex> lock(mutex);
         return this->data;
      }

      // replaces the data snapshot
      void setContent(MgmtdNodeDataContent content)
      {
         const std::lock_guard<Mutex> lock(mutex);
         this->data = content;
      }
};
|
||||
|
||||
#endif /*MGMTNODEEX_H_*/
|
||||
38
mon/source/nodes/NodeStoreMetaEx.cpp
Normal file
38
mon/source/nodes/NodeStoreMetaEx.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "NodeStoreMetaEx.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
|
||||
// Store for meta server nodes; second base-class argument is the
// NodeStoreServers ctor flag (false here; see common code for its meaning).
NodeStoreMetaEx::NodeStoreMetaEx() :
   NodeStoreServers(NODETYPE_Meta, false)
{}
|
||||
|
||||
/**
 * Adds the received meta node to the store, or updates an already stored
 * entry while preserving its mon-internal state.
 *
 * @param receivedNode node object deserialized from the network
 * @param outNodeNumID may be NULL; receives the stored node's numeric ID
 * @return NodeStoreResult::Error if the received node has no numeric ID,
 *    otherwise the result of the underlying store update
 */
NodeStoreResult NodeStoreMetaEx::addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
   NumNodeID* outNodeNumID)
{
   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!receivedNode->getNumID())
      return NodeStoreResult::Error;

   std::shared_ptr<MetaNodeEx> newNode;
   auto storedNode =
         std::static_pointer_cast<MetaNodeEx>(referenceNode(receivedNode->getNumID()));
   if (!storedNode)
   {
      // new node, create MetaNodeEx object with the parameters of the received node info
      newNode = std::make_shared<MetaNodeEx>(receivedNode);
      LOG(GENERAL, DEBUG, "Received new meta node.",
            ("nodeNumID", receivedNode->getNumID().val()));
   }
   else
   {
      // already stored node, create MetaNodeEx object with the parameters of the
      // received node info and keep the internal data
      newNode = std::make_shared<MetaNodeEx>(receivedNode, storedNode);
      LOG(GENERAL, DEBUG, "Received update for meta node.",
            ("nodeNumID", receivedNode->getNumID().val()));
   }

   const std::lock_guard<Mutex> lock(mutex);
   // pass outNodeNumID through (was nullptr, unlike the parallel
   // NodeStoreStorageEx implementation; backward-compatible for callers
   // passing nullptr)
   return addOrUpdateNodeUnlocked(std::move(newNode), outNodeNumID);
}
|
||||
16
mon/source/nodes/NodeStoreMetaEx.h
Normal file
16
mon/source/nodes/NodeStoreMetaEx.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef NODESTOREMETAEX_H_
|
||||
#define NODESTOREMETAEX_H_
|
||||
|
||||
#include <common/nodes/NodeStore.h>
|
||||
|
||||
// Node store for meta servers; wraps received Node objects into MetaNodeEx
// so mon-internal state survives updates.
class NodeStoreMetaEx : public NodeStoreServers
{
   public:
      NodeStoreMetaEx();

      // adds or updates a node received from the network (see .cpp)
      virtual NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
            NumNodeID* outNodeNumID) override;

};
|
||||
|
||||
#endif /*NODESTOREMETAEX_H_*/
|
||||
29
mon/source/nodes/NodeStoreMgmtEx.cpp
Normal file
29
mon/source/nodes/NodeStoreMgmtEx.cpp
Normal file
@@ -0,0 +1,29 @@
|
||||
#include "NodeStoreMgmtEx.h"
|
||||
|
||||
// Store for the management node; second base-class argument is the
// NodeStoreServers ctor flag (false here; see common code for its meaning).
NodeStoreMgmtEx::NodeStoreMgmtEx() :
   NodeStoreServers(NODETYPE_Mgmt, false)
{}
|
||||
|
||||
/**
 * Adds the given management node to the store or updates an existing entry.
 * Nodes that are not yet known are wrapped into a MgmtNodeEx object first.
 *
 * @param node node object deserialized from the network
 * @param outNodeNumID may be NULL; receives the stored node's numeric ID
 * @return NodeStoreResult::Error if the node has no numeric ID, otherwise
 *    the result of the underlying store update
 */
NodeStoreResult NodeStoreMgmtEx::addOrUpdateNodeEx(std::shared_ptr<Node> node, NumNodeID* outNodeNumID)
{
   const std::string alias(node->getAlias());
   const NumNodeID numID = node->getNumID();

   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!numID)
      return NodeStoreResult::Error;

   const std::lock_guard<Mutex> lock(mutex);

   // unknown so far? then wrap the received data in a mon-specific node object
   if (activeNodes.find(numID) == activeNodes.end())
   {
      NicAddressList nics = node->getNicList();

      node = boost::make_unique<MgmtNodeEx>(alias, numID, node->getPortUDP(),
            node->getPortTCP(), nics);
   }

   return addOrUpdateNodeUnlocked(std::move(node), outNodeNumID);
}
|
||||
15
mon/source/nodes/NodeStoreMgmtEx.h
Normal file
15
mon/source/nodes/NodeStoreMgmtEx.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef NODESTOREMGMTDEX_H_
|
||||
#define NODESTOREMGMTDEX_H_
|
||||
|
||||
#include <common/nodes/NodeStore.h>
|
||||
#include <nodes/MgmtNodeEx.h>
|
||||
|
||||
// Node store for the management node; wraps received Node objects into
// MgmtNodeEx on first sight.
class NodeStoreMgmtEx : public NodeStoreServers
{
   public:
      NodeStoreMgmtEx();

      // adds or updates a node received from the network (see .cpp)
      virtual NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> node, NumNodeID* outNodeNumID) override;
};
|
||||
|
||||
#endif /*NODESTOREMGMTDEX_H_*/
|
||||
38
mon/source/nodes/NodeStoreStorageEx.cpp
Normal file
38
mon/source/nodes/NodeStoreStorageEx.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "NodeStoreStorageEx.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
|
||||
// Store for storage server nodes; second base-class argument is the
// NodeStoreServers ctor flag (false here; see common code for its meaning).
NodeStoreStorageEx::NodeStoreStorageEx() :
   NodeStoreServers(NODETYPE_Storage, false)
{}
|
||||
|
||||
/**
 * Adds the received storage node to the store, or updates an already stored
 * entry while preserving its mon-internal state.
 *
 * @param receivedNode node object deserialized from the network
 * @param outNodeNumID may be NULL; receives the stored node's numeric ID
 * @return NodeStoreResult::Error if the received node has no numeric ID,
 *    otherwise the result of the underlying store update
 */
NodeStoreResult NodeStoreStorageEx::addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
   NumNodeID* outNodeNumID)
{
   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!receivedNode->getNumID())
      return NodeStoreResult::Error;

   const auto existing =
         std::static_pointer_cast<StorageNodeEx>(referenceNode(receivedNode->getNumID()));

   std::shared_ptr<StorageNodeEx> replacement;
   if (existing)
   {
      // known node: rebuild from the received info, carrying over the
      // mon-internal state of the stored node
      replacement = std::make_shared<StorageNodeEx>(receivedNode, existing);
      LOG(GENERAL, DEBUG, "Received update for storage node.",
            ("nodeNumID", receivedNode->getNumID().val()));
   }
   else
   {
      // first sighting: build a fresh mon-side node from the received info
      replacement = std::make_shared<StorageNodeEx>(receivedNode);
      LOG(GENERAL, DEBUG, "Received new storage node.",
            ("nodeNumID", receivedNode->getNumID().val()));
   }

   const std::lock_guard<Mutex> lock(mutex);
   return addOrUpdateNodeUnlocked(std::move(replacement), outNodeNumID);
}
|
||||
15
mon/source/nodes/NodeStoreStorageEx.h
Normal file
15
mon/source/nodes/NodeStoreStorageEx.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef NODESTORESTORAGEEX_H_
|
||||
#define NODESTORESTORAGEEX_H_
|
||||
|
||||
#include <common/nodes/NodeStore.h>
|
||||
|
||||
// Node store for storage servers; wraps received Node objects into
// StorageNodeEx so mon-internal state survives updates.
class NodeStoreStorageEx : public NodeStoreServers
{
   public:
      NodeStoreStorageEx();

      // adds or updates a node received from the network (see .cpp)
      virtual NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
            NumNodeID* outNodeNumID) override;
};
|
||||
|
||||
#endif /*NODESTORESTORAGEEX_H_*/
|
||||
18
mon/source/nodes/StorageNodeEx.cpp
Normal file
18
mon/source/nodes/StorageNodeEx.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "StorageNodeEx.h"
|
||||
|
||||
/**
 * Creates a fresh mon-side storage node from a node object received over the
 * network; the new node starts out as responding.
 */
StorageNodeEx::StorageNodeEx(std::shared_ptr<Node> receivedNode) :
   Node(NODETYPE_Storage, receivedNode->getAlias(), receivedNode->getNumID(),
         receivedNode->getPortUDP(), receivedNode->getPortTCP(),
         receivedNode->getConnPool()->getNicList()),
   isResponding(true)
{}
|
||||
|
||||
/**
 * Re-creates a storage node from updated node data received over the
 * network, carrying over the mon-internal state (last stat request time and
 * responsiveness flag) from the previously stored node.
 */
StorageNodeEx::StorageNodeEx(std::shared_ptr<Node> receivedNode,
   std::shared_ptr<StorageNodeEx> oldNode) :
   Node(NODETYPE_Storage, receivedNode->getAlias(), receivedNode->getNumID(),
         receivedNode->getPortUDP(), receivedNode->getPortTCP(),
         receivedNode->getConnPool()->getNicList())
{
   // copy internal bookkeeping via the locking setters
   setLastStatRequestTime(oldNode->getLastStatRequestTime());
   setIsResponding(oldNode->getIsResponding());
}
|
||||
61
mon/source/nodes/StorageNodeEx.h
Normal file
61
mon/source/nodes/StorageNodeEx.h
Normal file
@@ -0,0 +1,61 @@
|
||||
#ifndef STORAGENODEEX_H_
|
||||
#define STORAGENODEEX_H_
|
||||
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/threading/RWLockGuard.h>
|
||||
|
||||
// One snapshot of a storage server's monitoring data, as inserted into the
// time series database (see TSDatabase::insertStorageNodeData).
struct StorageNodeDataContent
{
   bool isResponding; // node currently considered responding

   unsigned indirectWorkListSize;
   unsigned directWorkListSize;

   // disk capacity/throughput figures as reported by the node
   // (units defined by the reporting side — not visible here)
   int64_t diskSpaceTotal;
   int64_t diskSpaceFree;
   int64_t diskRead;
   int64_t diskWrite;

   unsigned sessionCount;
   std::string hostnameid;
};
|
||||
|
||||
class StorageNodeEx : public Node
|
||||
{
|
||||
public:
|
||||
StorageNodeEx(std::shared_ptr<Node> receivedNode);
|
||||
StorageNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<StorageNodeEx> oldNode);
|
||||
|
||||
private:
|
||||
mutable RWLock lock;
|
||||
bool isResponding;
|
||||
std::chrono::milliseconds lastStatRequestTime{0};
|
||||
|
||||
public:
|
||||
std::chrono::milliseconds getLastStatRequestTime() const
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
return lastStatRequestTime;
|
||||
}
|
||||
|
||||
void setLastStatRequestTime(const std::chrono::milliseconds& time)
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
lastStatRequestTime = time;
|
||||
}
|
||||
|
||||
bool getIsResponding() const
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
return isResponding;
|
||||
}
|
||||
|
||||
void setIsResponding(bool isResponding)
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
this->isResponding = isResponding;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /*STORAGENODEEX_H_*/
|
||||
Reference in New Issue
Block a user