New upstream version 8.1.0
This commit is contained in:
324
mon/source/app/App.cpp
Normal file
324
mon/source/app/App.cpp
Normal file
@@ -0,0 +1,324 @@
|
||||
#include "App.h"
|
||||
|
||||
#include <app/SignalHandler.h>
|
||||
#include <common/components/ComponentInitException.h>
|
||||
#include <common/components/worker/DummyWork.h>
|
||||
#include <misc/Cassandra.h>
|
||||
#include <misc/InfluxDB.h>
|
||||
|
||||
|
||||
App::App(int argc, char** argv) :
|
||||
argc(argc), argv(argv)
|
||||
{}
|
||||
|
||||
void App::run()
|
||||
{
|
||||
try
|
||||
{
|
||||
cfg = boost::make_unique<Config>(argc,argv);
|
||||
runNormal();
|
||||
appResult = AppCode::NO_ERROR;
|
||||
}
|
||||
catch (const InvalidConfigException& e)
|
||||
{
|
||||
std::ostringstream err;
|
||||
err << "Config error: " << e.what() << std::endl
|
||||
<< "[BeeGFS Mon Version: " << BEEGFS_VERSION << std::endl
|
||||
<< "Refer to the default config file (/etc/beegfs/beegfs-mon.conf)" << std::endl
|
||||
<< "or visit http://www.beegfs.com to find out about configuration options.]";
|
||||
printOrLogError(err.str());
|
||||
appResult = AppCode::INVALID_CONFIG;
|
||||
}
|
||||
catch (const ComponentInitException& e)
|
||||
{
|
||||
printOrLogError("Component initialization error: " + std::string(e.what()));
|
||||
appResult = AppCode::INITIALIZATION_ERROR;
|
||||
}
|
||||
catch (const std::runtime_error& e)
|
||||
{
|
||||
printOrLogError("Runtime error: " + std::string(e.what()));
|
||||
appResult = AppCode::RUNTIME_ERROR;
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
printOrLogError("Generic error: " + std::string(e.what()));
|
||||
appResult = AppCode::RUNTIME_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
void App::printOrLogError(const std::string& text) const
|
||||
{
|
||||
if (Logger::isInitialized())
|
||||
LOG(GENERAL, ERR, text);
|
||||
else
|
||||
std::cerr << std::endl << text << std::endl << std::endl;
|
||||
}
|
||||
|
||||
void App::runNormal()
|
||||
{
|
||||
Logger::createLogger(cfg->getLogLevel(), cfg->getLogType(), cfg->getLogNoDate(),
|
||||
cfg->getLogStdFile(), cfg->getLogNumLines(), cfg->getLogNumRotatedFiles());
|
||||
|
||||
pidFileLockFD = createAndLockPIDFile(cfg->getPIDFile());
|
||||
initDataObjects();
|
||||
SignalHandler::registerSignalHandler(this);
|
||||
initLocalNodeInfo();
|
||||
initWorkers();
|
||||
initComponents();
|
||||
|
||||
RDMASocket::rdmaForkInitOnce();
|
||||
|
||||
|
||||
if (cfg->getRunDaemonized())
|
||||
daemonize();
|
||||
|
||||
logInfos();
|
||||
|
||||
// make sure components don't receive SIGINT/SIGTERM (blocked signals are inherited)
|
||||
PThread::blockInterruptSignals();
|
||||
startWorkers();
|
||||
startComponents();
|
||||
PThread::unblockInterruptSignals();
|
||||
|
||||
joinComponents();
|
||||
joinWorkers();
|
||||
}
|
||||
|
||||
void App::initLocalNodeInfo()
|
||||
{
|
||||
bool useRDMA = cfg->getConnUseRDMA();
|
||||
unsigned portUDP = cfg->getConnMonPort();
|
||||
|
||||
StringList allowedInterfaces;
|
||||
std::string interfacesFilename = cfg->getConnInterfacesFile();
|
||||
if (interfacesFilename.length() )
|
||||
cfg->loadStringListFile(interfacesFilename.c_str(), allowedInterfaces);
|
||||
|
||||
NetworkInterfaceCard::findAll(&allowedInterfaces, useRDMA, &localNicList);
|
||||
|
||||
if (localNicList.empty() )
|
||||
throw InvalidConfigException("Couldn't find any usable NIC");
|
||||
|
||||
localNicList.sort(NetworkInterfaceCard::NicAddrComp{&allowedInterfaces});
|
||||
NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);
|
||||
|
||||
noDefaultRouteNets = std::make_shared<NetVector>();
|
||||
if(!initNoDefaultRouteList(noDefaultRouteNets.get()))
|
||||
throw InvalidConfigException("Failed to parse connNoDefaultRoute");
|
||||
|
||||
initRoutingTable();
|
||||
updateRoutingTable();
|
||||
|
||||
std::string nodeID = System::getHostname();
|
||||
|
||||
// TODO add a Mon nodetype at some point
|
||||
localNode = std::make_shared<LocalNode>(NODETYPE_Client, nodeID, NumNodeID(1), portUDP, 0, localNicList);
|
||||
}
|
||||
|
||||
void App::initDataObjects()
|
||||
{
|
||||
netFilter = boost::make_unique<NetFilter>(cfg->getConnNetFilterFile());
|
||||
tcpOnlyFilter = boost::make_unique<NetFilter>(cfg->getConnTcpOnlyFilterFile());
|
||||
netMessageFactory = boost::make_unique<NetMessageFactory>();
|
||||
workQueue = boost::make_unique<MultiWorkQueue>();
|
||||
|
||||
targetMapper = boost::make_unique<TargetMapper>();
|
||||
|
||||
metaNodes = boost::make_unique<NodeStoreMetaEx>();
|
||||
storageNodes = boost::make_unique<NodeStoreStorageEx>();
|
||||
mgmtNodes = boost::make_unique<NodeStoreMgmtEx>();
|
||||
|
||||
metaBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
|
||||
storageBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
|
||||
|
||||
|
||||
if (cfg->getDbType() == Config::DbTypes::CASSANDRA)
|
||||
{
|
||||
Cassandra::Config cassandraConfig;
|
||||
cassandraConfig.host = cfg->getDbHostName();
|
||||
cassandraConfig.port = cfg->getDbHostPort();
|
||||
cassandraConfig.database = cfg->getDbDatabase();
|
||||
cassandraConfig.maxInsertsPerBatch = cfg->getCassandraMaxInsertsPerBatch();
|
||||
cassandraConfig.TTLSecs = cfg->getCassandraTTLSecs();
|
||||
|
||||
tsdb = boost::make_unique<Cassandra>(std::move(cassandraConfig));
|
||||
}
|
||||
else // Config::DbTypes::INFLUXDB OR Config::DbTypes::INFLUXDB2
|
||||
{
|
||||
InfluxDB::Config influxdbConfig;
|
||||
influxdbConfig.host = cfg->getDbHostName();
|
||||
influxdbConfig.port = cfg->getDbHostPort();
|
||||
influxdbConfig.maxPointsPerRequest = cfg->getInfluxdbMaxPointsPerRequest();
|
||||
influxdbConfig.httpTimeout = cfg->getHttpTimeout();
|
||||
influxdbConfig.curlCheckSSLCertificates = cfg->getCurlCheckSSLCertificates();
|
||||
if (cfg->getDbType() == Config::DbTypes::INFLUXDB2)
|
||||
{
|
||||
influxdbConfig.bucket = cfg->getDbBucket();
|
||||
influxdbConfig.organization = cfg->getDbAuthOrg();
|
||||
influxdbConfig.token = cfg->getDbAuthToken();
|
||||
influxdbConfig.dbVersion = INFLUXDB2;
|
||||
}
|
||||
else
|
||||
{
|
||||
influxdbConfig.database = cfg->getDbDatabase();
|
||||
influxdbConfig.setRetentionPolicy = cfg->getInfluxDbSetRetentionPolicy();
|
||||
influxdbConfig.retentionDuration = cfg->getInfluxDbRetentionDuration();
|
||||
influxdbConfig.username = cfg->getDbAuthUsername();
|
||||
influxdbConfig.password = cfg->getDbAuthPassword();
|
||||
influxdbConfig.dbVersion = INFLUXDB;
|
||||
}
|
||||
tsdb = boost::make_unique<InfluxDB>(std::move(influxdbConfig));
|
||||
}
|
||||
}
|
||||
|
||||
void App::initComponents()
|
||||
{
|
||||
nodeListRequestor = boost::make_unique<NodeListRequestor>(this);
|
||||
statsCollector = boost::make_unique<StatsCollector>(this);
|
||||
cleanUp = boost::make_unique<CleanUp>(this);
|
||||
}
|
||||
|
||||
void App::startComponents()
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Starting components...");
|
||||
nodeListRequestor->start();
|
||||
statsCollector->start();
|
||||
cleanUp->start();
|
||||
LOG(GENERAL, DEBUG, "Components running.");
|
||||
}
|
||||
|
||||
void App::stopComponents()
|
||||
{
|
||||
if (nodeListRequestor)
|
||||
nodeListRequestor->selfTerminate();
|
||||
if (statsCollector)
|
||||
statsCollector->selfTerminate();
|
||||
if (cleanUp)
|
||||
cleanUp->selfTerminate();
|
||||
|
||||
stopWorkers();
|
||||
selfTerminate();
|
||||
}
|
||||
|
||||
void App::joinComponents()
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Joining Component threads...");
|
||||
nodeListRequestor->join();
|
||||
statsCollector->join();
|
||||
cleanUp->join();
|
||||
LOG(GENERAL, CRITICAL, "All components stopped. Exiting now.");
|
||||
}
|
||||
|
||||
void App::initWorkers()
|
||||
{
|
||||
const unsigned numDirectWorkers = 1;
|
||||
const unsigned workersBufSize = 1024*1024;
|
||||
|
||||
unsigned numWorkers = cfg->getTuneNumWorkers();
|
||||
|
||||
for (unsigned i=0; i < numWorkers; i++)
|
||||
{
|
||||
auto worker = boost::make_unique<Worker>("Worker" + StringTk::intToStr(i+1),
|
||||
workQueue.get(), QueueWorkType_INDIRECT);
|
||||
|
||||
worker->setBufLens(workersBufSize, workersBufSize);
|
||||
workerList.push_back(std::move(worker));
|
||||
}
|
||||
|
||||
for (unsigned i=0; i < numDirectWorkers; i++)
|
||||
{
|
||||
auto worker = boost::make_unique<Worker>("DirectWorker" + StringTk::intToStr(i+1),
|
||||
workQueue.get(), QueueWorkType_DIRECT);
|
||||
|
||||
worker->setBufLens(workersBufSize, workersBufSize);
|
||||
workerList.push_back(std::move(worker));
|
||||
}
|
||||
}
|
||||
|
||||
void App::startWorkers()
|
||||
{
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
(*worker)->start();
|
||||
}
|
||||
}
|
||||
|
||||
void App::stopWorkers()
|
||||
{
|
||||
// need two loops because we don't know if the worker that handles the work will be the same that
|
||||
// received the self-terminate-request
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
(*worker)->selfTerminate();
|
||||
|
||||
// add dummy work to wake up the worker immediately for faster self termination
|
||||
PersonalWorkQueue* personalQ = (*worker)->getPersonalWorkQueue();
|
||||
workQueue->addPersonalWork(new DummyWork(), personalQ);
|
||||
}
|
||||
}
|
||||
|
||||
void App::joinWorkers()
|
||||
{
|
||||
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
waitForComponentTermination((*worker).get());
|
||||
}
|
||||
}
|
||||
|
||||
void App::logInfos()
|
||||
{
|
||||
LOG(GENERAL, CRITICAL, std::string("Version: ") + BEEGFS_VERSION);
|
||||
#ifdef BEEGFS_DEBUG
|
||||
LOG(GENERAL, DEBUG, "--DEBUG VERSION--");
|
||||
#endif
|
||||
|
||||
// list usable network interfaces
|
||||
NicAddressList nicList = getLocalNicList();
|
||||
logUsableNICs(NULL, nicList);
|
||||
|
||||
// print net filters
|
||||
if (netFilter->getNumFilterEntries() )
|
||||
{
|
||||
LOG(GENERAL, WARNING, std::string("Net filters: ")
|
||||
+ StringTk::uintToStr(netFilter->getNumFilterEntries() ) );
|
||||
}
|
||||
|
||||
if (tcpOnlyFilter->getNumFilterEntries() )
|
||||
{
|
||||
LOG(GENERAL, WARNING, std::string("TCP-only filters: ")
|
||||
+ StringTk::uintToStr(tcpOnlyFilter->getNumFilterEntries() ) );
|
||||
}
|
||||
}
|
||||
|
||||
void App::daemonize()
|
||||
{
|
||||
int nochdir = 1; // 1 to keep working directory
|
||||
int noclose = 0; // 1 to keep stdin/-out/-err open
|
||||
|
||||
LOG(GENERAL, CRITICAL, "Detaching process...");
|
||||
|
||||
int detachRes = daemon(nochdir, noclose);
|
||||
if (detachRes == -1)
|
||||
throw std::runtime_error(std::string("Unable to detach process: ")
|
||||
+ System::getErrString());
|
||||
|
||||
updateLockedPIDFile(pidFileLockFD); // ignored if pidFileFD is -1
|
||||
}
|
||||
|
||||
void App::handleComponentException(std::exception& e)
|
||||
{
|
||||
LOG(GENERAL, CRITICAL, "This component encountered an unrecoverable error.", sysErr,
|
||||
("Exception", e.what()));
|
||||
|
||||
LOG(GENERAL, WARNING, "Shutting down...");
|
||||
stopComponents();
|
||||
}
|
||||
|
||||
void App::handleNetworkInterfaceFailure(const std::string& devname)
|
||||
{
|
||||
// Nothing to do. This App has no internodeSyncer that would rescan the
|
||||
// netdevs.
|
||||
LOG(GENERAL, ERR, "Network interface failure.",
|
||||
("Device", devname));
|
||||
}
|
||||
Reference in New Issue
Block a user