New upstream version 8.1.0
This commit is contained in:
122
mon/CMakeLists.txt
Normal file
122
mon/CMakeLists.txt
Normal file
@@ -0,0 +1,122 @@
|
||||
include_directories(
|
||||
source
|
||||
)
|
||||
|
||||
add_library(
|
||||
mon STATIC
|
||||
./source/exception/CurlException.h
|
||||
./source/exception/DatabaseException.h
|
||||
./source/net/message/NetMessageFactory.h
|
||||
./source/net/message/NetMessageFactory.cpp
|
||||
./source/net/message/nodes/HeartbeatMsgEx.h
|
||||
./source/components/NodeListRequestor.cpp
|
||||
./source/components/StatsCollector.h
|
||||
./source/components/StatsCollector.cpp
|
||||
./source/components/NodeListRequestor.h
|
||||
./source/components/worker/GetNodesWork.cpp
|
||||
./source/components/worker/RequestMetaDataWork.cpp
|
||||
./source/components/worker/RequestStorageDataWork.cpp
|
||||
./source/components/worker/RequestStorageDataWork.h
|
||||
./source/components/worker/RequestMetaDataWork.h
|
||||
./source/components/worker/GetNodesWork.h
|
||||
./source/components/CleanUp.cpp
|
||||
./source/components/CleanUp.h
|
||||
./source/app/Config.h
|
||||
./source/app/App.h
|
||||
./source/app/Config.cpp
|
||||
./source/app/App.cpp
|
||||
./source/app/SignalHandler.cpp
|
||||
./source/app/SignalHandler.h
|
||||
./source/app/Main.cpp
|
||||
./source/misc/CurlWrapper.cpp
|
||||
./source/misc/InfluxDB.cpp
|
||||
./source/misc/CurlWrapper.h
|
||||
./source/misc/Cassandra.h
|
||||
./source/misc/InfluxDB.h
|
||||
./source/misc/Cassandra.cpp
|
||||
./source/misc/TSDatabase.h
|
||||
./source/nodes/NodeStoreMgmtEx.cpp
|
||||
./source/nodes/NodeStoreStorageEx.cpp
|
||||
./source/nodes/NodeStoreMetaEx.h
|
||||
./source/nodes/StorageNodeEx.h
|
||||
./source/nodes/NodeStoreMetaEx.cpp
|
||||
./source/nodes/MetaNodeEx.cpp
|
||||
./source/nodes/MgmtNodeEx.cpp
|
||||
./source/nodes/NodeStoreStorageEx.h
|
||||
./source/nodes/StorageNodeEx.cpp
|
||||
./source/nodes/MetaNodeEx.h
|
||||
./source/nodes/MgmtNodeEx.h
|
||||
./source/nodes/NodeStoreMgmtEx.h
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
mon PRIVATE
|
||||
../thirdparty/source/datastax
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
mon
|
||||
beegfs-common
|
||||
pthread
|
||||
dl
|
||||
curl
|
||||
)
|
||||
|
||||
add_executable(
|
||||
beegfs-mon
|
||||
source/app/Main.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
beegfs-mon
|
||||
mon
|
||||
)
|
||||
|
||||
# if(NOT BEEGFS_SKIP_TESTS)
|
||||
# add_executable(
|
||||
# test-mon
|
||||
# # no tests yet
|
||||
# )
|
||||
#
|
||||
# target_link_libraries(
|
||||
# test-mon
|
||||
# mon
|
||||
# gtest
|
||||
# )
|
||||
#
|
||||
# # required for a test
|
||||
# file(
|
||||
# COPY ${CMAKE_CURRENT_SOURCE_DIR}/build/dist/etc/beegfs-mon.conf
|
||||
# DESTINATION dist/etc/
|
||||
# )
|
||||
#
|
||||
# add_test(
|
||||
# NAME test-mon
|
||||
# COMMAND test-mon --compiler
|
||||
# )
|
||||
# endif()
|
||||
|
||||
install(
|
||||
TARGETS beegfs-mon
|
||||
DESTINATION "usr/sbin"
|
||||
COMPONENT "mon"
|
||||
)
|
||||
|
||||
install(
|
||||
FILES "build/dist/usr/lib/systemd/system/beegfs-mon.service" "build/dist/usr/lib/systemd/system/beegfs-mon@.service"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/systemd/system"
|
||||
COMPONENT "mon"
|
||||
)
|
||||
|
||||
install(
|
||||
FILES "build/dist/etc/beegfs-mon.conf"
|
||||
DESTINATION "etc/beegfs"
|
||||
COMPONENT "mon"
|
||||
)
|
||||
|
||||
install(
|
||||
FILES "build/dist/etc/beegfs-mon.auth"
|
||||
DESTINATION "etc/beegfs"
|
||||
COMPONENT "mon"
|
||||
)
|
||||
|
||||
104
mon/README.txt
Normal file
104
mon/README.txt
Normal file
@@ -0,0 +1,104 @@
|
||||
BeeGFS monitoring service README
|
||||
================================
|
||||
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The BeeGFS monitoring service (beegfs-mon) collects statistical data from the
|
||||
various BeeGFS nodes and stores it into a time series database (at the moment InfluxDB and Apache
|
||||
Cassandra are supported).
|
||||
|
||||
|
||||
Prerequisites and dependencies
|
||||
------------------------------
|
||||
|
||||
We highly recommend using InfluxDB as the backend unless you already have a Cassandra cluster in use
|
||||
that you want to utilize for mon. The next sections only refer to InfluxDB, if you want to use
|
||||
Cassandra, please refer to the last paragraph.
|
||||
|
||||
InfluxDB and Grafana are NOT included within this package for several reasons:
|
||||
|
||||
* The user might want to run the InfluxDB server on another machine and/or wants
|
||||
to integrate the beegfs-mon into an already existing setup.
|
||||
* The user might want to use his own or other thirdparty tools to evaluate the
|
||||
collected data
|
||||
* They can be updated independently by the user whenever he wants to.
|
||||
|
||||
So, to use beegfs-mon, a working and reachable InfluxDB setup is required. Installing
|
||||
InfluxDB should be simple in most cases since there are prebuilt packages available
|
||||
for all of the distributions that are supported by BeeGFS.
|
||||
The installation instructions can be found at
|
||||
|
||||
https://docs.influxdata.com/influxdb/v1.3/introduction/installation/ .
|
||||
|
||||
|
||||
Grafana, on the other hand, is completely optional. It's completely up to the user what
|
||||
he wants to do with the data stored in the database. However, for the sake of simplicity,
|
||||
we provide some prebuilt Grafana dashboards that can be easily imported into the
|
||||
Grafana setup and used for monitoring. The installation instructions can be
|
||||
found at
|
||||
|
||||
http://docs.grafana.org/installation/ .
|
||||
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
### Meet the prerequisites
|
||||
|
||||
If there isn't an already running InfluxDB service that you want to use, install and start
|
||||
InfluxDB first (look above for the link to the installation documentation).
|
||||
If the service runs on another host, make sure it is reachable via HTTP.
|
||||
|
||||
### Grafana Dashboards
|
||||
|
||||
If you want an out of the box solution, you should use the provided Grafana panels
|
||||
for visualization. So, install Grafana (again, look above for
|
||||
the installation instructions) and make sure it can reach the InfluxDB service via network.
|
||||
|
||||
|
||||
#### Default installation
|
||||
|
||||
You can then use the provided installation script which can be found under
|
||||
scripts/import-dashboards. For the out-of-the-box setup with InfluxDB and Grafana being
|
||||
on the same host, just use
|
||||
|
||||
import-dashboards default
|
||||
|
||||
|
||||
#### Custom installation
|
||||
|
||||
In any other case, either provide the script with the URLs to InfluxDB and Grafana
|
||||
(call the script without arguments for usage instruction) or install them manually.
|
||||
The latter can be done from within Grafana's web interface:
|
||||
|
||||
First, the datasource must be defined. In the main menu, click on "Data Sources" and
|
||||
then "Add Data Source". Enter a name, hostname and port where your InfluxDB is running. Save.
|
||||
|
||||
To add the dashboards, select "Dashboards/Import" in the main menu. Navigate to [...] and select
|
||||
one of the dashboard .json files. Select the datasource you created before in the dropdown and
|
||||
click "Import". Repeat for the rest of the panels.
|
||||
|
||||
You can now click on "Dashboards" in the main menu and then on the Button to the right of it.
|
||||
A list of the installed dashboards should pop up, in which you can select the one you want to watch.
|
||||
If your BeeGFS setup, the beegfs-mon daemon and InfluxDB are already running and are configured
|
||||
properly, you should already see some data being collected.
|
||||
|
||||
|
||||
For more documentation and help in using Grafana, please visit the official website
|
||||
http://docs.grafana.org.
|
||||
|
||||
|
||||
Apache Cassandra
|
||||
----------------
|
||||
|
||||
If you want to use Cassandra, please be aware that currently there are no Grafana panels for it
|
||||
available.
|
||||
|
||||
To configure beegfs-mon to use Cassandra, you need to install the datastax cassandra client library
|
||||
on your system which you can find here: https://github.com/datastax/cpp-driver.
|
||||
It has to be version 2.9. beegfs-mon loads the library dynamically, so no recompilation is
|
||||
required. The beegfs-mon config file needs to be edited to use the cassandra plugin. The available
|
||||
options are explained over there.
|
||||
27
mon/build/Makefile
Normal file
27
mon/build/Makefile
Normal file
@@ -0,0 +1,27 @@
|
||||
include ../../build/Makefile
|
||||
|
||||
main := ../source/app/Main.cpp
|
||||
sources := $(filter-out $(main), $(shell find ../source -iname '*.cpp'))
|
||||
|
||||
$(call build-static-library,\
|
||||
Mon,\
|
||||
$(sources),\
|
||||
common dl curl cassandra nl3-route,\
|
||||
../source)
|
||||
|
||||
$(call define-dep-lib,\
|
||||
Mon,\
|
||||
-I ../source,\
|
||||
$(build_dir)/libMon.a)
|
||||
|
||||
$(call build-executable,\
|
||||
beegfs-mon,\
|
||||
$(main),\
|
||||
Mon common dl curl cassandra nl3-route)
|
||||
|
||||
$(call build-test,\
|
||||
test-runner,\
|
||||
$(shell find ../tests -name '*.cpp'),\
|
||||
Mon common dl curl cassandra nl3-route,\
|
||||
../tests)
|
||||
|
||||
9
mon/build/dist/etc/beegfs-mon.auth
vendored
Normal file
9
mon/build/dist/etc/beegfs-mon.auth
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
# This file configures the credentials needed to connect to your monitoring database instance.
|
||||
# This currently only works with InfluxDB.
|
||||
|
||||
username =
|
||||
password =
|
||||
|
||||
# used by influxdb V2 only
|
||||
organization =
|
||||
token =
|
||||
345
mon/build/dist/etc/beegfs-mon.conf
vendored
Normal file
345
mon/build/dist/etc/beegfs-mon.conf
vendored
Normal file
@@ -0,0 +1,345 @@
|
||||
# This is a config file for the BeeGFS Mon daemon.
|
||||
# http://www.beegfs.com
|
||||
|
||||
|
||||
# --- [Table of Contents] ---
|
||||
#
|
||||
# 1) Settings
|
||||
# 2) Command Line Arguments
|
||||
# 3) Basic Settings Documentation
|
||||
# 4) Advanced Settings Documentation
|
||||
|
||||
|
||||
#
|
||||
# --- Section 1.1: [Basic Settings] ---
|
||||
#
|
||||
|
||||
sysMgmtdHost =
|
||||
|
||||
#
|
||||
# --- Section 1.2: [Advanced Settings] ---
|
||||
#
|
||||
|
||||
dbType = influxdb
|
||||
dbHostName = localhost
|
||||
dbHostPort = 8086
|
||||
dbAuthFile =
|
||||
|
||||
# used by influxdb only
|
||||
dbDatabase = beegfs_mon
|
||||
dbMaxPointsPerRequest = 5000
|
||||
dbSetRetentionPolicy = true
|
||||
dbRetentionDuration = 1d
|
||||
|
||||
# used by influxdb V2 only
|
||||
dbBucket =
|
||||
|
||||
# used by cassandra only
|
||||
cassandraMaxInsertsPerBatch = 25
|
||||
cassandraTTLSecs = 86400
|
||||
|
||||
|
||||
collectClientOpsByNode = true
|
||||
collectClientOpsByUser = true
|
||||
statsRequestIntervalSecs = 5
|
||||
httpTimeoutMSecs = 1000
|
||||
|
||||
nodelistRequestIntervalSecs = 30
|
||||
|
||||
curlCheckSSLCertificates = true
|
||||
|
||||
connMgmtdPort = 8008
|
||||
connPortShift = 0
|
||||
|
||||
connAuthFile = /etc/beegfs/conn.auth
|
||||
connDisableAuthentication = false
|
||||
connFallbackExpirationSecs = 900
|
||||
connMaxInternodeNum = 3
|
||||
connInterfacesFile =
|
||||
connNetFilterFile =
|
||||
connTcpOnlyFilterFile =
|
||||
|
||||
logType = syslog
|
||||
logLevel = 3
|
||||
logNoDate = false
|
||||
logNumLines = 50000
|
||||
logNumRotatedFiles = 2
|
||||
logStdFile = /var/log/beegfs-mon.log
|
||||
|
||||
runDaemonized = true
|
||||
|
||||
tuneNumWorkers = 4
|
||||
|
||||
|
||||
#
|
||||
# --- Section 2: [Command Line Arguments] ---
|
||||
#
|
||||
|
||||
# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
|
||||
# specify a different config file for beegfs-mon.
|
||||
# All other options in this file can also be used as command line
|
||||
# arguments, overriding the corresponding config file values.
|
||||
|
||||
|
||||
#
|
||||
# --- Section 3: [Basic Settings Documentation] ---
|
||||
#
|
||||
|
||||
# [sysMgmtdHost]
|
||||
# Hostname (or IP) of the host running the management service.
|
||||
# (See also "connMgmtdPort")
|
||||
# Default: <none>
|
||||
|
||||
|
||||
#
|
||||
# --- Section 4: [Advanced Settings Documentation] ---
|
||||
#
|
||||
|
||||
#
|
||||
# --- Section 4.1: [Mon] ---
|
||||
#
|
||||
|
||||
# [dbType]
|
||||
# The time series database engine to use. Currently, influxdb and cassandra are supported.
|
||||
# For most use cases, using InfluxDB is recommended because it is easier to use and more
|
||||
# lightweight.
|
||||
|
||||
# [dbHostName]
|
||||
# The hostname where the database backend runs. Can also be given as a URL including
|
||||
# protocol. The protocol can be HTTP (default), or, if an SSL encrypted connection
|
||||
# is required, HTTPS. Example: https://localhost.
|
||||
# Default: localhost
|
||||
|
||||
# [dbHostPort]
|
||||
# The port on which the database backend listens for connections.
|
||||
# Default: 8086 (which is the default port used by InfluxDB)
|
||||
|
||||
# [dbDatabase]
|
||||
# The database/keyspace name in which the data is stored.
|
||||
# Default: beegfs_mon
|
||||
|
||||
# [dbAuthFile]
|
||||
# Defines a file where the authentication credentials for the database are stored.
|
||||
# This file should be set to be readable by root only. When mon was installed via
|
||||
# package, the file was already created and placed at /etc/beegfs/beegfs-mon.auth
|
||||
# Default: <none>
|
||||
|
||||
# [dbMaxPointsPerRequest]
|
||||
# Sets the max number of points that will be cached until the whole
|
||||
# set is sent via HTTP to the database backend. After a whole set of statistics has been
|
||||
# collected, the cached points will be sent in any case. Small values lead to
|
||||
# many sent requests, and thus packets; values that are too big can exceed certain limits and may
|
||||
# cause failure. A few thousand is a sensible value here. Only used for InfluxDB.
|
||||
# Default: 5000
|
||||
|
||||
# [dbSetRetentionPolicy]
|
||||
# Determines whether the service shall automatically apply a retention policy
|
||||
# to the database at startup. This should only be set to false if the user wants
|
||||
# to configure the database by himself and wants to have a more sophisticated
|
||||
# retention policy. Only used for InfluxDB.
|
||||
# Default: true
|
||||
|
||||
# [dbRetentionDuration]
|
||||
# Defines how long the data points shall be stored until dropped by InfluxDB.
|
||||
# This is only relevant if dbSetRetentionPolicy is set to true.
|
||||
# Valid values are in the form ^[0-9]+[smhdw]$, where the suffixes mean
|
||||
# seconds, minutes, hours, days, weeks. 2d, for example, means two days.
|
||||
# Only used for InfluxDB. For more details please consult the InfluxDB documentation.
|
||||
# Default: 1d (one day)
|
||||
|
||||
# [dbBucket]
|
||||
# The bucket name in which the data is stored.
|
||||
|
||||
# [cassandraMaxInsertsPerBatch]
|
||||
# Sets the max number of INSERT statements that will be batched together for execution
|
||||
# using the thirdparty client library for cassandra. It only accepts a maximum of a few
|
||||
# thousand bytes by default, so a sensible order of magnitude is around 20 to 30. If
|
||||
# you get warnings in the log like "Batch for [beegfs_mon.meta,
|
||||
# beegfs_mon.highresmeta] is of size X, exceeding specified threshold of 5120 by Y.",
|
||||
# you can try to reduce this number. Only used for Cassandra.
|
||||
# Default: 25
|
||||
|
||||
# [cassandraTTLSecs]
|
||||
# Defines the number of seconds the data rows shall be stored until marked for removal
|
||||
# by the database engine. Only used for Cassandra.
|
||||
# Default: 86400 (one day)
|
||||
|
||||
|
||||
# [collectClientOpsByNode]
|
||||
# Sets whether mon collects the client ops stats from the nodes, grouped by the client node IP.
|
||||
# Default: true
|
||||
|
||||
# [collectClientOpsByUser]
|
||||
# Sets whether mon collects the client ops stats from the nodes, grouped by the client's user ID.
|
||||
# Default: true
|
||||
|
||||
# [statsRequestIntervalSecs]
|
||||
# Sets the waiting interval in seconds between the stats query operations.
|
||||
# This does not affect the high resolution stats (which are always measured in
|
||||
# one second intervals).
|
||||
# Default: 5
|
||||
|
||||
# [httpTimeoutMSecs]
|
||||
# Defines the timeout for the http requests that are sent to the InfluxDB daemon
|
||||
# in milliseconds.
|
||||
# Default: 1000
|
||||
|
||||
# [nodelistRequestIntervalSecs]
|
||||
# Sets the waiting interval between the nodelist request operations
|
||||
# in seconds. This defines how often the service pulls the newest node lists from
|
||||
# the management daemon.
|
||||
# Default: 30
|
||||
|
||||
|
||||
# [curlCheckSSLCertificates]
|
||||
# Decides whether the server's certificate and hostname shall be checked to be valid when using
|
||||
# an SSL encrypted connection to an InfluxDB host.
|
||||
# Disable when using self signed certificates without proper CA certificates.
|
||||
# Default: true
|
||||
|
||||
#
|
||||
# --- Section 4.2: [Connections & Communication] ---
|
||||
#
|
||||
|
||||
# [connMgmtdPort]
|
||||
# The UDP and TCP port of the management node.
|
||||
# Default: 8008
|
||||
|
||||
# [connPortShift]
|
||||
# Shifts all following UDP and TCP ports according to the specified value.
|
||||
# Intended to make port configuration easier in case you do not want to
|
||||
# configure each port individually.
|
||||
# Default: 0
|
||||
|
||||
# [connAuthFile]
|
||||
# The path to a file that contains a shared secret for connection based
|
||||
# authentication. Only peers that use the same shared secret will be able to
|
||||
# connect.
|
||||
# Default: <none>
|
||||
|
||||
# [connDisableAuthentication]
|
||||
# If set to true, explicitly disables connection authentication and allows the
|
||||
# service to run without a connAuthFile. Running BeeGFS without connection
|
||||
# authentication is considered insecure and is not recommended.
|
||||
# Default: false
|
||||
|
||||
# [connFallbackExpirationSecs]
|
||||
# The time in seconds after which a connection to a fallback interface expires.
|
||||
# When a fallback connection expires, the system will try to establish a new
|
||||
# connection to the other host's primary interface (falling back to another
|
||||
# interface again if necessary).
|
||||
# Note: The priority of node interfaces can be configured using the
|
||||
# "connInterfacesFile" parameter.
|
||||
# Default: 900
|
||||
|
||||
# [connMaxInternodeNum]
|
||||
# The maximum number of simultaneous connections to the same node.
|
||||
# Default: 3
|
||||
|
||||
# [connInterfacesFile]
|
||||
# The path to a text file that specifies the names of the interfaces which
|
||||
# may be used for communication by other nodes. One interface per line. The
|
||||
# line number also defines the priority of an interface.
|
||||
# Example: "ib0" in the first line, "eth0" in the second line.
|
||||
# Values: This setting is optional. If unspecified, all available interfaces
|
||||
# will be published and priorities will be assigned automatically.
|
||||
# Note: This information is sent to other hosts to inform them about possible
|
||||
# communication paths. See connRestrictOutboundInterfaces for this
|
||||
# configuration's potential effect on outbound connections.
|
||||
# Default: <none>
|
||||
|
||||
# [connRestrictOutboundInterfaces]
|
||||
# The default behavior of BeeGFS is to use any available network interface
|
||||
# to establish an outbound connection to a node, according to the TCP/IP
|
||||
# configuration of the operating system. When connRestrictOutboundInterfaces
|
||||
# is set to true, the network interfaces used for outbound connections are
|
||||
# limited to the values specified by connInterfacesFile or connInterfacesList.
|
||||
# The operating system routing tables are consulted to determine which
|
||||
# interface to use for a particular node's IP address. If there is no
|
||||
# route from the configured interfaces that is suitable for a node's IP
|
||||
# address then the connection will fail to be established.
|
||||
# Default: false
|
||||
|
||||
# [connNoDefaultRoute]
|
||||
# When connRestrictOutboundInterfaces is true, the routing logic will use
|
||||
# the default route for a Node's IP address when no specific route for that
|
||||
# address is found in the routing tables. This can be problematic during a
|
||||
# failure situation, as the default route is not appropriate to use for a
|
||||
# subnet that is accessible from an interface that has failed.
|
||||
# connNoDefaultRoute is a comma-separated list of CIDRs that should never
|
||||
# be accessed via the default route.
|
||||
# Default: 0.0.0.0/0. This prevents the default route from ever being used.
|
||||
|
||||
# [connNetFilterFile]
|
||||
# The path to a text file that specifies allowed IP subnets, which may be used
|
||||
# for outgoing communication. One subnet per line in classless notation (IP
|
||||
# address and number of significant bits).
|
||||
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
|
||||
# line.
|
||||
# Values: This setting is optional. If unspecified, all addresses are allowed
|
||||
# for outgoing communication.
|
||||
# Default: <none>
|
||||
|
||||
# [connTcpOnlyFilterFile]
|
||||
# The path to a text file that specifies IP address ranges to which no RDMA connection should be
|
||||
# established. This is useful e.g. for environments where all hosts support RDMA, but some hosts
|
||||
# cannot connect via RDMA to some other hosts.
|
||||
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
|
||||
# line.
|
||||
# Values: This setting is optional.
|
||||
# Default: <none>
|
||||
|
||||
#
|
||||
# --- Section 4.3: [Logging] ---
|
||||
#
|
||||
|
||||
# [logType]
|
||||
# Defines the logger type. This can either be "syslog" to send log messages to
|
||||
# the general system logger or "logfile". If set to logfile, logs will be written
|
||||
# to logStdFile.
|
||||
# Default: logfile
|
||||
|
||||
# [logLevel]
|
||||
# Defines the amount of output messages. The higher this level, the more
|
||||
# detailed the log messages will be.
|
||||
# Note: Levels above 2 might decrease performance.
|
||||
# Default: 2 (Max: 5)
|
||||
|
||||
# [logNoDate]
|
||||
# Defines whether "date & time" (=false) or the current "time only" (=true)
|
||||
# should be logged.
|
||||
# Default: false
|
||||
|
||||
# [logNumLines]
|
||||
# The maximum number of lines per log file.
|
||||
# Default: 50000
|
||||
|
||||
# [logNumRotatedFiles]
|
||||
# The number of old files to keep when "logNumLines" is reached and the log file
|
||||
# is rewritten. (Log rotation)
|
||||
# Default: 2
|
||||
|
||||
# [logStdFile]
|
||||
# The path and filename of the log file for standard log messages. If no name
|
||||
# is specified, the messages will be written to the console.
|
||||
# Default: /var/log/beegfs-mon.log
|
||||
|
||||
|
||||
#
|
||||
# --- Section 4.4: [Startup] ---
|
||||
#
|
||||
|
||||
# [runDaemonized]
|
||||
# Detach the process from its parent (and from stdin/-out/-err).
|
||||
# Default: true
|
||||
|
||||
|
||||
#
|
||||
# --- Section 4.5: [Tuning] ---
|
||||
#
|
||||
|
||||
# [tuneNumWorkers]
|
||||
# The number of worker threads. Should be at least 3. A value of up to twice the
|
||||
# number of CPU cores of your machine is the recommended choice.
|
||||
# Default: 4
|
||||
|
||||
29
mon/build/dist/etc/default/beegfs-mon
vendored
Normal file
29
mon/build/dist/etc/default/beegfs-mon
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
# BeeGFS mon service configuration.
|
||||
|
||||
# Note: This file is only used together with sysV init scripts.
|
||||
# If your system uses systemd, this file is ignored.
|
||||
# In this case:
|
||||
#
|
||||
# - use `systemctl enable / disable` to activate / deactivate a service
|
||||
#
|
||||
# - systemd service templates are used for multimode
|
||||
# (See https://www.beegfs.io/wiki/MultiMode)
|
||||
#
|
||||
#
|
||||
# Set to "NO" to disable start of the BeeGFS mon daemon via the init
|
||||
# script.
|
||||
START_SERVICE="YES"
|
||||
|
||||
# Set to "YES" if you want to start multiple mon daemons with different
|
||||
# configuration files on this machine.
|
||||
#
|
||||
# Create a subdirectory with the ending ".d" in "/etc/beegfs/" for every config
|
||||
# file. The subdirectory name will be used to identify a particular server
|
||||
# instance for init script start/stop.
|
||||
#
|
||||
# Note: The original config file in /etc/beegfs will not be used when multi-mode
|
||||
# is enabled.
|
||||
#
|
||||
# Example: /etc/beegfs/scratch.d/beegfs-mon.conf
|
||||
# $ /etc/init.d/beegfs-mon start scratch
|
||||
MULTI_MODE="NO"
|
||||
22
mon/build/dist/etc/init.d/beegfs-mon.init
vendored
Executable file
22
mon/build/dist/etc/init.d/beegfs-mon.init
vendored
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
### BEGIN INIT INFO
|
||||
# Provides: beegfs-mon
|
||||
# Required-Start:
|
||||
# Should-Start: $network
|
||||
# Required-Stop:
|
||||
# Should-Stop: $network
|
||||
# Default-Start: 2 3 4 5
|
||||
# Default-Stop: 0 1 6
|
||||
# chkconfig: 35 95 9
|
||||
# Short-Description: BeeGFS Mon
|
||||
# Description: Start BeeGFS Mon
|
||||
### END INIT INFO
|
||||
|
||||
APP_NAME="BeeGFS Mon"
|
||||
SERVICE_NAME=beegfs-mon
|
||||
|
||||
# source function library
|
||||
. /etc/beegfs/lib/start-stop-functions
|
||||
. /etc/beegfs/lib/init-multi-mode
|
||||
|
||||
14
mon/build/dist/usr/lib/systemd/system/beegfs-mon.service
vendored
Normal file
14
mon/build/dist/usr/lib/systemd/system/beegfs-mon.service
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
[Unit]
|
||||
Description=BeeGFS Mon Server
|
||||
Documentation=http://www.beegfs.com/content/documentation/
|
||||
Requires=network-online.target
|
||||
# We disable the wants service, because it spams the log files
|
||||
#Wants=beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service
|
||||
After=network-online.target beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service zfs.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/beegfs/sbin/beegfs-mon cfgFile=/etc/beegfs/beegfs-mon.conf runDaemonized=false
|
||||
Type=simple
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
14
mon/build/dist/usr/lib/systemd/system/beegfs-mon@.service
vendored
Normal file
14
mon/build/dist/usr/lib/systemd/system/beegfs-mon@.service
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
[Unit]
|
||||
Description=BeeGFS Mon Server (multimode)
|
||||
Documentation=http://www.beegfs.com/content/documentation/
|
||||
Requires=network-online.target
|
||||
# We disable the wants service, because it spams the log files
|
||||
#Wants=beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service
|
||||
After=network-online.target beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service zfs.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/beegfs/sbin/beegfs-mon cfgFile=/etc/beegfs/%I.d/beegfs-mon.conf runDaemonized=false
|
||||
Type=simple
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
155
mon/scripts/grafana/alerts/CPU-alert-v1.json
Normal file
155
mon/scripts/grafana/alerts/CPU-alert-v1.json
Normal file
@@ -0,0 +1,155 @@
|
||||
{
|
||||
"id": 2,
|
||||
"uid": "cf53330f-49cf-4b1e-bb59-e4580d32e707",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "CPU Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"host::tag"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"measurement": "cpu",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "auto",
|
||||
"query": "SELECT mean(\"usage_system\") FROM \"auto\".\"cpu\" WHERE $timeFilter GROUP BY \"host\"::tag",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"usage_system"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "$B > 80",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "math"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-17T18:28:40+05:30",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "30m",
|
||||
"annotations": {
|
||||
"summary": "CPU usage is above the configured threshold",
|
||||
"description": "Please check host \"{{ $labels.host }}\": its CPU usage is above the threshold"
|
||||
},
|
||||
"labels": {
|
||||
"cpu-severity": "{{if gt $values.B.Value 90.0}}critical{{else if gt $values.B.Value 80.0}}warning{{else}}info{{end}}"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
125
mon/scripts/grafana/alerts/CPU-alert-v2.json
Normal file
125
mon/scripts/grafana/alerts/CPU-alert-v2.json
Normal file
@@ -0,0 +1,125 @@
|
||||
{
|
||||
"id": 2,
|
||||
"uid": "c1ec4ef2-dae2-4c85-b478-8119bb4326e6",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "CPU Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"query": "from(bucket: \"${BUCKET}\") \r\n|> range(start: v.timeRangeStart, stop: v.timeRangeStop) \r\n|> filter(fn: (r) => r[\"_measurement\"] == \"cpu\") \r\n|> filter(fn: (r) => r[\"_field\"] == \"usage_system\")\r\n|> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\") \r\n|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) \r\n|> yield(name: \"mean\")",
|
||||
"refId": "A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "$B > 80",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "math"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-17T12:42:56Z",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "30m",
|
||||
"annotations": {
|
||||
"description": "Please check host \"{{ $labels.host }}\": its CPU usage is above the threshold",
|
||||
"summary": "CPU usage is above the set threshold"
|
||||
},
|
||||
"labels": {
|
||||
"cpu-severity": "{{ if gt $values.B.Value 90.0 }}critical{{ else if gt $values.B.Value 80.0 }}warning{{ else }}info{{ end }}"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
156
mon/scripts/grafana/alerts/Disk-alert-v1.json
Normal file
156
mon/scripts/grafana/alerts/Disk-alert-v1.json
Normal file
@@ -0,0 +1,156 @@
|
||||
{
|
||||
"id": 3,
|
||||
"uid": "af36a69e-fd32-4ebc-94cd-474ea6c9edb2",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Disk Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"storageTargetID::tag"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"measurement": "storageTargets",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "auto",
|
||||
"query": "SELECT (diskSpaceFree / diskSpaceTotal) * 100 FROM \"auto\".\"storageTargets\" WHERE $timeFilter GROUP BY \"storageTargetID\"::tag",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"diskSpaceFree"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
" / "
|
||||
],
|
||||
"type": "math"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "$B < 30",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "math"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-10T16:06:31+05:30",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "10m",
|
||||
"annotations": {
|
||||
"description": "Please check, as the Disk Space for Storage Target ID '{{ $labels.storageTargetID }}' is only {{ humanize $values.B.Value }}%.",
|
||||
"summary": "BeeGFS Storage Target disk space is low"
|
||||
},
|
||||
"labels": {
|
||||
"disk-severity": "{{if lt $values.B.Value 20.0}}critical{{else if lt $values.B.Value 30.0}}warning{{else}}info{{end}}"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
125
mon/scripts/grafana/alerts/Disk-alert-v2.json
Normal file
125
mon/scripts/grafana/alerts/Disk-alert-v2.json
Normal file
@@ -0,0 +1,125 @@
|
||||
{
|
||||
"id": 3,
|
||||
"uid": "c0008edf-2473-47be-b0ff-ab50bad831c5",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Disk Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"query": "from(bucket: \"${BUCKET}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r[\"_measurement\"] == \"storageTargets\")\r\n |> filter(fn: (r) => r._field == \"diskSpaceTotal\" or r._field == \"diskSpaceFree\")\r\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> map(fn: (r) => ({ r with _value:(r.diskSpaceFree/ r.diskSpaceTotal) * 100.0 }))\r\n |> rename(columns: {_value: \"DiskFreePercent\"})\r\n |> drop(columns:[\"_start\",\"_stop\",\"_measurement\",\"diskSpaceTotal\",\"diskSpaceFree\"])\r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "$B < 30",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "math"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-16T18:16:45Z",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "10m",
|
||||
"annotations": {
|
||||
"description": "Please check, as the Disk Space for Storage Target ID '{{ $labels.storageTargetID }}' is only {{ humanize $values.B.Value }}%.",
|
||||
"summary": "BeeGFS Storage Target disk space is low"
|
||||
},
|
||||
"labels": {
|
||||
"disk-severity": "{{if lt $values.B.Value 20.0}}critical{{else if lt $values.B.Value 30.0}}warning{{else}}info{{end}}"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
156
mon/scripts/grafana/alerts/Inodes-alert-v1.json
Normal file
156
mon/scripts/grafana/alerts/Inodes-alert-v1.json
Normal file
@@ -0,0 +1,156 @@
|
||||
{
|
||||
"id": 4,
|
||||
"uid": "e2ad5c16-110f-43df-a784-829561fe3317",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Inodes Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"storageTargetID::tag"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"measurement": "storageTargets",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "auto",
|
||||
"query": "SELECT (inodesFree / inodesTotal) * 100 FROM \"auto\".\"storageTargets\" WHERE $timeFilter GROUP BY \"storageTargetID\"::tag",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"diskSpaceFree"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
" / "
|
||||
],
|
||||
"type": "math"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "$B < 20",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "math"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-10T16:06:31+05:30",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "10m",
|
||||
"annotations": {
|
||||
"description": "Please check, as the free inodes for Storage Target ID '{{ $labels.storageTargetID }}' are only at {{ humanize $values.B.Value }}%.",
|
||||
"summary": "BeeGFS Storage Target Inodes are below the threshold."
|
||||
},
|
||||
"labels": {
|
||||
"inodes": "free"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
125
mon/scripts/grafana/alerts/Inodes-alert-v2.json
Normal file
125
mon/scripts/grafana/alerts/Inodes-alert-v2.json
Normal file
@@ -0,0 +1,125 @@
|
||||
{
|
||||
"id": 4,
|
||||
"uid": "be096d59-9dc4-4821-9530-8447e7261d9c",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Inodes Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"query": "from(bucket: \"${BUCKET}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r[\"_measurement\"] == \"storageTargets\")\r\n |> filter(fn: (r) => r._field == \"inodesTotal\" or r._field == \"inodesFree\")\r\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> map(fn: (r) => ({ r with _value:(r.inodesFree/ r.inodesTotal) * 100.0 }))\r\n |> rename(columns: {_value: \"InodesFreePercent\"})\r\n |> drop(columns:[\"_start\",\"_stop\",\"_measurement\",\"inodesFree\",\"inodesTotal\"])",
|
||||
"refId": "A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "$B < 20",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "math"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-16T18:16:45Z",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "10m",
|
||||
"annotations": {
|
||||
"description": "Please check, as the free inodes for Storage Target ID '{{ $labels.storageTargetID }}' are only at {{ humanize $values.B.Value }}%.",
|
||||
"summary": "BeeGFS Storage Target Inodes are below the threshold."
|
||||
},
|
||||
"labels": {
|
||||
"inodes": "free"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
151
mon/scripts/grafana/alerts/MetaQueuedrequest-alert-v1.json
Normal file
151
mon/scripts/grafana/alerts/MetaQueuedrequest-alert-v1.json
Normal file
@@ -0,0 +1,151 @@
|
||||
{
|
||||
"id": 5,
|
||||
"uid": "bc49ff76-3db9-4f8b-b88a-947c7717fc18",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Meta Queued Request Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"nodeID::tag"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "auto",
|
||||
"query": "SELECT max(\"queuedRequests\") FROM \"auto\".\"highResMeta\" WHERE $timeFilter GROUP BY \"nodeID\"::tag",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"queuedRequests"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
50
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "B",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "threshold"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-17T18:57:50+05:30",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "2m",
|
||||
"annotations": {
|
||||
"description": "Queued requests of BeeGFS meta server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
|
||||
"summary": "Meta server queued requests is above threshold"
|
||||
},
|
||||
"labels": {
|
||||
"queued": "request"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
126
mon/scripts/grafana/alerts/MetaQueuedrequest-alert-v2.json
Normal file
126
mon/scripts/grafana/alerts/MetaQueuedrequest-alert-v2.json
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"id": 5,
|
||||
"uid": "a5a9072e-a8c2-46c1-b3a0-88608956e83e",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Meta Queued Request Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"query": "from(bucket: \"${BUCKET}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r._measurement == \"highResMeta\" and r._field == \"queuedRequests\") \r\n|> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) \r\n|> yield(name: \"max\")",
|
||||
"refId": "A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
50
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "B",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "threshold"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-16T18:16:45Z",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "2m",
|
||||
"annotations": {
|
||||
"description": "Queued requests of BeeGFS meta server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
|
||||
"summary": "Meta server queued requests is above threshold"
|
||||
},
|
||||
"labels": {
|
||||
"queued": "request"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
158
mon/scripts/grafana/alerts/Services-alert-v1.json
Normal file
158
mon/scripts/grafana/alerts/Services-alert-v1.json
Normal file
@@ -0,0 +1,158 @@
|
||||
{
|
||||
"id": 1,
|
||||
"uid": "d9a3e5ba-b5bc-4ede-989b-c605547eb2d",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Services Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"host"
|
||||
],
|
||||
"type": "tag"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"systemd_unit"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"measurement": "procstat_lookup",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "auto",
|
||||
"query": "SELECT last(\"running\") FROM \"auto\".\"procstat_lookup\" WHERE $timeFilter GROUP BY \"host\", \"systemd_unit\"",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"running"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
1,
|
||||
0
|
||||
],
|
||||
"type": "lt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": []
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "avg"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"name": "Expression",
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "B",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "threshold"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-17T09:19:39+05:30",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "1m",
|
||||
"annotations": {
|
||||
"description": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down. Please check host \"{{ $labels.host }}\"",
|
||||
"summary": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down"
|
||||
},
|
||||
"labels": {
|
||||
"service_status": "down"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
126
mon/scripts/grafana/alerts/Services-alert-v2.json
Normal file
126
mon/scripts/grafana/alerts/Services-alert-v2.json
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"id": 1,
|
||||
"uid": "a96d9b2e-2a6b-4ab3-9858-200da324672f",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Service Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"query": "from(bucket: \"${BUCKET}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r[\"_measurement\"] == \"procstat_lookup\")\r\n |> filter(fn: (r) => r._field == \"running\")\r\n |> group(columns: [\"host\", \"systemd_unit\"], mode: \"by\")\r\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n |> keep(columns: [\"_time\", \"_value\", \"host\", \"systemd_unit\"])\r\n |> sort(columns: [\"_time\"])",
|
||||
"refId": "A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
1
|
||||
],
|
||||
"type": "lt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "B",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "threshold"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-17T11:33:42Z",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "1m",
|
||||
"annotations": {
|
||||
"description": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down. Please check host \"{{ $labels.host }}\"",
|
||||
"summary": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down"
|
||||
},
|
||||
"labels": {
|
||||
"service_status": "down"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
151
mon/scripts/grafana/alerts/StorageQueuedrequest-alert-v1.json
Normal file
151
mon/scripts/grafana/alerts/StorageQueuedrequest-alert-v1.json
Normal file
@@ -0,0 +1,151 @@
|
||||
{
|
||||
"id": 6,
|
||||
"uid": "c81b9c61-d553-4240-aff1-e92627a40a11",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Storage Queued Request Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"nodeID::tag"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"measurement": "highResStorage",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "auto",
|
||||
"query": "SELECT max(\"queuedRequests\") FROM \"auto\".\"highResStorage\" WHERE $timeFilter GROUP BY \"nodeID\"::tag",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"queuedRequests"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
50
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "B",
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "threshold"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-17T19:13:11+05:30",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "2m",
|
||||
"annotations": {
|
||||
"description": "Queued requests of BeeGFS Storage Server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
|
||||
"summary": "Storage server queued requests is above threshold"
|
||||
},
|
||||
"labels": {
|
||||
"queued": "request"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
126
mon/scripts/grafana/alerts/StorageQueuedrequest-alert-v2.json
Normal file
126
mon/scripts/grafana/alerts/StorageQueuedrequest-alert-v2.json
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"id": 6,
|
||||
"uid": "e0a4e911-6602-4adc-993b-d65672e7f431",
|
||||
"orgID": 1,
|
||||
"folderUID": "beegfsalertfolder",
|
||||
"ruleGroup": "evaluate",
|
||||
"title": "Storage Queued Request Alert",
|
||||
"condition": "C",
|
||||
"data": [
|
||||
{
|
||||
"refId": "A",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "${DS_UID}",
|
||||
"model": {
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"query": "from(bucket: \"${BUCKET}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r._measurement == \"highResStorage\" and r._field == \"queuedRequests\") \r\n|> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) \r\n|> yield(name: \"max\")",
|
||||
"refId": "A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "A",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"reducer": "last",
|
||||
"refId": "B",
|
||||
"type": "reduce"
|
||||
}
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"queryType": "",
|
||||
"relativeTimeRange": {
|
||||
"from": 600,
|
||||
"to": 0
|
||||
},
|
||||
"datasourceUid": "__expr__",
|
||||
"model": {
|
||||
"conditions": [
|
||||
{
|
||||
"evaluator": {
|
||||
"params": [
|
||||
50
|
||||
],
|
||||
"type": "gt"
|
||||
},
|
||||
"operator": {
|
||||
"type": "and"
|
||||
},
|
||||
"query": {
|
||||
"params": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
"reducer": {
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
"type": "query"
|
||||
}
|
||||
],
|
||||
"datasource": {
|
||||
"type": "__expr__",
|
||||
"uid": "__expr__"
|
||||
},
|
||||
"expression": "B",
|
||||
"hide": false,
|
||||
"intervalMs": 1000,
|
||||
"maxDataPoints": 43200,
|
||||
"refId": "C",
|
||||
"type": "threshold"
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": "2023-10-16T18:16:45Z",
|
||||
"noDataState": "OK",
|
||||
"execErrState": "Error",
|
||||
"for": "2m",
|
||||
"annotations": {
|
||||
"description": "Queued requests of BeeGFS storage server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
|
||||
"summary": "Storage server queued requests are above threshold"
|
||||
},
|
||||
"labels": {
|
||||
"queued": "request"
|
||||
},
|
||||
"isPaused": true
|
||||
}
|
||||
121
mon/scripts/grafana/alerts/alert-dashboard.json
Normal file
121
mon/scripts/grafana/alerts/alert-dashboard.json
Normal file
@@ -0,0 +1,121 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "alertlist",
|
||||
"name": "Alert list",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "10.1.4"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 22,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"alertInstanceLabelFilter": "",
|
||||
"alertName": "",
|
||||
"dashboardAlerts": false,
|
||||
"folder": {
|
||||
"title": "BeeGFS-Alert",
|
||||
"uid": "beegfsalertfolder"
|
||||
},
|
||||
"groupBy": [],
|
||||
"groupMode": "default",
|
||||
"maxItems": 20,
|
||||
"sortOrder": 1,
|
||||
"stateFilter": {
|
||||
"error": true,
|
||||
"firing": true,
|
||||
"noData": false,
|
||||
"normal": true,
|
||||
"pending": true
|
||||
},
|
||||
"viewMode": "list"
|
||||
},
|
||||
"pluginVersion": "10.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Alert List",
|
||||
"type": "alertlist"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Alerts List",
|
||||
"uid": "c4a31d8f-4dc6-4023-bc7a-1b06167a6f74",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
12
mon/scripts/grafana/alerts/contact-point.json
Normal file
12
mon/scripts/grafana/alerts/contact-point.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"uid": "d5c51f44-07d047ca-a580-5a66f643e",
|
||||
"name": "beegfs-email",
|
||||
"type": "email",
|
||||
"settings": {
|
||||
"addresses": "beegfsalert@example.com",
|
||||
"message": "{{ template \"beegfs.message\" . }}",
|
||||
"singleEmail": false,
|
||||
"subject": "{{ template \"beegfs.title\" . }}"
|
||||
},
|
||||
"disableResolveMessage": false
|
||||
}
|
||||
4
mon/scripts/grafana/alerts/email-template.json
Normal file
4
mon/scripts/grafana/alerts/email-template.json
Normal file
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"name": "BeeGFS-Email-Template",
|
||||
"template": "{{ define \"alert_severity_prefix_emoji\" -}}\n\t{{- if eq .Status \"firing\" -}}\n\t\t🔥\n\t{{- else -}}\n\t\t✅\n\t{{- end -}}\n{{- end -}}\n\n{{ define \"beegfs_subject\" }}\n{{ template \"alert_severity_prefix_emoji\" . }}\n[{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ if gt (.Alerts.Resolved | len) 0 }}, RESOLVED:{{ .Alerts.Resolved | len }}{{ end }}{{ end }} | {{ .CommonLabels.alertname -}}] \n{{ end }}\n\n{{ define \"beegfs_text_alert_list\" }}{{ range . }}\nSummary:\n\t{{ .Annotations.summary }}\n\t\nDescription:\n\t{{ .Annotations.description }}\n\nLabels:\n {{ range .Labels.SortedPairs -}}\n {{ .Name }} = {{ .Value }}\n\t{{ end }}\n{{ end }}\n{{ range . }}\n{{ if gt (len .SilenceURL) 0 }}Silence: {{ .SilenceURL }}\n{{ end }}\n{{ end }}\n{{ end }}\n\n\n{{ define \"beegfs.title\" }}{{ template \"beegfs_subject\" . }}{{ end }}\n\n{{ define \"beegfs.message\" }}{{ if gt (len .Alerts.Firing) 0 }}*Firing 🔥*\n{{ template \"beegfs_text_alert_list\" .Alerts.Firing }}{{ if gt (len .Alerts.Resolved) 0 }}\n\n{{ end }}{{ end }}\n\n{{ if gt (len .Alerts.Resolved) 0 }}*Resolved ✅*\n\nBelow alert is resolved:\n{{ template \"beegfs_text_alert_list\" .Alerts.Resolved }}\n{{ end }}\n{{ end }}"
|
||||
}
|
||||
97
mon/scripts/grafana/alerts/policies-telegraf.json
Normal file
97
mon/scripts/grafana/alerts/policies-telegraf.json
Normal file
@@ -0,0 +1,97 @@
|
||||
|
||||
{
|
||||
"receiver": "grafana-default-email",
|
||||
"routes": [
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"disk-severity",
|
||||
"=",
|
||||
"warning"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"disk-severity",
|
||||
"=",
|
||||
"critical"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"service_status",
|
||||
"=",
|
||||
"down"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "3m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"inodes",
|
||||
"=",
|
||||
"free"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"cpu-severity",
|
||||
"=",
|
||||
"critical"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"cpu-severity",
|
||||
"=",
|
||||
"warning"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"queued",
|
||||
"=",
|
||||
"request"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
}
|
||||
]
|
||||
}
|
||||
58
mon/scripts/grafana/alerts/policies.json
Normal file
58
mon/scripts/grafana/alerts/policies.json
Normal file
@@ -0,0 +1,58 @@
|
||||
|
||||
{
|
||||
"receiver": "grafana-default-email",
|
||||
"routes": [
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"disk-severity",
|
||||
"=",
|
||||
"warning"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"disk-severity",
|
||||
"=",
|
||||
"critical"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"inodes",
|
||||
"=",
|
||||
"free"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
},
|
||||
{
|
||||
"receiver": "beegfs-email",
|
||||
"object_matchers": [
|
||||
[
|
||||
"queued",
|
||||
"=",
|
||||
"request"
|
||||
]
|
||||
],
|
||||
"group_wait": "30s",
|
||||
"group_interval": "5m",
|
||||
"repeat_interval": "6h"
|
||||
}
|
||||
]
|
||||
}
|
||||
1642
mon/scripts/grafana/beegfs_overview_influxdbv1.json
Normal file
1642
mon/scripts/grafana/beegfs_overview_influxdbv1.json
Normal file
File diff suppressed because one or more lines are too long
1736
mon/scripts/grafana/beegfs_overview_influxdbv2.json
Normal file
1736
mon/scripts/grafana/beegfs_overview_influxdbv2.json
Normal file
File diff suppressed because one or more lines are too long
1643
mon/scripts/grafana/beegfs_overview_telegraf_influxdbv1.json
Normal file
1643
mon/scripts/grafana/beegfs_overview_telegraf_influxdbv1.json
Normal file
File diff suppressed because one or more lines are too long
1736
mon/scripts/grafana/beegfs_overview_telegraf_influxdbv2.json
Normal file
1736
mon/scripts/grafana/beegfs_overview_telegraf_influxdbv2.json
Normal file
File diff suppressed because one or more lines are too long
386
mon/scripts/grafana/client_ops_node_influxdbv1.json
Normal file
386
mon/scripts/grafana/client_ops_node_influxdbv1.json
Normal file
@@ -0,0 +1,386 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "SELECT sum(*) FROM \"metaClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY node",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time": 0,
|
||||
"node": 1,
|
||||
"sum_close": 2,
|
||||
"sum_createLI": 3,
|
||||
"sum_getXA": 16,
|
||||
"sum_hardlnk": 18,
|
||||
"sum_listXA": 19,
|
||||
"sum_mdsInf": 4,
|
||||
"sum_mkdir": 5,
|
||||
"sum_open": 6,
|
||||
"sum_rddir": 7,
|
||||
"sum_ren": 8,
|
||||
"sum_revalLI": 9,
|
||||
"sum_rmdir": 10,
|
||||
"sum_sAttr": 11,
|
||||
"sum_sChDrct": 12,
|
||||
"sum_stat": 13,
|
||||
"sum_statLI": 14,
|
||||
"sum_sum": 15,
|
||||
"sum_trunc": 17,
|
||||
"sum_unlnk": 20
|
||||
},
|
||||
"renameByName": {
|
||||
"node": "",
|
||||
"sum_close": "close",
|
||||
"sum_create": "create",
|
||||
"sum_createLI": "createLI",
|
||||
"sum_dirparent": "dirparent",
|
||||
"sum_entInf": "entInf",
|
||||
"sum_flckAp": "flckAp",
|
||||
"sum_flckEn": "flckEn",
|
||||
"sum_flckRg": "flckRg",
|
||||
"sum_fndOwn": "fndOwn",
|
||||
"sum_getXA": "getXA",
|
||||
"sum_hardlnk": "hardlnk",
|
||||
"sum_listXA": "listXA",
|
||||
"sum_lookLI": "lookLI",
|
||||
"sum_mdsInf": "mdsInf",
|
||||
"sum_mirror": "mirror",
|
||||
"sum_mkdir": "mkdir",
|
||||
"sum_mvDirIns": "mvDirIns",
|
||||
"sum_mvFiIns": "mvFiIns",
|
||||
"sum_open": "open",
|
||||
"sum_openLI": "openLI",
|
||||
"sum_rddir": "rddir",
|
||||
"sum_refrEnt": "refrEnt",
|
||||
"sum_ren": "ren",
|
||||
"sum_revalLI": "revalLI",
|
||||
"sum_rmLnk": "rmLnk",
|
||||
"sum_rmXA": "rmXA",
|
||||
"sum_rmdir": "rmdir",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_sDirPat": "sDirPat",
|
||||
"sum_setXA": "setXA",
|
||||
"sum_stat": "stat",
|
||||
"sum_statLI": "statLI",
|
||||
"sum_statfs": "statfs",
|
||||
"sum_sum": "sum",
|
||||
"sum_symlnk": "symlnk",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"node"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"measurement": "metaClientOpsByNode",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(*) FROM \"storageClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY node",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"*"
|
||||
],
|
||||
"type": "field"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"sum_B-rd": "B-rd",
|
||||
"sum_B-wr": "B-wr",
|
||||
"sum_close": "close",
|
||||
"sum_getFSize": "getFSize",
|
||||
"sum_ops-rd": "ops-rd",
|
||||
"sum_ops-wr": "ops-wr",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_statfs": "statfs",
|
||||
"sum_storInf": "storInf",
|
||||
"sum_sum": "sum",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from metaClientOpsByNode with key = \"node\"",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "nodeID",
|
||||
"multi": true,
|
||||
"name": "nodeID",
|
||||
"options": [],
|
||||
"query": "show tag values from metaClientOpsByNode with key = \"node\"",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by Node)",
|
||||
"uid": "HHI9d8UO",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
273
mon/scripts/grafana/client_ops_node_influxdbv2.json
Normal file
273
mon/scripts/grafana/client_ops_node_influxdbv2.json
Normal file
@@ -0,0 +1,273 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "nodeID",
|
||||
"multi": true,
|
||||
"name": "nodeID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-2d",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by Node)",
|
||||
"uid": "V5Me2Vk",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
386
mon/scripts/grafana/client_ops_node_telegraf_influxdbv1.json
Normal file
386
mon/scripts/grafana/client_ops_node_telegraf_influxdbv1.json
Normal file
@@ -0,0 +1,386 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "SELECT sum(*) FROM \"metaClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY \"node\"",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time": 0,
|
||||
"node": 1,
|
||||
"sum_close": 2,
|
||||
"sum_createLI": 3,
|
||||
"sum_getXA": 16,
|
||||
"sum_hardlnk": 18,
|
||||
"sum_listXA": 19,
|
||||
"sum_mdsInf": 4,
|
||||
"sum_mkdir": 5,
|
||||
"sum_open": 6,
|
||||
"sum_rddir": 7,
|
||||
"sum_ren": 8,
|
||||
"sum_revalLI": 9,
|
||||
"sum_rmdir": 10,
|
||||
"sum_sAttr": 11,
|
||||
"sum_sChDrct": 12,
|
||||
"sum_stat": 13,
|
||||
"sum_statLI": 14,
|
||||
"sum_sum": 15,
|
||||
"sum_trunc": 17,
|
||||
"sum_unlnk": 20
|
||||
},
|
||||
"renameByName": {
|
||||
"node": "",
|
||||
"sum_close": "close",
|
||||
"sum_create": "create",
|
||||
"sum_createLI": "createLI",
|
||||
"sum_dirparent": "dirparent",
|
||||
"sum_entInf": "entInf",
|
||||
"sum_flckAp": "flckAp",
|
||||
"sum_flckEn": "flckEn",
|
||||
"sum_flckRg": "flckRg",
|
||||
"sum_fndOwn": "fndOwn",
|
||||
"sum_getXA": "getXA",
|
||||
"sum_hardlnk": "hardlnk",
|
||||
"sum_listXA": "listXA",
|
||||
"sum_lookLI": "lookLI",
|
||||
"sum_mdsInf": "mdsInf",
|
||||
"sum_mirror": "mirror",
|
||||
"sum_mkdir": "mkdir",
|
||||
"sum_mvDirIns": "mvDirIns",
|
||||
"sum_mvFiIns": "mvFiIns",
|
||||
"sum_open": "open",
|
||||
"sum_openLI": "openLI",
|
||||
"sum_rddir": "rddir",
|
||||
"sum_refrEnt": "refrEnt",
|
||||
"sum_ren": "ren",
|
||||
"sum_revalLI": "revalLI",
|
||||
"sum_rmLnk": "rmLnk",
|
||||
"sum_rmXA": "rmXA",
|
||||
"sum_rmdir": "rmdir",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_sDirPat": "sDirPat",
|
||||
"sum_setXA": "setXA",
|
||||
"sum_stat": "stat",
|
||||
"sum_statLI": "statLI",
|
||||
"sum_statfs": "statfs",
|
||||
"sum_sum": "sum",
|
||||
"sum_symlnk": "symlnk",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"node"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"measurement": "metaClientOpsByNode",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(*) FROM \"storageClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY node",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"*"
|
||||
],
|
||||
"type": "field"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"sum_B-rd": "B-rd",
|
||||
"sum_B-wr": "B-wr",
|
||||
"sum_close": "close",
|
||||
"sum_getFSize": "getFSize",
|
||||
"sum_ops-rd": "ops-rd",
|
||||
"sum_ops-wr": "ops-wr",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_statfs": "statfs",
|
||||
"sum_storInf": "storInf",
|
||||
"sum_sum": "sum",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from metaClientOpsByNode with key = \"node\"",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Node ID",
|
||||
"multi": true,
|
||||
"name": "nodeID",
|
||||
"options": [],
|
||||
"query": "show tag values from metaClientOpsByNode with key = \"node\"",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by Node)",
|
||||
"uid": "HHI9dJV4k",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
273
mon/scripts/grafana/client_ops_node_telegraf_influxdbv2.json
Normal file
273
mon/scripts/grafana/client_ops_node_telegraf_influxdbv2.json
Normal file
@@ -0,0 +1,273 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "nodeID",
|
||||
"multi": true,
|
||||
"name": "nodeID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-2d",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by Node)",
|
||||
"uid": "V5Me2Vk",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
||||
635
mon/scripts/grafana/client_ops_user_influxdbv1.json
Normal file
635
mon/scripts/grafana/client_ops_user_influxdbv1.json
Normal file
@@ -0,0 +1,635 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "piechart",
|
||||
"name": "Pie chart",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"panels": [],
|
||||
"title": "Operation List",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"filterable": false,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "sum_mdsInf"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(*) FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"value"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"sum_close": "close",
|
||||
"sum_create": "create",
|
||||
"sum_createLI": "createLI",
|
||||
"sum_mdsInf": "mdsInf",
|
||||
"sum_mkdir": "mkdir",
|
||||
"sum_open": "open",
|
||||
"sum_rddir": "rddir",
|
||||
"sum_ren": "ren",
|
||||
"sum_revalLI": "revalLI",
|
||||
"sum_rmdir": "rmdir",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_stat": "stat",
|
||||
"sum_statLI": "statLI",
|
||||
"sum_sum": "sum",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk",
|
||||
"user": "user"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "SELECT sum(*) FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"sum_B-rd": "B-rd",
|
||||
"sum_B-wr": "B-wr",
|
||||
"sum_close": "close",
|
||||
"sum_getFSize": "getFSize",
|
||||
"sum_ops-rd": "ops-rd",
|
||||
"sum_ops-wr": "ops-wr",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_statfs": "statfs",
|
||||
"sum_storInf": "storinf",
|
||||
"sum_sum": "sum",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk",
|
||||
"user": "user"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 19
|
||||
},
|
||||
"id": 17,
|
||||
"panels": [],
|
||||
"title": "Meta Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 20
|
||||
},
|
||||
"id": 24,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"percent",
|
||||
"name"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "SELECT sum(\"close\") AS \"close\", sum(\"getXA\") AS \"getXA\", sum(\"hardlnk\") AS \"hardlnk\", sum(\"listXA\") AS \"listXA\", sum(\"mkdir\") AS \"mkdir\", sum(\"open\") AS \"open\", sum(\"rddir\") AS \"rddir\", sum(\"ren\") AS \"ren\", sum(\"rmXA\") AS \"rmXA\", sum(\"rmdir\") AS \"rmdir\", sum(\"setXA\") AS \"setXA\", sum(\"stat\") AS \"stat\", sum(\"statfs\") AS \"statfs\", sum(\"symlnk\") AS \"symlnk\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\", sum(\"ack\") AS \"ack\", sum(\"create\") AS \"create\", sum(\"createLI\") AS \"createLI\", sum(\"dirparent\") AS \"dirparent\", sum(\"entInf\") AS \"entInf\", sum(\"flckAp\") AS \"flckAp\",sum(\"flckEn\") AS \"flckEn\", sum(\"flckRg\") AS \"flckRg\", sum(\"fndOwn\") AS \"fndOwn\", sum(\"lookLI\") AS \"lookLI\", sum(\"mdsInf\") AS \"mdsInf\", sum(\"mirror\") AS \"mirror\", sum(\"mvDirIns\") AS \"mvDirIns\", sum(\"mvFiIns\") AS \"mvFiIns\", sum(\"openLI\") AS \"openLI\", sum(\"refrEnt\") AS \"refrEnt\", sum(\"revalLI\") AS \"revalLI\", sum(\"rmLnk\") AS \"rmLnk\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"sDirPat\") AS \"sDirPat\", sum(\"statLI\") AS \"statLI\" FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 8,
|
||||
"panels": [],
|
||||
"title": "Storage Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 29
|
||||
},
|
||||
"id": 10,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"name",
|
||||
"percent"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.0.8",
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"user"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"measurement": "storageClientOpsByUser",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(\"ack\") AS \"ack\", sum(\"close\") AS \"close\", sum(\"fsync\") AS \"fsync\", sum(\"gendbg\") AS \"gendbg\", sum(\"getFSize\") AS \"getFSize\", sum(\"hrtbeat\") AS \"hrtbeat\", sum(\"ops-rd\") AS \"ops-rd\", sum(\"ops-wr\") AS \"ops-wr\", sum(\"remNode\") AS \"remNode\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"statfs\") AS \"statfs\", sum(\"storInf\") AS \"storInf\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\" FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"B-wr"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "sum"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"write"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"B-rd"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "sum"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"read"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "user",
|
||||
"operator": "=~",
|
||||
"value": "/^$userid$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"transparent": true,
|
||||
"type": "piechart"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from storageClientOpsByUser with key = \"user\"",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "userid",
|
||||
"options": [],
|
||||
"query": "show tag values from storageClientOpsByUser with key = \"user\"",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by User)",
|
||||
"uid": "RYuIR1V4k",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
582
mon/scripts/grafana/client_ops_user_influxdbv2.json
Normal file
582
mon/scripts/grafana/client_ops_user_influxdbv2.json
Normal file
@@ -0,0 +1,582 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "piechart",
|
||||
"name": "Pie chart",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 13,
|
||||
"panels": [],
|
||||
"title": "Operation List",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 0,
|
||||
"showHeader": true,
|
||||
"sortBy": []
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"mdsInf {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "mdsInf",
|
||||
"sChDrct {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "sChDrct",
|
||||
"stat {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "stat",
|
||||
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "",
|
||||
"user {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "User"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 0,
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "_value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 18,
|
||||
"panels": [],
|
||||
"title": "Meta Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 4,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"percent",
|
||||
"name"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /$userid$/)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"sum": true,
|
||||
"user": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 23,
|
||||
"panels": [],
|
||||
"title": "Storage Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 25
|
||||
},
|
||||
"id": 33,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"percent",
|
||||
"name"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /$userid$/)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"sum": true,
|
||||
"user": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "piechart"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "User ID",
|
||||
"multi": true,
|
||||
"name": "userid",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by User)",
|
||||
"uid": "RBCm2Vk",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
641
mon/scripts/grafana/client_ops_user_telegraf_influxdbv1.json
Normal file
641
mon/scripts/grafana/client_ops_user_telegraf_influxdbv1.json
Normal file
@@ -0,0 +1,641 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "piechart",
|
||||
"name": "Pie chart",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": false,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "sum_mdsInf"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "9.5.0",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(*) FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"value"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"sum_close": "close",
|
||||
"sum_create": "create",
|
||||
"sum_createLI": "createLI",
|
||||
"sum_mdsInf": "mdsInf",
|
||||
"sum_mkdir": "mkdir",
|
||||
"sum_open": "open",
|
||||
"sum_rddir": "rddir",
|
||||
"sum_ren": "ren",
|
||||
"sum_revalLI": "revalLI",
|
||||
"sum_rmdir": "rmdir",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_stat": "stat",
|
||||
"sum_statLI": "statLI",
|
||||
"sum_sum": "sum",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk",
|
||||
"user": "user"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "sum_B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.5.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "SELECT sum(*) FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"sum_B-rd": "B-rd",
|
||||
"sum_B-wr": "B-wr",
|
||||
"sum_close": "close",
|
||||
"sum_getFSize": "getFSize",
|
||||
"sum_ops-rd": "ops-rd",
|
||||
"sum_ops-wr": "ops-wr",
|
||||
"sum_sAttr": "sAttr",
|
||||
"sum_sChDrct": "sChDrct",
|
||||
"sum_statfs": "statfs",
|
||||
"sum_storInf": "storInf",
|
||||
"sum_sum": "sum",
|
||||
"sum_trunc": "trunc",
|
||||
"sum_unlnk": "unlnk",
|
||||
"user": "user"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"title": "Operation List",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 17,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 24,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"percent",
|
||||
"name"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "SELECT sum(\"close\") AS \"close\", sum(\"getXA\") AS \"getXA\", sum(\"hardlnk\") AS \"hardlnk\", sum(\"listXA\") AS \"listXA\", sum(\"mkdir\") AS \"mkdir\", sum(\"open\") AS \"open\", sum(\"rddir\") AS \"rddir\", sum(\"ren\") AS \"ren\", sum(\"rmXA\") AS \"rmXA\", sum(\"rmdir\") AS \"rmdir\", sum(\"setXA\") AS \"setXA\", sum(\"stat\") AS \"stat\", sum(\"statfs\") AS \"statfs\", sum(\"symlnk\") AS \"symlnk\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\", sum(\"ack\") AS \"ack\", sum(\"create\") AS \"create\", sum(\"createLI\") AS \"createLI\", sum(\"dirparent\") AS \"dirparent\", sum(\"entInf\") AS \"entInf\", sum(\"flckAp\") AS \"flckAp\",sum(\"flckEn\") AS \"flckEn\", sum(\"flckRg\") AS \"flckRg\", sum(\"fndOwn\") AS \"fndOwn\", sum(\"lookLI\") AS \"lookLI\", sum(\"mdsInf\") AS \"mdsInf\", sum(\"mirror\") AS \"mirror\", sum(\"mvDirIns\") AS \"mvDirIns\", sum(\"mvFiIns\") AS \"mvFiIns\", sum(\"openLI\") AS \"openLI\", sum(\"refrEnt\") AS \"refrEnt\", sum(\"revalLI\") AS \"revalLI\", sum(\"rmLnk\") AS \"rmLnk\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"sDirPat\") AS \"sDirPat\", sum(\"statLI\") AS \"statLI\" FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"type": "piechart"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 2
|
||||
},
|
||||
"id": 8,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 10,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"name",
|
||||
"percent"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.0.8",
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"user"
|
||||
],
|
||||
"type": "tag"
|
||||
}
|
||||
],
|
||||
"measurement": "storageClientOpsByUser",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(\"ack\") AS \"ack\", sum(\"close\") AS \"close\", sum(\"fsync\") AS \"fsync\", sum(\"gendbg\") AS \"gendbg\", sum(\"getFSize\") AS \"getFSize\", sum(\"hrtbeat\") AS \"hrtbeat\", sum(\"ops-rd\") AS \"ops-rd\", sum(\"ops-wr\") AS \"ops-wr\", sum(\"remNode\") AS \"remNode\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"statfs\") AS \"statfs\", sum(\"storInf\") AS \"storInf\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\" FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"B-wr"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "sum"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"write"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"B-rd"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "sum"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"read"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "user",
|
||||
"operator": "=~",
|
||||
"value": "/^$userid$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"transparent": true,
|
||||
"type": "piechart"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation Per User",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from storageClientOpsByUser with key = \"user\"",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "User ID",
|
||||
"multi": true,
|
||||
"name": "userid",
|
||||
"options": [],
|
||||
"query": "show tag values from storageClientOpsByUser with key = \"user\"",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by User)",
|
||||
"uid": "RYuIR1V4k",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
582
mon/scripts/grafana/client_ops_user_telegraf_influxdbv2.json
Normal file
582
mon/scripts/grafana/client_ops_user_telegraf_influxdbv2.json
Normal file
@@ -0,0 +1,582 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "piechart",
|
||||
"name": "Pie chart",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 13,
|
||||
"panels": [],
|
||||
"title": "Operation List",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 0,
|
||||
"showHeader": true,
|
||||
"sortBy": []
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Meta Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"mdsInf {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "mdsInf",
|
||||
"sChDrct {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "sChDrct",
|
||||
"stat {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "stat",
|
||||
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "",
|
||||
"user {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "User"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "user"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "none"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"enablePagination": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 0,
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "_value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage Operation List",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 18,
|
||||
"panels": [],
|
||||
"title": "Meta Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 4,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"percent",
|
||||
"name"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /^$userid$/)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"sum": true,
|
||||
"user": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 23,
|
||||
"panels": [],
|
||||
"title": "Storage Operation Per User",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-wr"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^B-rd"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 25
|
||||
},
|
||||
"id": 33,
|
||||
"maxPerRow": 4,
|
||||
"options": {
|
||||
"displayLabels": [
|
||||
"percent",
|
||||
"name"
|
||||
],
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"values": [
|
||||
"value"
|
||||
]
|
||||
},
|
||||
"pieType": "donut",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "userid",
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /^$userid$/)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table"
|
||||
}
|
||||
],
|
||||
"title": "User ID $userid",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"sum": true,
|
||||
"user": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "piechart"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "User ID",
|
||||
"multi": true,
|
||||
"name": "userid",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Client Operations (by User)",
|
||||
"uid": "RBCm2Vk",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
||||
163
mon/scripts/grafana/import-alerts
Executable file
163
mon/scripts/grafana/import-alerts
Executable file
@@ -0,0 +1,163 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Import one alert rule into Grafana through the provisioning API.
#
# Arguments:
#   $1 - path to an alert-rule JSON template; every literal "${DS_UID}" in it
#        is replaced by the value of $DATASOURCE_UID before upload.
# Globals read: HOST, DATASOURCE_UID.
# Output: whatever curl prints for the POST response (silent mode, -s).
function addAlert() {
    local rule_file="$1"
    local ds_uid_escaped rule_json

    # Backslash-escape the characters that are special on the replacement side
    # of a sed "s" command (\, / and &) so an unusual UID cannot break or
    # corrupt the substitution.
    ds_uid_escaped=$(printf '%s' "$DATASOURCE_UID" | sed -e 's|[\\/&]|\\&|g')

    # Let sed read the template directly instead of going through cat + echo.
    rule_json=$(sed -e "s/\${DS_UID}/$ds_uid_escaped/g" "$rule_file")

    # "X-Disable-Provenance;" is curl's syntax for sending that header with an
    # empty value.
    curl -s -X POST "$HOST/api/v1/provisioning/alert-rules" \
        --header "Content-type: application/json" \
        --header "X-Disable-Provenance;" \
        --data "$rule_json"
}
|
||||
|
||||
# Import one alert rule into Grafana, additionally filling in the bucket name.
#
# Arguments:
#   $1 - path to an alert-rule JSON template; every literal "${DS_UID}" is
#        replaced by $DATASOURCE_UID and every literal "${BUCKET}" by
#        $BUCKET_NAME before upload.
# Globals read: HOST, DATASOURCE_UID, BUCKET_NAME.
# Output: whatever curl prints for the POST response (silent mode, -s).
function addAlertV2() {
    local rule_file="$1"
    local ds_uid_escaped bucket_escaped rule_json

    # Backslash-escape \, / and & so that values such as a bucket called
    # "db/autogen" do not terminate the sed expression early or get "&"
    # expanded to the matched text.
    ds_uid_escaped=$(printf '%s' "$DATASOURCE_UID" | sed -e 's|[\\/&]|\\&|g')
    bucket_escaped=$(printf '%s' "$BUCKET_NAME" | sed -e 's|[\\/&]|\\&|g')

    rule_json=$(sed -e "s/\${DS_UID}/$ds_uid_escaped/g; s/\${BUCKET}/$bucket_escaped/g" "$rule_file")

    # "X-Disable-Provenance;" is curl's syntax for sending that header with an
    # empty value.
    curl -s -X POST "$HOST/api/v1/provisioning/alert-rules" \
        --header "Content-type: application/json" \
        --header "X-Disable-Provenance;" \
        --data "$rule_json"
}
|
||||
|
||||
# Upload one dashboard JSON file into the "beegfsalertfolder" Grafana folder.
#
# Arguments:
#   $1 - path to a dashboard JSON file; every literal
#        "${DS_BEEGFS_MON_INFLUXDB}" in it is replaced by $DATASOURCE_NAME.
# Globals read: HOST, DATASOURCE_NAME.
# Output: whatever curl prints for the POST response (silent mode, -s).
function addDashboard() {
    local dashboard_file="$1"
    local ds_name_escaped

    # Escape the characters that are special in the replacement part of the
    # comma-delimited sed expression below (\, the delimiter itself, and &).
    ds_name_escaped=$(printf '%s' "$DATASOURCE_NAME" | sed -e 's|[\\,&]|\\&|g')

    # The wrapper object is assembled with printf/cat rather than "echo -e":
    # echo -e would expand the JSON's own escape sequences (\n, \r, \t, \\)
    # and thereby alter or invalidate the dashboard that is sent.
    {
        printf '%s' '{"dashboard": '
        cat "$dashboard_file"
        printf '%s\n' ', "folderUid": "beegfsalertfolder"}'
    } | \
        sed -e "s,\${DS_BEEGFS_MON_INFLUXDB},$ds_name_escaped,g" | \
        curl -s -X POST "$HOST/api/dashboards/db" \
            --header "Content-type: application/json" \
            --data @-
}
|
||||
|
||||
# Create the Grafana folder (uid "beegfsalertfolder", title "BeeGFS-Alert")
# by POSTing to the folders endpoint of $HOST.
addFolder() {
    local folder_payload='{"uid": "beegfsalertfolder", "title": "BeeGFS-Alert"}'
    local -a request_opts=(
        -s
        -X POST
        --header "Content-type: application/json"
        --data "$folder_payload"
    )

    curl "${request_opts[@]}" "$HOST/api/folders"
}
|
||||
|
||||
# PUT the notification template "BeeGFS-Email-Template" to Grafana.
# The request body is read by curl from "$alert_path/email-template.json";
# $alert_path and $HOST are set outside this function.
addTemplate() {
    local endpoint="$HOST/api/v1/provisioning/templates/BeeGFS-Email-Template"
    local body_file="$alert_path/email-template.json"

    # "X-Disable-Provenance;" is curl's syntax for a header with empty value.
    curl -s -X PUT "$endpoint" \
        --header "X-Disable-Provenance;" \
        --header "Content-Type: application/json" \
        --data "@$body_file"
}
|
||||
|
||||
# POST a contact point definition to Grafana.
# The request body is read by curl from "$alert_path/contact-point.json";
# $alert_path and $HOST are set outside this function.
addContactPoint() {
    local endpoint="$HOST/api/v1/provisioning/contact-points"
    local body_file="$alert_path/contact-point.json"

    # "X-Disable-Provenance;" is curl's syntax for a header with empty value.
    curl -s -X POST "$endpoint" \
        --header "X-Disable-Provenance;" \
        --header "Content-Type: application/json" \
        --data "@$body_file"
}
|
||||
|
||||
# Replace Grafana's notification policy tree with the content of a JSON file.
#   $1: path to the policies JSON file.
# Reads the global HOST.
function addPolicies() {
   local update_policies
   # Quote the path so that directories containing whitespace work.
   update_policies=$(cat "$1")

   # "X-Disable-Provenance;" is curl's syntax for sending a header with an empty value.
   curl -s -X PUT "$HOST/api/v1/provisioning/policies" \
      --header "X-Disable-Provenance;" \
      --header "Content-Type: application/json" \
      --data "$update_policies"
}
|
||||
|
||||
# Default Grafana base URL (including credentials); used when the script is
# called with "default".
HOST="http://admin:admin@localhost:3000"

# Print the usage text unless the script was called with "default" as first
# argument or with exactly one argument (the Grafana URL).
if [[ $1 != "default" ]] && [[ ! $# -eq 1 ]]; then
   echo "This script imports the default beegfs-mon Alerts into Grafana using its HTTP API."
   echo ""
   echo "Usage: "
   echo "Default installation to localhost: $(basename "$0") default"
   echo "Custom installation: $(basename "$0") <grafana url>"
   echo ""
   echo "Default:"
   echo "$(basename "$0") $HOST"
   exit 0
fi

command -v curl > /dev/null 2>&1 || \
{
   echo "This script requires curl, but it doesn't seem to be installed. Aborting."
   exit 1
}

if [[ $1 != "default" ]]; then
   HOST="$1"
fi

echo "Select an option:"
echo "1. Using BeeGFS Monitoring with Telegraf"
echo "2. Using BeeGFS Monitoring without Telegraf"

read -r -p "Enter your Option: " option

case "$option" in
   1) monType="wtelegraf" ;;
   2) monType="wotelegraf" ;;
   *)
      echo "*** Please select correct option ***"
      exit 1
      ;;
esac

echo "Please select influxdb version:"
echo "1) Influxdb 1.x"
echo "2) Influxdb 2.x"

read -r -p "Enter your influxdb Version: " influxdb_version

# Validate the version before anything is created in Grafana and remember which
# import function matches it.
case "$influxdb_version" in
   1) addAlertFn=addAlert ;;
   2) addAlertFn=addAlertV2 ;;
   *)
      echo "*** Please select correct version of InfluxDB ***"
      exit 1
      ;;
esac

# Look up the datasource once and extract the needed fields from the response.
datasource_json=$(curl -s "$HOST/api/datasources/name/beegfs_mon_influxdb")
DATASOURCE_UID=$(grep -o '"uid": *"[^"]*"' <<< "$datasource_json" | cut -d'"' -f4)
DATASOURCE_NAME=$(grep -o '"name": *"[^"]*"' <<< "$datasource_json" | cut -d'"' -f4)

if [[ "$influxdb_version" == "2" ]]; then
   BUCKET_NAME=$(grep -o '"defaultBucket": *"[^"]*"' <<< "$datasource_json" | cut -d'"' -f4)
fi

# Without the datasource UID every imported alert rule would reference an empty
# datasource, so stop here instead of importing unusable rules.
if [[ -z "$DATASOURCE_UID" ]]; then
   echo "*** Datasource beegfs_mon_influxdb not found in Grafana at $HOST ***" >&2
   exit 1
fi

ALERT_DIR=$(dirname "$0")
alert_path="$ALERT_DIR/alerts"
addFolder

if [[ "$monType" == "wtelegraf" ]]; then
   # With Telegraf: import every alert rule shipped for the selected version.
   for alert_file in "$alert_path"/*-v"$influxdb_version".json; do
      if [ -f "$alert_file" ]; then
         "$addAlertFn" "$alert_file"
      fi
   done
else
   # Without Telegraf: import only this fixed set of alert rules.
   for alert_name in Disk Inodes MetaQueuedrequest StorageQueuedrequest; do
      "$addAlertFn" "$alert_path/$alert_name-alert-v$influxdb_version.json"
   done
fi

addDashboard "$alert_path/alert-dashboard.json"
addTemplate
addContactPoint

# monType is guaranteed to be one of the two values by the option check above.
if [[ "$monType" == "wotelegraf" ]]; then
   addPolicies "$alert_path/policies.json"
else
   addPolicies "$alert_path/policies-telegraf.json"
fi

echo -e "\n\n\n######### Alert is configured. Next step: update email address in contact point of beegfs-email. #########"
|
||||
146
mon/scripts/grafana/import-dashboards
Executable file
146
mon/scripts/grafana/import-dashboards
Executable file
@@ -0,0 +1,146 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Upload one dashboard to Grafana.
#   $1: path to a dashboard JSON file containing ${DS_BEEGFS_MON_INFLUXDB}
#       placeholders.
# Reads the globals HOST and DATASOURCE_NAME.
function addDashboard() {
   # printf '%s' passes the file content through verbatim. ("echo -e" would
   # expand backslash sequences such as \n, \t, \\ or \uXXXX that are part of
   # JSON string escapes and thereby corrupt the dashboard.)
   printf '{"dashboard": %s }\n' "$(cat "$1")" | \
      sed -e "s,\${DS_BEEGFS_MON_INFLUXDB},$DATASOURCE_NAME,g" | \
      curl -s -X POST "$HOST/api/dashboards/db" \
         --header "Content-type: application/json" \
         --data @-
}
|
||||
|
||||
# Create the InfluxDB 1.x datasource in Grafana from a JSON template.
#   $1: path to the template; its %DATABASE_NAME%, %DATABASE_USER%,
#       %DATASOURCE_URL%, %DATASOURCE_NAME% and %PASSWORD% placeholders are
#       filled from the globals of the same name.
# Reads the global HOST.
function addDatasource() {
   # The values end up inside JSON strings by way of a sed replacement.
   # First JSON-escape backslash and double quote, then escape what is special
   # in a sed replacement (backslash, '&' and the ',' delimiter). Otherwise
   # e.g. a password containing ',' or '&' would break or corrupt the payload.
   local escape_expr='s/[\\"]/\\&/g; s/[\\&,]/\\&/g'
   local db_name db_user ds_url ds_name password
   db_name=$(printf '%s' "$DATABASE_NAME" | sed -e "$escape_expr")
   db_user=$(printf '%s' "$DATABASE_USER" | sed -e "$escape_expr")
   ds_url=$(printf '%s' "$DATASOURCE_URL" | sed -e "$escape_expr")
   ds_name=$(printf '%s' "$DATASOURCE_NAME" | sed -e "$escape_expr")
   password=$(printf '%s' "$PASSWORD" | sed -e "$escape_expr")

   sed -e "s,%DATABASE_NAME%,$db_name,g" \
       -e "s,%DATABASE_USER%,$db_user,g" \
       -e "s,%DATASOURCE_URL%,$ds_url,g" \
       -e "s,%DATASOURCE_NAME%,$ds_name,g" \
       -e "s,%PASSWORD%,$password,g" \
       "$1" | \
      curl -s -X POST "$HOST/api/datasources" \
         --header "Content-type: application/json" \
         --data @-
}
|
||||
|
||||
|
||||
# Create the InfluxDB 2.x datasource in Grafana from a JSON template.
#   $1: path to the template; its %BUCKET_NAME%, %ORG_NAME%, %DATASOURCE_URL%,
#       %DATASOURCE_NAME% and %TOKEN% placeholders are filled from the globals
#       of the same name.
# Reads the global HOST.
function addDatasourceV2() {
   # The values end up inside JSON strings by way of a sed replacement.
   # First JSON-escape backslash and double quote, then escape what is special
   # in a sed replacement (backslash, '&' and the ',' delimiter). Otherwise
   # e.g. an organization name containing ',' or '&' would corrupt the payload.
   local escape_expr='s/[\\"]/\\&/g; s/[\\&,]/\\&/g'
   local bucket org ds_url ds_name token
   bucket=$(printf '%s' "$BUCKET_NAME" | sed -e "$escape_expr")
   org=$(printf '%s' "$ORG_NAME" | sed -e "$escape_expr")
   ds_url=$(printf '%s' "$DATASOURCE_URL" | sed -e "$escape_expr")
   ds_name=$(printf '%s' "$DATASOURCE_NAME" | sed -e "$escape_expr")
   token=$(printf '%s' "$TOKEN" | sed -e "$escape_expr")

   sed -e "s,%BUCKET_NAME%,$bucket,g" \
       -e "s,%ORG_NAME%,$org,g" \
       -e "s,%DATASOURCE_URL%,$ds_url,g" \
       -e "s,%DATASOURCE_NAME%,$ds_name,g" \
       -e "s,%TOKEN%,$token,g" \
       "$1" | \
      curl -s -X POST "$HOST/api/datasources" \
         --header "Content-type: application/json" \
         --data @-
}
|
||||
|
||||
|
||||
# Defaults used when the script is called with "default".
DATASOURCE_NAME="beegfs_mon_influxdb"
HOST="http://admin:admin@localhost:3000"
DATASOURCE_URL="http://localhost:8086"

# Print the usage text unless the script was called with "default" as first
# argument or with exactly two arguments (Grafana URL and datasource URL).
if [[ $1 != "default" ]] && [[ ! $# -eq 2 ]]; then
   echo "This script imports the default beegfs-mon Dashboards into Grafana using its HTTP API."
   echo "Curl is required."
   echo ""
   echo "Usage: "
   echo "Default installation to localhost: $(basename "$0") default"
   echo "Custom installation: $(basename "$0") <grafana url> <datasource url>"
   echo ""
   echo "Default:"
   # Only the two URLs are arguments; the database name is asked for interactively.
   echo "$(basename "$0") $HOST $DATASOURCE_URL"
   exit 0
fi

command -v curl > /dev/null 2>&1 || \
{
   echo "This script requires curl, but it doesn't seem to be installed. Aborting."
   exit 1
}

echo "Select an option:"
echo "1. Using BeeGFS Monitoring with Telegraf"
echo "2. Using BeeGFS Monitoring without Telegraf"

read -r -p "Enter your Option: " option

case "$option" in
   1) monType="wtelegraf" ;;
   2) monType="wotelegraf" ;;
   *)
      echo "*** Please select correct option ***"
      exit 1
      ;;
esac

echo "Please select influxdb version:"
echo "1) Influxdb 1.x"
echo "2) Influxdb 2.x"

if [[ $1 != "default" ]]; then
   HOST="$1"
   DATASOURCE_URL="$2"
fi

GRAFANA_DIR=$(dirname "$0")

read -r -p "Enter your influxdb Version: " influxdb_version

# Ask for the credentials matching the selected version. "read -r" keeps
# backslashes in the entered values intact; the echo after each silent read
# ends the prompt line, because the user's Enter key is not echoed.
case "$influxdb_version" in
   1)
      read -r -p "Enter Database Name: " DATABASE_NAME
      read -r -p "Enter Database User: " DATABASE_USER
      read -r -s -p "Enter Database Password: " PASSWORD
      echo
      ;;
   2)
      read -r -p "Enter Bucket Name:" BUCKET_NAME
      read -r -p "Enter Organizations: " ORG_NAME
      read -r -s -p "Enter Token: " TOKEN
      echo
      ;;
   *)
      echo "*** Please select correct version of InfluxDB ***"
      exit 1
      ;;
esac

if [[ "$influxdb_version" == "1" ]]; then
   addDatasource "$GRAFANA_DIR/influxdb.json"
else
   addDatasourceV2 "$GRAFANA_DIR/influxdbV2.json"
fi

# Dashboard files are named <dashboard>[_telegraf]_influxdbv<version>.json.
if [[ "$monType" == "wtelegraf" ]]; then
   dashboard_variant="_telegraf"
else
   dashboard_variant=""
fi

for dashboard in beegfs_overview meta storage storage_targets client_ops_node client_ops_user; do
   addDashboard "$GRAFANA_DIR/${dashboard}${dashboard_variant}_influxdbv${influxdb_version}.json"
done
|
||||
10
mon/scripts/grafana/influxdb.json
Normal file
10
mon/scripts/grafana/influxdb.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"name":"%DATASOURCE_NAME%",
|
||||
"type":"influxdb",
|
||||
"url":"%DATASOURCE_URL%",
|
||||
"access":"proxy",
|
||||
"user":"%DATABASE_USER%",
|
||||
"database":"%DATABASE_NAME%",
|
||||
"secureJsonData":{
|
||||
"password":"%PASSWORD%"}
|
||||
}
|
||||
12
mon/scripts/grafana/influxdbV2.json
Normal file
12
mon/scripts/grafana/influxdbV2.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"name":"%DATASOURCE_NAME%",
|
||||
"type":"influxdb",
|
||||
"url":"%DATASOURCE_URL%",
|
||||
"access":"proxy",
|
||||
"jsonData":{
|
||||
"organization":"%ORG_NAME%",
|
||||
"defaultBucket":"%BUCKET_NAME%",
|
||||
"version":"Flux"},
|
||||
"secureJsonData":{
|
||||
"token":"%TOKEN%"}
|
||||
}
|
||||
876
mon/scripts/grafana/meta_influxdbv1.json
Normal file
876
mon/scripts/grafana/meta_influxdbv1.json
Normal file
@@ -0,0 +1,876 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-YlBl",
|
||||
"seriesBy": "max"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 60,
|
||||
"gradientMode": "opacity",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Received"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#3274d9",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Sent"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "light-green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.transform",
|
||||
"value": "negative-Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 30,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "asc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Received",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"10s"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"netRecvBytes"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"alias": "Sent",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"10s"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"netSendBytes"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Network Traffic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Processed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#36bdbc",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Queued"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#ffb357",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 32,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Processed",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"workRequests"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"alias": "Queued",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"queuedRequests"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Work Requests",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 46,
|
||||
"links": [],
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Responding",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"1m"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "meta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(\"isResponding\") FROM \"meta\" WHERE \"nodeID\" =~ /^$metaID$/ AND $timeFilter GROUP BY time($__interval) fill(previous)",
|
||||
"rawQuery": false,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"isResponding"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Availability",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "opacity",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Direct"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "super-light-yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Indirect"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "super-light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 34,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Direct",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "meta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"directWorkListSize"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"alias": "Indirect",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"measurement": "meta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"indirectWorkListSize"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeNumID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Worklist Size",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from meta with key = \"nodeNumID\" ",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"multi": false,
|
||||
"name": "metaID",
|
||||
"options": [],
|
||||
"query": "show tag values from meta with key = \"nodeNumID\" ",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Meta Server",
|
||||
"uid": "OUJBUPQW",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
903
mon/scripts/grafana/meta_influxdbv2.json
Normal file
903
mon/scripts/grafana/meta_influxdbv2.json
Normal file
@@ -0,0 +1,903 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-YlBl",
|
||||
"seriesBy": "max"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 60,
|
||||
"gradientMode": "opacity",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Received"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Received"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#3274d9",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Send"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Sent"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#96d98d",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.transform",
|
||||
"value": "negative-Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 22,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "asc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Received",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"10s"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"netRecvBytes\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Received\"})",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"netRecvBytes"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"alias": "Sent",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"10s"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"netSendBytes\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Send\"})",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"netSendBytes"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Network Traffic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Processed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Processed"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#36bdbc",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Queued"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Queued"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#ffb357",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 24,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Processed",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"workRequests\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Processed\"})",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"workRequests"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"alias": "Queued",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"measurement": "highResMeta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"queuedRequests\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Queued\"})",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"queuedRequests"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Work Requests",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "#9340cc8f",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"custom": {
|
||||
"align": "center",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "semi-dark-purple",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Value"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "isResponding"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 8,
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 0,
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [],
|
||||
"measurement": "meta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"meta\" and r._field == \"isResponding\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> keep(columns: [\"_time\", \"_value\"]) |> aggregateWindow(every: 1m, fn: last, createEmpty: false) |> yield(name: \"last\") ",
|
||||
"rawQuery": false,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"isResponding"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Availability",
|
||||
"transformations": [],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "opacity",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Direct"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "super-light-yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Direct"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Indirect"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "super-light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Indirect"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 26,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Direct",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "meta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"meta\" and r._field == \"directWorkListSize\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Direct\"})",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"directWorkListSize"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"alias": "Indirect",
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"measurement": "meta",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"meta\" and r._field == \"indirectWorkListSize\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Indirect\"})",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"indirectWorkListSize"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "nodeID",
|
||||
"operator": "=~",
|
||||
"value": "/^$metaID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Worklist Size",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"meta\")",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "metaID",
|
||||
"multi": false,
|
||||
"name": "metaID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"meta\")",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Meta Server",
|
||||
"uid": "OTSb6z",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
3195
mon/scripts/grafana/meta_telegraf_influxdbv1.json
Normal file
3195
mon/scripts/grafana/meta_telegraf_influxdbv1.json
Normal file
File diff suppressed because it is too large
Load Diff
2774
mon/scripts/grafana/meta_telegraf_influxdbv2.json
Normal file
2774
mon/scripts/grafana/meta_telegraf_influxdbv2.json
Normal file
File diff suppressed because it is too large
Load Diff
1311
mon/scripts/grafana/storage_influxdbv1.json
Normal file
1311
mon/scripts/grafana/storage_influxdbv1.json
Normal file
File diff suppressed because it is too large
Load Diff
1352
mon/scripts/grafana/storage_influxdbv2.json
Normal file
1352
mon/scripts/grafana/storage_influxdbv2.json
Normal file
File diff suppressed because it is too large
Load Diff
633
mon/scripts/grafana/storage_targets_influxdbv1.json
Normal file
633
mon/scripts/grafana/storage_targets_influxdbv1.json
Normal file
@@ -0,0 +1,633 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisSoftMin": 0,
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Total"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Total"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Used"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Used"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"links": [],
|
||||
"maxPerRow": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.3.2",
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "storageTargets",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT max(\"diskSpaceTotal\"), last(\"diskSpaceFree\"), difference(\"diskSpaceTotal\"), difference(\"diskSpaceTotal\") FROM \"storageTargets\" WHERE (\"storageTargetID\" =~ /^$storageTargetID$/) AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||
"rawQuery": false,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"diskSpaceTotal"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Total"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"diskSpaceFree"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Free"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "storageTargetID",
|
||||
"operator": "=~",
|
||||
"value": "/^$storageTargetID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Disk Space ($storageTargetID)",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "calculateField",
|
||||
"options": {
|
||||
"alias": "Used",
|
||||
"binary": {
|
||||
"left": "storageTargets.Total",
|
||||
"operator": "-",
|
||||
"reducer": "sum",
|
||||
"right": "storageTargets.Free"
|
||||
},
|
||||
"mode": "binary",
|
||||
"reduce": {
|
||||
"reducer": "sum"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Inodes Total"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#73ffe4",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Inodes Total"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Inodes Used"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Inodes Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"maxPerRow": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "storageTargets",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"inodesTotal"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Inodes Total"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"inodesFree"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Inodes Free"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "storageTargetID",
|
||||
"operator": "=~",
|
||||
"value": "/^$storageTargetID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Inodes ($storageTargetID)",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "calculateField",
|
||||
"options": {
|
||||
"alias": "Inodes Used",
|
||||
"binary": {
|
||||
"left": "storageTargets.Inodes Total",
|
||||
"operator": "-",
|
||||
"reducer": "sum",
|
||||
"right": "storageTargets.Inodes Free"
|
||||
},
|
||||
"mode": "binary",
|
||||
"reduce": {
|
||||
"reducer": "sum"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from storage with key IN ( \"nodeNumID\") ",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Storage ID",
|
||||
"multi": false,
|
||||
"name": "storageID",
|
||||
"options": [],
|
||||
"query": "show tag values from storage with key IN ( \"nodeNumID\") ",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Storage TargetID",
|
||||
"multi": true,
|
||||
"name": "storageTargetID",
|
||||
"options": [],
|
||||
"query": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Storage Targets",
|
||||
"uid": "NyuGiE04k",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
445
mon/scripts/grafana/storage_targets_influxdbv2.json
Normal file
445
mon/scripts/grafana/storage_targets_influxdbv2.json
Normal file
@@ -0,0 +1,445 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "diskSpaceTotal"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Space Total"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "diskSpaceUsed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Space Used"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "diskSpaceFree"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.3.2",
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"diskSpaceTotal\" or r._field == \"diskSpaceFree\")\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.diskSpaceTotal - r.diskSpaceFree }))\r\n|> rename(columns: {_value: \"diskSpaceUsed\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk Usage ($storageTargetID)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-YlBl"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "inodesTotal"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Inodes Total"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#73ffe4",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "inodesUsed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Inodes Used"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "inodesFree"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"hide": false,
|
||||
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"inodesTotal\" or r._field == \"inodesFree\" )\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.inodesTotal - r.inodesFree }))\r\n|> rename(columns: {_value: \"inodesUsed\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Inodes ($storageTargetID)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Storage ID",
|
||||
"multi": false,
|
||||
"name": "storageID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Storage TargetID",
|
||||
"multi": true,
|
||||
"name": "storageTargetID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Storage Targets",
|
||||
"uid": "CtdY1AVzy",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
633
mon/scripts/grafana/storage_targets_telegraf_influxdbv1.json
Normal file
633
mon/scripts/grafana/storage_targets_telegraf_influxdbv1.json
Normal file
@@ -0,0 +1,633 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisSoftMin": 0,
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Total"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Total"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Used"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Used"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"links": [],
|
||||
"maxPerRow": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.3.2",
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"none"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "storageTargets",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT max(\"diskSpaceTotal\"), last(\"diskSpaceFree\"), difference(\"diskSpaceTotal\"), difference(\"diskSpaceTotal\") FROM \"storageTargets\" WHERE (\"storageTargetID\" =~ /^$storageTargetID$/) AND $timeFilter GROUP BY time($__interval) fill(none)",
|
||||
"rawQuery": false,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"diskSpaceTotal"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Total"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"diskSpaceFree"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Free"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "storageTargetID",
|
||||
"operator": "=~",
|
||||
"value": "/^$storageTargetID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Disk Space ($storageTargetID)",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "calculateField",
|
||||
"options": {
|
||||
"alias": "Used",
|
||||
"binary": {
|
||||
"left": "storageTargets.Total",
|
||||
"operator": "-",
|
||||
"reducer": "sum",
|
||||
"right": "storageTargets.Free"
|
||||
},
|
||||
"mode": "binary",
|
||||
"reduce": {
|
||||
"reducer": "sum"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Inodes Total"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#73ffe4",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Inodes Total"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Inodes Used"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "storageTargets.Inodes Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"maxPerRow": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "storageTargets",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"inodesTotal"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Inodes Total"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"inodesFree"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "last"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"Inodes Free"
|
||||
],
|
||||
"type": "alias"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"key": "storageTargetID",
|
||||
"operator": "=~",
|
||||
"value": "/^$storageTargetID$/"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Inodes ($storageTargetID)",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "calculateField",
|
||||
"options": {
|
||||
"alias": "Inodes Used",
|
||||
"binary": {
|
||||
"left": "storageTargets.Inodes Total",
|
||||
"operator": "-",
|
||||
"reducer": "sum",
|
||||
"right": "storageTargets.Inodes Free"
|
||||
},
|
||||
"mode": "binary",
|
||||
"reduce": {
|
||||
"reducer": "sum"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from storage with key IN ( \"nodeNumID\") ",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Storage ID",
|
||||
"multi": false,
|
||||
"name": "storageID",
|
||||
"options": [],
|
||||
"query": "show tag values from storage with key IN ( \"nodeNumID\") ",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Storage TargetID",
|
||||
"multi": true,
|
||||
"name": "storageTargetID",
|
||||
"options": [],
|
||||
"query": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Storage Targets",
|
||||
"uid": "NyuGiE04k",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
445
mon/scripts/grafana/storage_targets_telegraf_influxdbv2.json
Normal file
445
mon/scripts/grafana/storage_targets_telegraf_influxdbv2.json
Normal file
@@ -0,0 +1,445 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_BEEGFS_MON_INFLUXDB",
|
||||
"label": "beegfs_mon_influxdb",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "diskSpaceTotal"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Space Total"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "diskSpaceUsed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Disk Space Used"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "diskSpaceFree"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.3.2",
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"diskSpaceTotal\" or r._field == \"diskSpaceFree\")\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.diskSpaceTotal - r.diskSpaceFree }))\r\n|> rename(columns: {_value: \"diskSpaceUsed\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk Usage ($storageTargetID)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-YlBl"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 30,
|
||||
"gradientMode": "hue",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "inodesTotal"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Inodes Total"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#73ffe4",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "inodesUsed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Inodes Used"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "semi-dark-yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "inodesFree"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": true,
|
||||
"tooltip": true,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeat": "storageTargetID",
|
||||
"repeatDirection": "v",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"hide": false,
|
||||
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"inodesTotal\" or r._field == \"inodesFree\" )\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.inodesTotal - r.inodesFree }))\r\n|> rename(columns: {_value: \"inodesUsed\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Inodes ($storageTargetID)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "buckets()",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Bucket",
|
||||
"multi": false,
|
||||
"name": "bucket",
|
||||
"options": [],
|
||||
"query": "buckets()",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Storage ID",
|
||||
"multi": false,
|
||||
"name": "storageID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "influxdb",
|
||||
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
|
||||
},
|
||||
"definition": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Storage TargetID",
|
||||
"multi": true,
|
||||
"name": "storageTargetID",
|
||||
"options": [],
|
||||
"query": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "BeeGFS Storage Targets",
|
||||
"uid": "CtdY1AVzy",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
4068
mon/scripts/grafana/storage_telegraf_influxdbv1.json
Normal file
4068
mon/scripts/grafana/storage_telegraf_influxdbv1.json
Normal file
File diff suppressed because it is too large
Load Diff
3177
mon/scripts/grafana/storage_telegraf_influxdbv2.json
Normal file
3177
mon/scripts/grafana/storage_telegraf_influxdbv2.json
Normal file
File diff suppressed because it is too large
Load Diff
324
mon/source/app/App.cpp
Normal file
324
mon/source/app/App.cpp
Normal file
@@ -0,0 +1,324 @@
|
||||
#include "App.h"
|
||||
|
||||
#include <app/SignalHandler.h>
|
||||
#include <common/components/ComponentInitException.h>
|
||||
#include <common/components/worker/DummyWork.h>
|
||||
#include <misc/Cassandra.h>
|
||||
#include <misc/InfluxDB.h>
|
||||
|
||||
|
||||
/**
 * Remembers the command line so that run() can build the Config from it later.
 *
 * appResult is initialised pessimistically to RUNTIME_ERROR: the member used to be left
 * uninitialised, so getAppResult() could read an indeterminate value whenever run() had not
 * (yet) assigned a result. run() overwrites it on every path it handles.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; only the pointer is stored, not a copy
 */
App::App(int argc, char** argv) :
   appResult(AppCode::RUNTIME_ERROR), argc(argc), argv(argv)
{}
|
||||
|
||||
void App::run()
|
||||
{
|
||||
try
|
||||
{
|
||||
cfg = boost::make_unique<Config>(argc,argv);
|
||||
runNormal();
|
||||
appResult = AppCode::NO_ERROR;
|
||||
}
|
||||
catch (const InvalidConfigException& e)
|
||||
{
|
||||
std::ostringstream err;
|
||||
err << "Config error: " << e.what() << std::endl
|
||||
<< "[BeeGFS Mon Version: " << BEEGFS_VERSION << std::endl
|
||||
<< "Refer to the default config file (/etc/beegfs/beegfs-mon.conf)" << std::endl
|
||||
<< "or visit http://www.beegfs.com to find out about configuration options.]";
|
||||
printOrLogError(err.str());
|
||||
appResult = AppCode::INVALID_CONFIG;
|
||||
}
|
||||
catch (const ComponentInitException& e)
|
||||
{
|
||||
printOrLogError("Component initialization error: " + std::string(e.what()));
|
||||
appResult = AppCode::INITIALIZATION_ERROR;
|
||||
}
|
||||
catch (const std::runtime_error& e)
|
||||
{
|
||||
printOrLogError("Runtime error: " + std::string(e.what()));
|
||||
appResult = AppCode::RUNTIME_ERROR;
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
printOrLogError("Generic error: " + std::string(e.what()));
|
||||
appResult = AppCode::RUNTIME_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
void App::printOrLogError(const std::string& text) const
|
||||
{
|
||||
if (Logger::isInitialized())
|
||||
LOG(GENERAL, ERR, text);
|
||||
else
|
||||
std::cerr << std::endl << text << std::endl << std::endl;
|
||||
}
|
||||
|
||||
void App::runNormal()
|
||||
{
|
||||
Logger::createLogger(cfg->getLogLevel(), cfg->getLogType(), cfg->getLogNoDate(),
|
||||
cfg->getLogStdFile(), cfg->getLogNumLines(), cfg->getLogNumRotatedFiles());
|
||||
|
||||
pidFileLockFD = createAndLockPIDFile(cfg->getPIDFile());
|
||||
initDataObjects();
|
||||
SignalHandler::registerSignalHandler(this);
|
||||
initLocalNodeInfo();
|
||||
initWorkers();
|
||||
initComponents();
|
||||
|
||||
RDMASocket::rdmaForkInitOnce();
|
||||
|
||||
|
||||
if (cfg->getRunDaemonized())
|
||||
daemonize();
|
||||
|
||||
logInfos();
|
||||
|
||||
// make sure components don't receive SIGINT/SIGTERM (blocked signals are inherited)
|
||||
PThread::blockInterruptSignals();
|
||||
startWorkers();
|
||||
startComponents();
|
||||
PThread::unblockInterruptSignals();
|
||||
|
||||
joinComponents();
|
||||
joinWorkers();
|
||||
}
|
||||
|
||||
void App::initLocalNodeInfo()
|
||||
{
|
||||
bool useRDMA = cfg->getConnUseRDMA();
|
||||
unsigned portUDP = cfg->getConnMonPort();
|
||||
|
||||
StringList allowedInterfaces;
|
||||
std::string interfacesFilename = cfg->getConnInterfacesFile();
|
||||
if (interfacesFilename.length() )
|
||||
cfg->loadStringListFile(interfacesFilename.c_str(), allowedInterfaces);
|
||||
|
||||
NetworkInterfaceCard::findAll(&allowedInterfaces, useRDMA, &localNicList);
|
||||
|
||||
if (localNicList.empty() )
|
||||
throw InvalidConfigException("Couldn't find any usable NIC");
|
||||
|
||||
localNicList.sort(NetworkInterfaceCard::NicAddrComp{&allowedInterfaces});
|
||||
NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);
|
||||
|
||||
noDefaultRouteNets = std::make_shared<NetVector>();
|
||||
if(!initNoDefaultRouteList(noDefaultRouteNets.get()))
|
||||
throw InvalidConfigException("Failed to parse connNoDefaultRoute");
|
||||
|
||||
initRoutingTable();
|
||||
updateRoutingTable();
|
||||
|
||||
std::string nodeID = System::getHostname();
|
||||
|
||||
// TODO add a Mon nodetype at some point
|
||||
localNode = std::make_shared<LocalNode>(NODETYPE_Client, nodeID, NumNodeID(1), portUDP, 0, localNicList);
|
||||
}
|
||||
|
||||
void App::initDataObjects()
|
||||
{
|
||||
netFilter = boost::make_unique<NetFilter>(cfg->getConnNetFilterFile());
|
||||
tcpOnlyFilter = boost::make_unique<NetFilter>(cfg->getConnTcpOnlyFilterFile());
|
||||
netMessageFactory = boost::make_unique<NetMessageFactory>();
|
||||
workQueue = boost::make_unique<MultiWorkQueue>();
|
||||
|
||||
targetMapper = boost::make_unique<TargetMapper>();
|
||||
|
||||
metaNodes = boost::make_unique<NodeStoreMetaEx>();
|
||||
storageNodes = boost::make_unique<NodeStoreStorageEx>();
|
||||
mgmtNodes = boost::make_unique<NodeStoreMgmtEx>();
|
||||
|
||||
metaBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
|
||||
storageBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
|
||||
|
||||
|
||||
if (cfg->getDbType() == Config::DbTypes::CASSANDRA)
|
||||
{
|
||||
Cassandra::Config cassandraConfig;
|
||||
cassandraConfig.host = cfg->getDbHostName();
|
||||
cassandraConfig.port = cfg->getDbHostPort();
|
||||
cassandraConfig.database = cfg->getDbDatabase();
|
||||
cassandraConfig.maxInsertsPerBatch = cfg->getCassandraMaxInsertsPerBatch();
|
||||
cassandraConfig.TTLSecs = cfg->getCassandraTTLSecs();
|
||||
|
||||
tsdb = boost::make_unique<Cassandra>(std::move(cassandraConfig));
|
||||
}
|
||||
else // Config::DbTypes::INFLUXDB OR Config::DbTypes::INFLUXDB2
|
||||
{
|
||||
InfluxDB::Config influxdbConfig;
|
||||
influxdbConfig.host = cfg->getDbHostName();
|
||||
influxdbConfig.port = cfg->getDbHostPort();
|
||||
influxdbConfig.maxPointsPerRequest = cfg->getInfluxdbMaxPointsPerRequest();
|
||||
influxdbConfig.httpTimeout = cfg->getHttpTimeout();
|
||||
influxdbConfig.curlCheckSSLCertificates = cfg->getCurlCheckSSLCertificates();
|
||||
if (cfg->getDbType() == Config::DbTypes::INFLUXDB2)
|
||||
{
|
||||
influxdbConfig.bucket = cfg->getDbBucket();
|
||||
influxdbConfig.organization = cfg->getDbAuthOrg();
|
||||
influxdbConfig.token = cfg->getDbAuthToken();
|
||||
influxdbConfig.dbVersion = INFLUXDB2;
|
||||
}
|
||||
else
|
||||
{
|
||||
influxdbConfig.database = cfg->getDbDatabase();
|
||||
influxdbConfig.setRetentionPolicy = cfg->getInfluxDbSetRetentionPolicy();
|
||||
influxdbConfig.retentionDuration = cfg->getInfluxDbRetentionDuration();
|
||||
influxdbConfig.username = cfg->getDbAuthUsername();
|
||||
influxdbConfig.password = cfg->getDbAuthPassword();
|
||||
influxdbConfig.dbVersion = INFLUXDB;
|
||||
}
|
||||
tsdb = boost::make_unique<InfluxDB>(std::move(influxdbConfig));
|
||||
}
|
||||
}
|
||||
|
||||
void App::initComponents()
|
||||
{
|
||||
nodeListRequestor = boost::make_unique<NodeListRequestor>(this);
|
||||
statsCollector = boost::make_unique<StatsCollector>(this);
|
||||
cleanUp = boost::make_unique<CleanUp>(this);
|
||||
}
|
||||
|
||||
void App::startComponents()
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Starting components...");
|
||||
nodeListRequestor->start();
|
||||
statsCollector->start();
|
||||
cleanUp->start();
|
||||
LOG(GENERAL, DEBUG, "Components running.");
|
||||
}
|
||||
|
||||
void App::stopComponents()
|
||||
{
|
||||
if (nodeListRequestor)
|
||||
nodeListRequestor->selfTerminate();
|
||||
if (statsCollector)
|
||||
statsCollector->selfTerminate();
|
||||
if (cleanUp)
|
||||
cleanUp->selfTerminate();
|
||||
|
||||
stopWorkers();
|
||||
selfTerminate();
|
||||
}
|
||||
|
||||
void App::joinComponents()
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Joining Component threads...");
|
||||
nodeListRequestor->join();
|
||||
statsCollector->join();
|
||||
cleanUp->join();
|
||||
LOG(GENERAL, CRITICAL, "All components stopped. Exiting now.");
|
||||
}
|
||||
|
||||
void App::initWorkers()
|
||||
{
|
||||
const unsigned numDirectWorkers = 1;
|
||||
const unsigned workersBufSize = 1024*1024;
|
||||
|
||||
unsigned numWorkers = cfg->getTuneNumWorkers();
|
||||
|
||||
for (unsigned i=0; i < numWorkers; i++)
|
||||
{
|
||||
auto worker = boost::make_unique<Worker>("Worker" + StringTk::intToStr(i+1),
|
||||
workQueue.get(), QueueWorkType_INDIRECT);
|
||||
|
||||
worker->setBufLens(workersBufSize, workersBufSize);
|
||||
workerList.push_back(std::move(worker));
|
||||
}
|
||||
|
||||
for (unsigned i=0; i < numDirectWorkers; i++)
|
||||
{
|
||||
auto worker = boost::make_unique<Worker>("DirectWorker" + StringTk::intToStr(i+1),
|
||||
workQueue.get(), QueueWorkType_DIRECT);
|
||||
|
||||
worker->setBufLens(workersBufSize, workersBufSize);
|
||||
workerList.push_back(std::move(worker));
|
||||
}
|
||||
}
|
||||
|
||||
void App::startWorkers()
|
||||
{
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
(*worker)->start();
|
||||
}
|
||||
}
|
||||
|
||||
void App::stopWorkers()
|
||||
{
|
||||
// need two loops because we don't know if the worker that handles the work will be the same that
|
||||
// received the self-terminate-request
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
(*worker)->selfTerminate();
|
||||
|
||||
// add dummy work to wake up the worker immediately for faster self termination
|
||||
PersonalWorkQueue* personalQ = (*worker)->getPersonalWorkQueue();
|
||||
workQueue->addPersonalWork(new DummyWork(), personalQ);
|
||||
}
|
||||
}
|
||||
|
||||
void App::joinWorkers()
|
||||
{
|
||||
|
||||
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
|
||||
{
|
||||
waitForComponentTermination((*worker).get());
|
||||
}
|
||||
}
|
||||
|
||||
void App::logInfos()
|
||||
{
|
||||
LOG(GENERAL, CRITICAL, std::string("Version: ") + BEEGFS_VERSION);
|
||||
#ifdef BEEGFS_DEBUG
|
||||
LOG(GENERAL, DEBUG, "--DEBUG VERSION--");
|
||||
#endif
|
||||
|
||||
// list usable network interfaces
|
||||
NicAddressList nicList = getLocalNicList();
|
||||
logUsableNICs(NULL, nicList);
|
||||
|
||||
// print net filters
|
||||
if (netFilter->getNumFilterEntries() )
|
||||
{
|
||||
LOG(GENERAL, WARNING, std::string("Net filters: ")
|
||||
+ StringTk::uintToStr(netFilter->getNumFilterEntries() ) );
|
||||
}
|
||||
|
||||
if (tcpOnlyFilter->getNumFilterEntries() )
|
||||
{
|
||||
LOG(GENERAL, WARNING, std::string("TCP-only filters: ")
|
||||
+ StringTk::uintToStr(tcpOnlyFilter->getNumFilterEntries() ) );
|
||||
}
|
||||
}
|
||||
|
||||
void App::daemonize()
|
||||
{
|
||||
int nochdir = 1; // 1 to keep working directory
|
||||
int noclose = 0; // 1 to keep stdin/-out/-err open
|
||||
|
||||
LOG(GENERAL, CRITICAL, "Detaching process...");
|
||||
|
||||
int detachRes = daemon(nochdir, noclose);
|
||||
if (detachRes == -1)
|
||||
throw std::runtime_error(std::string("Unable to detach process: ")
|
||||
+ System::getErrString());
|
||||
|
||||
updateLockedPIDFile(pidFileLockFD); // ignored if pidFileFD is -1
|
||||
}
|
||||
|
||||
void App::handleComponentException(std::exception& e)
|
||||
{
|
||||
LOG(GENERAL, CRITICAL, "This component encountered an unrecoverable error.", sysErr,
|
||||
("Exception", e.what()));
|
||||
|
||||
LOG(GENERAL, WARNING, "Shutting down...");
|
||||
stopComponents();
|
||||
}
|
||||
|
||||
void App::handleNetworkInterfaceFailure(const std::string& devname)
|
||||
{
|
||||
// Nothing to do. This App has no internodeSyncer that would rescan the
|
||||
// netdevs.
|
||||
LOG(GENERAL, ERR, "Network interface failure.",
|
||||
("Device", devname));
|
||||
}
|
||||
184
mon/source/app/App.h
Normal file
184
mon/source/app/App.h
Normal file
@@ -0,0 +1,184 @@
|
||||
#ifndef APP_H_
|
||||
#define APP_H_
|
||||
|
||||
#include <app/Config.h>
|
||||
#include <common/app/AbstractApp.h>
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/components/worker/Worker.h>
|
||||
#include <common/nodes/LocalNode.h>
|
||||
#include <common/nodes/NodeStoreClients.h>
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/toolkit/NetFilter.h>
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
#include <components/CleanUp.h>
|
||||
#include <components/StatsCollector.h>
|
||||
#include <components/NodeListRequestor.h>
|
||||
#include <net/message/NetMessageFactory.h>
|
||||
#include <nodes/NodeStoreMetaEx.h>
|
||||
#include <nodes/NodeStoreStorageEx.h>
|
||||
#include <nodes/NodeStoreMgmtEx.h>
|
||||
|
||||
class App : public AbstractApp
|
||||
{
|
||||
public:
|
||||
enum AppCode
|
||||
{
|
||||
NO_ERROR = 0,
|
||||
INVALID_CONFIG = 1,
|
||||
INITIALIZATION_ERROR = 2,
|
||||
RUNTIME_ERROR = 3
|
||||
};
|
||||
|
||||
|
||||
App(int argc, char** argv);
|
||||
|
||||
virtual void run() override;
|
||||
virtual void stopComponents() override;
|
||||
virtual void handleComponentException(std::exception& e) override;
|
||||
virtual void handleNetworkInterfaceFailure(const std::string& devname) override;
|
||||
|
||||
|
||||
private:
|
||||
int appResult;
|
||||
int argc;
|
||||
char** argv;
|
||||
LockFD pidFileLockFD;
|
||||
|
||||
std::unique_ptr<TargetMapper> targetMapper;
|
||||
|
||||
std::unique_ptr<Config> cfg;
|
||||
std::unique_ptr<NetFilter> netFilter;
|
||||
std::unique_ptr<NetFilter> tcpOnlyFilter;
|
||||
std::unique_ptr<NetMessageFactory> netMessageFactory;
|
||||
NicListCapabilities localNicCaps;
|
||||
std::shared_ptr<Node> localNode;
|
||||
std::unique_ptr<TSDatabase> tsdb;
|
||||
std::unique_ptr<MultiWorkQueue> workQueue;
|
||||
|
||||
std::unique_ptr<NodeStoreMgmtEx> mgmtNodes;
|
||||
std::unique_ptr<NodeStoreMetaEx> metaNodes;
|
||||
std::unique_ptr<NodeStoreStorageEx> storageNodes;
|
||||
std::unique_ptr<MirrorBuddyGroupMapper> metaBuddyGroupMapper;
|
||||
std::unique_ptr<MirrorBuddyGroupMapper> storageBuddyGroupMapper;
|
||||
|
||||
std::unique_ptr<NodeListRequestor> nodeListRequestor;
|
||||
std::unique_ptr<StatsCollector> statsCollector;
|
||||
std::unique_ptr<CleanUp> cleanUp;
|
||||
|
||||
std::list<std::unique_ptr<Worker>> workerList;
|
||||
|
||||
void printOrLogError(const std::string& text) const;
|
||||
|
||||
void runNormal();
|
||||
void initDataObjects();
|
||||
void initComponents();
|
||||
void startComponents();
|
||||
void joinComponents();
|
||||
void initWorkers();
|
||||
void startWorkers();
|
||||
void stopWorkers();
|
||||
void joinWorkers();
|
||||
void initLocalNodeInfo();
|
||||
void logInfos();
|
||||
void daemonize();
|
||||
|
||||
public:
|
||||
NodeStoreServers* getServerStoreFromType(NodeType nodeType)
|
||||
{
|
||||
switch (nodeType)
|
||||
{
|
||||
case NODETYPE_Meta:
|
||||
return metaNodes.get();
|
||||
|
||||
case NODETYPE_Storage:
|
||||
return storageNodes.get();
|
||||
|
||||
case NODETYPE_Mgmt:
|
||||
return mgmtNodes.get();
|
||||
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
virtual ICommonConfig* getCommonConfig() const override
|
||||
{
|
||||
return cfg.get();
|
||||
}
|
||||
|
||||
virtual NetFilter* getNetFilter() const override
|
||||
{
|
||||
return netFilter.get();
|
||||
}
|
||||
|
||||
virtual NetFilter* getTcpOnlyFilter() const override
|
||||
{
|
||||
return tcpOnlyFilter.get();
|
||||
}
|
||||
|
||||
virtual AbstractNetMessageFactory* getNetMessageFactory() const override
|
||||
{
|
||||
return netMessageFactory.get();
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> getLocalNode()
|
||||
{
|
||||
return localNode;
|
||||
}
|
||||
|
||||
Config* getConfig()
|
||||
{
|
||||
return cfg.get();
|
||||
}
|
||||
|
||||
MultiWorkQueue *getWorkQueue()
|
||||
{
|
||||
return workQueue.get();
|
||||
}
|
||||
|
||||
NodeStoreMetaEx *getMetaNodes()
|
||||
{
|
||||
return metaNodes.get();
|
||||
}
|
||||
|
||||
NodeStoreStorageEx *getStorageNodes()
|
||||
{
|
||||
return storageNodes.get();
|
||||
}
|
||||
|
||||
NodeStoreMgmtEx *getMgmtNodes()
|
||||
{
|
||||
return mgmtNodes.get();
|
||||
}
|
||||
|
||||
TSDatabase *getTSDB()
|
||||
{
|
||||
return tsdb.get();
|
||||
}
|
||||
|
||||
TargetMapper* getTargetMapper()
|
||||
{
|
||||
return targetMapper.get();
|
||||
}
|
||||
|
||||
MirrorBuddyGroupMapper* getMetaBuddyGroupMapper()
|
||||
{
|
||||
return metaBuddyGroupMapper.get();
|
||||
}
|
||||
|
||||
MirrorBuddyGroupMapper* getStorageBuddyGroupMapper()
|
||||
{
|
||||
return storageBuddyGroupMapper.get();
|
||||
}
|
||||
|
||||
int getAppResult()
|
||||
{
|
||||
return appResult;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif /*APP_H_*/
|
||||
210
mon/source/app/Config.cpp
Normal file
210
mon/source/app/Config.cpp
Normal file
@@ -0,0 +1,210 @@
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include "Config.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#define CONFIG_DEFAULT_CFGFILENAME "/etc/beegfs/beegfs-mon.conf"
|
||||
|
||||
/**
 * Loads the configuration from the command line / config file and, if dbAuthFile is set,
 * reads the database credentials from that file.
 *
 * The auth file may only contain the keys "username" and "password" (InfluxDB 1.x) and
 * "organization" and "token" (InfluxDB 2.x).
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @throw InvalidConfigException if no management host is configured, the auth file cannot
 *    be opened, or the auth file contains an unknown key
 */
Config::Config(int argc, char** argv): AbstractConfig(argc, argv)
{
   initConfig(argc, argv, true);

   // check mandatory value
   if (getSysMgmtdHost().empty())
      throw InvalidConfigException("Management host undefined.");

   // the auth file is optional
   if (dbAuthFile.empty())
      return;

   // Probe readability first to fail with a clear message. The temporary stream is closed
   // again right away, before the file is parsed below.
   if (!std::ifstream(dbAuthFile).good())
      throw InvalidConfigException("Could not open InfluxDB authentication file");

   StringMap authMap;
   MapTk::loadStringMapFromFile(dbAuthFile.c_str(), &authMap);

   for (const auto& entry : authMap)
   {
      const std::string& key = entry.first;
      const std::string& value = entry.second;

      if (key == "password")
         dbAuthPassword = value;
      else if (key == "username")
         dbAuthUsername = value;
      else if (key == "organization")
         dbAuthOrg = value;
      else if (key == "token")
         dbAuthToken = value;
      else
      {
         // the two option groups used to run together without any separator in this message
         throw InvalidConfigException("The InfluxDB authentication file may only contain "
            "the options username and password for influxdb version 1.x, "
            "or organization and token for influxdb version 2.x" );
      }
   }
}
|
||||
|
||||
void Config::loadDefaults(bool addDashes)
|
||||
{
|
||||
AbstractConfig::loadDefaults();
|
||||
|
||||
// re-definitions
|
||||
configMapRedefine("cfgFile", "");
|
||||
configMapRedefine("connUseRDMA", "false");
|
||||
|
||||
// own definitions
|
||||
configMapRedefine("connInterfacesFile", "");
|
||||
configMapRedefine("tuneNumWorkers", "4");
|
||||
configMapRedefine("runDaemonized", "false");
|
||||
configMapRedefine("pidFile", "");
|
||||
|
||||
configMapRedefine("dbType", "influxdb");
|
||||
configMapRedefine("dbHostName", "localhost");
|
||||
configMapRedefine("dbHostPort", "8086");
|
||||
configMapRedefine("dbDatabase", "beegfs_mon");
|
||||
configMapRedefine("dbAuthFile", "");
|
||||
|
||||
// those are used by influxdb only but are kept like this for compatibility
|
||||
configMapRedefine("dbMaxPointsPerRequest", "5000");
|
||||
configMapRedefine("dbSetRetentionPolicy", "true");
|
||||
configMapRedefine("dbRetentionDuration", "1d");
|
||||
|
||||
configMapRedefine("dbBucket", "");
|
||||
|
||||
configMapRedefine("cassandraMaxInsertsPerBatch","25");
|
||||
configMapRedefine("cassandraTTLSecs", "86400");
|
||||
|
||||
configMapRedefine("collectClientOpsByNode", "true");
|
||||
configMapRedefine("collectClientOpsByUser", "true");
|
||||
|
||||
configMapRedefine("httpTimeoutMSecs", "1000");
|
||||
configMapRedefine("statsRequestIntervalSecs", "5");
|
||||
configMapRedefine("nodelistRequestIntervalSecs","30");
|
||||
|
||||
configMapRedefine("curlCheckSSLCertificates", "true");
|
||||
|
||||
}
|
||||
|
||||
void Config::applyConfigMap(bool enableException, bool addDashes)
|
||||
{
|
||||
AbstractConfig::applyConfigMap(false);
|
||||
|
||||
for (StringMapIter iter = configMap.begin(); iter != configMap.end(); )
|
||||
{
|
||||
bool unknownElement = false;
|
||||
|
||||
if (iter->first == std::string("logType"))
|
||||
{
|
||||
if (iter->second == "syslog")
|
||||
{
|
||||
logType = LogType_SYSLOG;
|
||||
}
|
||||
else if (iter->second == "logfile")
|
||||
{
|
||||
logType = LogType_LOGFILE;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw InvalidConfigException("The value of config argument logType is invalid:"
|
||||
" Must be syslog or logfile.");
|
||||
}
|
||||
}
|
||||
else if (iter->first == std::string("connInterfacesFile"))
|
||||
connInterfacesFile = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("tuneNumWorkers"))
|
||||
tuneNumWorkers = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("runDaemonized"))
|
||||
runDaemonized = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("pidFile"))
|
||||
pidFile = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("dbType"))
|
||||
{
|
||||
if (iter->second == "influxdb")
|
||||
dbType = DbTypes::INFLUXDB;
|
||||
else if (iter->second == "influxdb2")
|
||||
dbType = DbTypes::INFLUXDB2;
|
||||
else if (iter->second == "cassandra")
|
||||
dbType = DbTypes::CASSANDRA;
|
||||
else
|
||||
throw InvalidConfigException("The value of config argument dbType is invalid:"
|
||||
" Must be influxdb or cassandra.");
|
||||
}
|
||||
else
|
||||
if (iter->first == std::string("dbHostName"))
|
||||
dbHostName = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("dbHostPort"))
|
||||
dbHostPort = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("dbDatabase"))
|
||||
dbDatabase = iter->second;
|
||||
else
|
||||
if (iter->first == std::string("dbAuthFile"))
|
||||
dbAuthFile = iter->second;
|
||||
else
|
||||
// those are used by influxdb only but are kept like this for compatibility
|
||||
if (iter->first == std::string("dbMaxPointsPerRequest"))
|
||||
influxdbMaxPointsPerRequest = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("dbSetRetentionPolicy"))
|
||||
influxdbSetRetentionPolicy = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("dbRetentionDuration"))
|
||||
influxdbRetentionDuration = iter->second;
|
||||
else
|
||||
// those are used by influxdb2
|
||||
if (iter->first == std::string("dbBucket"))
|
||||
dbBucket = iter->second;
|
||||
else
|
||||
|
||||
if (iter->first == std::string("cassandraMaxInsertsPerBatch"))
|
||||
cassandraMaxInsertsPerBatch = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("cassandraTTLSecs"))
|
||||
cassandraTTLSecs = StringTk::strToUInt(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("collectClientOpsByNode"))
|
||||
collectClientOpsByNode = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("collectClientOpsByUser"))
|
||||
collectClientOpsByUser = StringTk::strToBool(iter->second);
|
||||
else
|
||||
if (iter->first == std::string("httpTimeoutMSecs"))
|
||||
httpTimeout = std::chrono::milliseconds(StringTk::strToUInt(iter->second));
|
||||
else
|
||||
if (iter->first == std::string("statsRequestIntervalSecs"))
|
||||
statsRequestInterval = std::chrono::seconds(StringTk::strToUInt(iter->second));
|
||||
else
|
||||
if (iter->first == std::string("nodelistRequestIntervalSecs"))
|
||||
nodelistRequestInterval = std::chrono::seconds(StringTk::strToUInt(iter->second));
|
||||
else
|
||||
if (iter->first == std::string("curlCheckSSLCertificates"))
|
||||
curlCheckSSLCertificates = StringTk::strToBool(iter->second);
|
||||
else
|
||||
{
|
||||
unknownElement = true;
|
||||
|
||||
if (enableException)
|
||||
{
|
||||
throw InvalidConfigException(std::string("The config argument '")
|
||||
+ iter->first + std::string("' is invalid.") );
|
||||
}
|
||||
}
|
||||
|
||||
if (unknownElement)
|
||||
{
|
||||
iter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
iter = eraseFromConfigMap(iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Config::initImplicitVals()
|
||||
{
|
||||
AbstractConfig::initConnAuthHash(connAuthFile, &connAuthHash);
|
||||
}
|
||||
179
mon/source/app/Config.h
Normal file
179
mon/source/app/Config.h
Normal file
@@ -0,0 +1,179 @@
|
||||
#ifndef CONFIG_H_
|
||||
#define CONFIG_H_
|
||||
|
||||
#include <common/app/config/AbstractConfig.h>
|
||||
|
||||
|
||||
class Config : public AbstractConfig
|
||||
{
|
||||
public:
|
||||
Config(int argc, char** argv);
|
||||
|
||||
enum DbTypes
|
||||
{
|
||||
INFLUXDB,
|
||||
INFLUXDB2,
|
||||
CASSANDRA
|
||||
};
|
||||
|
||||
private:
|
||||
// configurables
|
||||
std::string connInterfacesFile;
|
||||
unsigned tuneNumWorkers;
|
||||
bool runDaemonized;
|
||||
std::string pidFile;
|
||||
|
||||
// mon-specific configurables
|
||||
DbTypes dbType;
|
||||
std::string dbHostName;
|
||||
unsigned dbHostPort;
|
||||
std::string dbDatabase;
|
||||
std::string dbBucket;
|
||||
std::string dbAuthFile;
|
||||
unsigned influxdbMaxPointsPerRequest;
|
||||
bool influxdbSetRetentionPolicy;
|
||||
std::string influxdbRetentionDuration;
|
||||
unsigned cassandraMaxInsertsPerBatch;
|
||||
unsigned cassandraTTLSecs;
|
||||
bool collectClientOpsByNode;
|
||||
bool collectClientOpsByUser;
|
||||
std::chrono::milliseconds httpTimeout;
|
||||
std::chrono::seconds statsRequestInterval;
|
||||
std::chrono::seconds nodelistRequestInterval;
|
||||
bool curlCheckSSLCertificates;
|
||||
|
||||
std::string dbAuthUsername;
|
||||
std::string dbAuthPassword;
|
||||
std::string dbAuthOrg;
|
||||
std::string dbAuthToken;
|
||||
|
||||
|
||||
virtual void loadDefaults(bool addDashes) override;
|
||||
virtual void applyConfigMap(bool enableException, bool addDashes) override;
|
||||
virtual void initImplicitVals() override;
|
||||
|
||||
public:
|
||||
// getters & setters
|
||||
|
||||
const std::string& getConnInterfacesFile() const
|
||||
{
|
||||
return connInterfacesFile;
|
||||
}
|
||||
|
||||
unsigned getTuneNumWorkers() const
|
||||
{
|
||||
return tuneNumWorkers;
|
||||
}
|
||||
|
||||
bool getRunDaemonized() const
|
||||
{
|
||||
return runDaemonized;
|
||||
}
|
||||
|
||||
const std::string& getPIDFile() const
|
||||
{
|
||||
return pidFile;
|
||||
}
|
||||
|
||||
DbTypes getDbType() const
|
||||
{
|
||||
return dbType;
|
||||
}
|
||||
|
||||
const std::string& getDbHostName() const
|
||||
{
|
||||
return dbHostName;
|
||||
}
|
||||
|
||||
unsigned getDbHostPort() const
|
||||
{
|
||||
return dbHostPort;
|
||||
}
|
||||
|
||||
const std::string& getDbDatabase() const
|
||||
{
|
||||
return dbDatabase;
|
||||
}
|
||||
|
||||
const std::string& getDbBucket() const
|
||||
{
|
||||
return dbBucket;
|
||||
}
|
||||
|
||||
unsigned getInfluxdbMaxPointsPerRequest() const
|
||||
{
|
||||
return influxdbMaxPointsPerRequest;
|
||||
}
|
||||
|
||||
bool getInfluxDbSetRetentionPolicy() const
|
||||
{
|
||||
return influxdbSetRetentionPolicy;
|
||||
}
|
||||
|
||||
const std::string& getInfluxDbRetentionDuration() const
|
||||
{
|
||||
return influxdbRetentionDuration;
|
||||
}
|
||||
|
||||
unsigned getCassandraMaxInsertsPerBatch() const
|
||||
{
|
||||
return cassandraMaxInsertsPerBatch;
|
||||
}
|
||||
|
||||
unsigned getCassandraTTLSecs() const
|
||||
{
|
||||
return cassandraTTLSecs;
|
||||
}
|
||||
|
||||
bool getCollectClientOpsByNode() const
|
||||
{
|
||||
return collectClientOpsByNode;
|
||||
}
|
||||
|
||||
bool getCollectClientOpsByUser() const
|
||||
{
|
||||
return collectClientOpsByUser;
|
||||
}
|
||||
|
||||
const std::chrono::milliseconds& getHttpTimeout() const
|
||||
{
|
||||
return httpTimeout;
|
||||
}
|
||||
|
||||
const std::chrono::seconds& getStatsRequestInterval() const
|
||||
{
|
||||
return statsRequestInterval;
|
||||
}
|
||||
|
||||
const std::chrono::seconds& getNodelistRequestInterval() const
|
||||
{
|
||||
return nodelistRequestInterval;
|
||||
}
|
||||
|
||||
const std::string& getDbAuthUsername() const
|
||||
{
|
||||
return dbAuthUsername;
|
||||
}
|
||||
|
||||
const std::string& getDbAuthPassword() const
|
||||
{
|
||||
return dbAuthPassword;
|
||||
}
|
||||
|
||||
const std::string& getDbAuthOrg() const
|
||||
{
|
||||
return dbAuthOrg;
|
||||
}
|
||||
|
||||
const std::string& getDbAuthToken() const
|
||||
{
|
||||
return dbAuthToken;
|
||||
}
|
||||
|
||||
bool getCurlCheckSSLCertificates() const
|
||||
{
|
||||
return curlCheckSSLCertificates;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /*CONFIG_H_*/
|
||||
14
mon/source/app/Main.cpp
Normal file
14
mon/source/app/Main.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
#include <common/toolkit/BuildTypeTk.h>
|
||||
#include <app/SignalHandler.h>
|
||||
#include <app/App.h>
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
BuildTypeTk::checkDebugBuildTypes();
|
||||
AbstractApp::runTimeInitsAndChecks();
|
||||
|
||||
App app(argc, argv);
|
||||
app.startInCurrentThread();
|
||||
|
||||
return app.getAppResult();
|
||||
}
|
||||
49
mon/source/app/SignalHandler.cpp
Normal file
49
mon/source/app/SignalHandler.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include "SignalHandler.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <app/App.h>
|
||||
|
||||
#include <csignal>
|
||||
|
||||
// App instance whose stopComponents() is called by handle(); stays nullptr until
// registerSignalHandler() has been called.
App* SignalHandler::app = nullptr;
|
||||
|
||||
/**
 * Remembers the app to notify and installs handle() as the handler for SIGINT and SIGTERM.
 *
 * @param app the application to stop when one of the signals arrives
 */
void SignalHandler::registerSignalHandler(App* app)
{
   SignalHandler::app = app;

   const int handledSignals[] = {SIGINT, SIGTERM};
   for (const int sigNum : handledSignals)
   {
      signal(sigNum, SignalHandler::handle);
   }
}
|
||||
|
||||
|
||||
void SignalHandler::handle(int sig)
|
||||
{
|
||||
// reset signal handling to default
|
||||
signal(sig, SIG_DFL);
|
||||
|
||||
if (Logger::isInitialized())
|
||||
{
|
||||
switch(sig)
|
||||
{
|
||||
case SIGINT:
|
||||
{
|
||||
LOG(GENERAL, CRITICAL, "Received a SIGINT. Shutting down...");
|
||||
} break;
|
||||
|
||||
case SIGTERM:
|
||||
{
|
||||
LOG(GENERAL, CRITICAL, "Received a SIGTERM. Shutting down...");
|
||||
} break;
|
||||
|
||||
default:
|
||||
{
|
||||
// shouldn't happen
|
||||
LOG(GENERAL, CRITICAL, "Received an unknown signal. Shutting down...");
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
if (app != nullptr)
|
||||
{
|
||||
app->stopComponents();
|
||||
}
|
||||
}
|
||||
16
mon/source/app/SignalHandler.h
Normal file
16
mon/source/app/SignalHandler.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef SIGNAL_HANDLER_H_
|
||||
#define SIGNAL_HANDLER_H_
|
||||
|
||||
class App;
|
||||
|
||||
class SignalHandler
|
||||
{
|
||||
public:
|
||||
static void registerSignalHandler(App* app);
|
||||
static void handle(int sig);
|
||||
|
||||
private:
|
||||
static App* app;
|
||||
};
|
||||
|
||||
#endif
|
||||
67
mon/source/components/CleanUp.cpp
Normal file
67
mon/source/components/CleanUp.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#include "CleanUp.h"
|
||||
|
||||
#include <app/App.h>
|
||||
|
||||
/**
 * Creates the (not yet started) thread named "CleanUp".
 *
 * @param app owning application; only the pointer is stored
 */
CleanUp::CleanUp(App* app)
   : PThread("CleanUp"),
     app(app)
{
}
|
||||
|
||||
void CleanUp::run()
|
||||
{
|
||||
try
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Component started.");
|
||||
registerSignalHandler();
|
||||
loop();
|
||||
LOG(GENERAL, DEBUG, "Component stopped.");
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
app->handleComponentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
void CleanUp::loop()
|
||||
{
|
||||
const std::chrono::minutes idleDisconnectInterval(30);
|
||||
|
||||
while (!waitForSelfTerminateOrder(std::chrono::milliseconds(idleDisconnectInterval).count()))
|
||||
{
|
||||
dropIdleConns();
|
||||
}
|
||||
}
|
||||
|
||||
void CleanUp::dropIdleConns()
|
||||
{
|
||||
unsigned numDroppedConns = 0;
|
||||
|
||||
numDroppedConns += dropIdleConnsByStore(app->getMgmtNodes());
|
||||
numDroppedConns += dropIdleConnsByStore(app->getMetaNodes());
|
||||
numDroppedConns += dropIdleConnsByStore(app->getStorageNodes());
|
||||
|
||||
if (numDroppedConns)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Idle connections dropped", numDroppedConns);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Disconnects and resets the idle streams of every node in the given store, except for the
 * local node.
 *
 * @param nodes the node store to walk
 * @return sum of the values returned by disconnectAndResetIdleStreams() for all nodes
 */
unsigned CleanUp::dropIdleConnsByStore(NodeStoreServers* nodes)
{
   unsigned droppedTotal = 0;

   const auto localNode = app->getLocalNode();
   const auto allNodes = nodes->referenceAllNodes();

   for (const auto& node : allNodes)
   {
      // don't do any idle disconnect stuff with local node
      // (the LocalNodeConnPool doesn't support and doesn't need this kind of treatment)
      if (node == localNode)
         continue;

      droppedTotal += node->getConnPool()->disconnectAndResetIdleStreams();
   }

   return droppedTotal;
}
|
||||
24
mon/source/components/CleanUp.h
Normal file
24
mon/source/components/CleanUp.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef CLEANUP_H_
|
||||
#define CLEANUP_H_
|
||||
|
||||
#include <common/threading/PThread.h>
|
||||
#include <common/nodes/NodeStoreServers.h>
|
||||
|
||||
class App;
|
||||
|
||||
class CleanUp : public PThread
|
||||
{
|
||||
public:
|
||||
CleanUp(App* app);
|
||||
|
||||
private:
|
||||
App* const app;
|
||||
virtual void run() override;
|
||||
void loop();
|
||||
void dropIdleConns();
|
||||
unsigned dropIdleConnsByStore(NodeStoreServers* nodes);
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif /* CLEANUP_H_ */
|
||||
91
mon/source/components/NodeListRequestor.cpp
Normal file
91
mon/source/components/NodeListRequestor.cpp
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "NodeListRequestor.h"
|
||||
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
#include <components/worker/GetNodesWork.h>
|
||||
|
||||
#include <app/App.h>
|
||||
|
||||
// number of attempts getMgmtNodeInfo() makes per call
static const unsigned MGMT_NUM_TRIES{3};

// used both as the download timeout and as the pause between two attempts
static const std::chrono::milliseconds MGMT_TIMEOUT(1000);
|
||||
|
||||
/**
 * Creates the component thread under the name "NodeListReq" and remembers the owning App.
 *
 * @param app application instance used to reach config, node stores and the work queue
 */
NodeListRequestor::NodeListRequestor(App* app)
   : PThread("NodeListReq"),
     app(app)
{
}
|
||||
|
||||
void NodeListRequestor::run()
|
||||
{
|
||||
try
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Component started.");
|
||||
registerSignalHandler();
|
||||
|
||||
requestLoop();
|
||||
|
||||
LOG(GENERAL, DEBUG, "Component stopped.");
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
app->handleComponentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
void NodeListRequestor::requestLoop()
|
||||
{
|
||||
do
|
||||
{
|
||||
// Get management node. Do this every time before updating node lists to check if
|
||||
// management is online to prevent log spam from NodesTk::downloadNodes when it is
|
||||
// not reachable
|
||||
if (!getMgmtNodeInfo())
|
||||
{
|
||||
LOG(GENERAL, NOTICE, "Did not receive a response from management node!");
|
||||
continue;
|
||||
}
|
||||
|
||||
// try to reference first mgmt node (which is at the moment the only one)
|
||||
std::shared_ptr<Node> mgmtNode = app->getMgmtNodes()->referenceFirstNode();
|
||||
|
||||
if (mgmtNode)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Requesting node lists...");
|
||||
|
||||
app->getWorkQueue()->addIndirectWork(new GetNodesWork(mgmtNode, app->getMetaNodes(),
|
||||
NODETYPE_Meta, app->getMetaBuddyGroupMapper(), app->getLocalNode()));
|
||||
app->getWorkQueue()->addIndirectWork(new GetNodesWork(mgmtNode,
|
||||
app->getStorageNodes(), NODETYPE_Storage, app->getStorageBuddyGroupMapper(),
|
||||
app->getLocalNode()));
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Unable to reference management node for node list request.");
|
||||
}
|
||||
}
|
||||
while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
|
||||
app->getConfig()->getNodelistRequestInterval()).count()));
|
||||
}
|
||||
|
||||
bool NodeListRequestor::getMgmtNodeInfo()
|
||||
{
|
||||
for (unsigned i = 0; i < MGMT_NUM_TRIES; i++)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Waiting for management node...");
|
||||
|
||||
// get mgmtd node using NodesTk
|
||||
auto mgmtNode = NodesTk::downloadNodeInfo(app->getConfig()->getSysMgmtdHost(),
|
||||
app->getConfig()->getConnMgmtdPort(), app->getConfig()->getConnAuthHash(),
|
||||
app->getNetMessageFactory(),
|
||||
NODETYPE_Mgmt, MGMT_TIMEOUT.count());
|
||||
|
||||
if(mgmtNode)
|
||||
{
|
||||
app->getMgmtNodes()->addOrUpdateNodeEx(std::move(mgmtNode), nullptr);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (PThread::waitForSelfTerminateOrder(std::chrono::milliseconds(MGMT_TIMEOUT).count()))
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
20
mon/source/components/NodeListRequestor.h
Normal file
20
mon/source/components/NodeListRequestor.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef NODELISTREQUESTOR_H_
|
||||
#define NODELISTREQUESTOR_H_
|
||||
|
||||
#include <common/threading/PThread.h>
|
||||
|
||||
class App;
|
||||
|
||||
class NodeListRequestor : public PThread
|
||||
{
|
||||
public:
|
||||
NodeListRequestor(App* app);
|
||||
|
||||
private:
|
||||
App* const app;
|
||||
virtual void run() override;
|
||||
void requestLoop();
|
||||
bool getMgmtNodeInfo();
|
||||
};
|
||||
|
||||
#endif /*NODELISTREQUESTOR_H_*/
|
||||
206
mon/source/components/StatsCollector.cpp
Normal file
206
mon/source/components/StatsCollector.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include "StatsCollector.h"
|
||||
|
||||
#include <common/toolkit/SocketTk.h>
|
||||
#include <common/nodes/OpCounterTypes.h>
|
||||
|
||||
#include <app/App.h>
|
||||
|
||||
|
||||
/**
 * Creates the component thread under the name "StatsCollector" and remembers the owning App.
 *
 * @param app application instance used to reach config, node stores, work queue and database
 */
StatsCollector::StatsCollector(App* app)
   : PThread("StatsCollector"),
     app(app)
{
}
|
||||
|
||||
void StatsCollector::run()
|
||||
{
|
||||
try
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Component started.");
|
||||
registerSignalHandler();
|
||||
requestLoop();
|
||||
LOG(GENERAL, DEBUG, "Component stopped.");
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
app->handleComponentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
void StatsCollector::requestLoop()
|
||||
{
|
||||
bool collectClientOpsByNode = app->getConfig()->getCollectClientOpsByNode();
|
||||
bool collectClientOpsByUser = app->getConfig()->getCollectClientOpsByUser();
|
||||
|
||||
// intially wait one query interval before requesting stats to give NodeListRequestor the time
|
||||
// to retrieve the node lists
|
||||
while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
|
||||
app->getConfig()->getStatsRequestInterval()).count()))
|
||||
{
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Requesting Stats...");
|
||||
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
|
||||
workItemCounter = 0;
|
||||
metaResults.clear();
|
||||
storageResults.clear();
|
||||
|
||||
// collect data
|
||||
|
||||
const auto& metaNodes = app->getMetaNodes()->referenceAllNodes();
|
||||
|
||||
for (auto node = metaNodes.begin(); node != metaNodes.end(); node++)
|
||||
{
|
||||
workItemCounter++;
|
||||
app->getWorkQueue()->addIndirectWork(
|
||||
new RequestMetaDataWork(std::static_pointer_cast<MetaNodeEx>(*node),
|
||||
this, collectClientOpsByNode, collectClientOpsByUser));
|
||||
}
|
||||
|
||||
const auto& storageNodes = app->getStorageNodes()->referenceAllNodes();
|
||||
|
||||
for (auto node = storageNodes.begin(); node != storageNodes.end(); node++)
|
||||
{
|
||||
workItemCounter++;
|
||||
app->getWorkQueue()->addIndirectWork(
|
||||
new RequestStorageDataWork(std::static_pointer_cast<StorageNodeEx>(*node),
|
||||
this, collectClientOpsByNode, collectClientOpsByUser));
|
||||
}
|
||||
|
||||
while (workItemCounter > 0)
|
||||
condVar.wait(lock);
|
||||
|
||||
// write data
|
||||
|
||||
for (auto iter = metaResults.begin(); iter != metaResults.end(); iter++)
|
||||
{
|
||||
app->getTSDB()->insertMetaNodeData(iter->node, iter->data);
|
||||
|
||||
for (auto listIter = iter->highResStatsList.begin();
|
||||
listIter != iter->highResStatsList.end(); listIter++)
|
||||
{
|
||||
app->getTSDB()->insertHighResMetaNodeData(iter->node, *listIter);
|
||||
}
|
||||
|
||||
if (collectClientOpsByNode)
|
||||
{
|
||||
for (auto mapIter = iter->ipOpsUnorderedMap.begin();
|
||||
mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
|
||||
{
|
||||
ipMetaClientOps.addOpsList(mapIter->first, mapIter->second);
|
||||
}
|
||||
}
|
||||
|
||||
if (collectClientOpsByUser)
|
||||
{
|
||||
for (auto mapIter = iter->userOpsUnorderedMap.begin();
|
||||
mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
|
||||
{
|
||||
userMetaClientOps.addOpsList(mapIter->first, mapIter->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto iter = storageResults.begin(); iter != storageResults.end(); iter++)
|
||||
{
|
||||
app->getTSDB()->insertStorageNodeData(iter->node, iter->data);
|
||||
|
||||
for (auto listIter = iter->highResStatsList.begin();
|
||||
listIter != iter->highResStatsList.end(); listIter++)
|
||||
{
|
||||
app->getTSDB()->insertHighResStorageNodeData(iter->node, *listIter);
|
||||
}
|
||||
|
||||
for (auto listIter = iter->storageTargetList.begin();
|
||||
listIter != iter->storageTargetList.end();
|
||||
listIter++)
|
||||
{
|
||||
app->getTSDB()->insertStorageTargetsData(iter->node, *listIter);
|
||||
}
|
||||
|
||||
if (collectClientOpsByNode)
|
||||
{
|
||||
for (auto mapIter = iter->ipOpsUnorderedMap.begin();
|
||||
mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
|
||||
{
|
||||
ipStorageClientOps.addOpsList(mapIter->first, mapIter->second);
|
||||
}
|
||||
}
|
||||
|
||||
if (collectClientOpsByUser)
|
||||
{
|
||||
for (auto mapIter = iter->userOpsUnorderedMap.begin();
|
||||
mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
|
||||
{
|
||||
userStorageClientOps.addOpsList(mapIter->first, mapIter->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (collectClientOpsByNode)
|
||||
{
|
||||
processClientOps(ipMetaClientOps, NODETYPE_Meta, false);
|
||||
processClientOps(ipStorageClientOps, NODETYPE_Storage, false);
|
||||
}
|
||||
|
||||
if (collectClientOpsByUser)
|
||||
{
|
||||
processClientOps(userMetaClientOps, NODETYPE_Meta, true);
|
||||
processClientOps(userStorageClientOps, NODETYPE_Storage, true);
|
||||
}
|
||||
|
||||
app->getTSDB()->write();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts the per-client op counter differences held by clientOps into name/value maps, hands
 * one map per client to the time series database and finally clears clientOps.
 *
 * @param clientOps counters to process; cleared before returning
 * @param nodeType selects the op number to op name mapping (meta or storage)
 * @param perUser true if the map keys are user IDs, false if they are IP addresses
 */
void StatsCollector::processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser)
{
   ClientOps::IdOpsMap opsById = clientOps.getDiffOpsMap();
   // fetched like before although the value itself is not used in here
   ClientOps::OpsList summedOps = clientOps.getDiffSumOpsList();

   for (auto& clientEntry : opsById)
   {
      // build the client identifier: user ID string or dotted IP address
      std::string id;

      if (!perUser)
      {
         struct in_addr inAddr = { static_cast<in_addr_t>(clientEntry.first) };
         id = Socket::ipaddrToStr(inAddr);
      }
      else if (clientEntry.first == ~0U)
         id = "undefined";
      else
         id = StringTk::uintToStr(clientEntry.first);

      // the position inside the ops list is the op number
      std::map<std::string, uint64_t> stringOpMap;
      unsigned opNum = 0;

      for (auto& opValue : clientEntry.second)
      {
         std::string opName;

         if (nodeType == NODETYPE_Meta)
            opName = OpToStringMapping::mapMetaOpNum(opNum);
         else if (nodeType == NODETYPE_Storage)
            opName = OpToStringMapping::mapStorageOpNum(opNum);

         stringOpMap[opName] = opValue;
         opNum++;
      }

      app->getTSDB()->insertClientNodeData(id, nodeType, stringOpMap, perUser);
   }

   clientOps.clear();
}
|
||||
56
mon/source/components/StatsCollector.h
Normal file
56
mon/source/components/StatsCollector.h
Normal file
@@ -0,0 +1,56 @@
|
||||
#ifndef STATSCOLLECTOR_H_
|
||||
#define STATSCOLLECTOR_H_
|
||||
|
||||
#include <common/threading/PThread.h>
|
||||
#include <components/worker/RequestMetaDataWork.h>
|
||||
#include <components/worker/RequestStorageDataWork.h>
|
||||
#include <common/nodes/ClientOps.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
class App;
|
||||
|
||||
class StatsCollector : public PThread
|
||||
{
|
||||
friend class RequestMetaDataWork;
|
||||
friend class RequestStorageDataWork;
|
||||
|
||||
public:
|
||||
StatsCollector(App* app);
|
||||
|
||||
private:
|
||||
App* const app;
|
||||
ClientOps ipMetaClientOps;
|
||||
ClientOps ipStorageClientOps;
|
||||
ClientOps userMetaClientOps;
|
||||
ClientOps userStorageClientOps;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
int workItemCounter;
|
||||
std::list<RequestMetaDataWork::Result> metaResults;
|
||||
std::list<RequestStorageDataWork::Result> storageResults;
|
||||
std::condition_variable condVar;
|
||||
|
||||
virtual void run() override;
|
||||
void requestLoop();
|
||||
void processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser);
|
||||
|
||||
void insertMetaData(RequestMetaDataWork::Result result)
|
||||
{
|
||||
const std::unique_lock<std::mutex> lock(mutex);
|
||||
metaResults.push_back(std::move(result));
|
||||
workItemCounter--;
|
||||
condVar.notify_one();
|
||||
}
|
||||
|
||||
void insertStorageData(RequestStorageDataWork::Result result)
|
||||
{
|
||||
const std::unique_lock<std::mutex> lock(mutex);
|
||||
storageResults.push_back(std::move(result));
|
||||
workItemCounter--;
|
||||
condVar.notify_one();
|
||||
}
|
||||
};
|
||||
|
||||
#endif /*STATSCOLLECTOR_H_*/
|
||||
40
mon/source/components/worker/GetNodesWork.cpp
Normal file
40
mon/source/components/worker/GetNodesWork.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#include "GetNodesWork.h"
|
||||
|
||||
#include <common/toolkit/NodesTk.h>
|
||||
|
||||
void GetNodesWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
|
||||
{
|
||||
std::vector<std::shared_ptr<Node>> nodesList;
|
||||
std::list<NumNodeID> addedNodes;
|
||||
std::list<NumNodeID> removedNodes;
|
||||
|
||||
|
||||
|
||||
if (NodesTk::downloadNodes(*mgmtdNode, nodeType, nodesList, false))
|
||||
{
|
||||
// sync the downloaded list with the node store
|
||||
nodes->syncNodes(nodesList, &addedNodes, &removedNodes, localNode.get());
|
||||
|
||||
if (!addedNodes.empty())
|
||||
LOG(GENERAL, WARNING, "Nodes added.", ("addedNodes", addedNodes.size()), nodeType);
|
||||
|
||||
if (!removedNodes.empty())
|
||||
LOG(GENERAL, WARNING, "Nodes removed.", ("removedNodes", removedNodes.size()), nodeType);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(GENERAL, ERR, "Couldn't download server list from management daemon.", nodeType);
|
||||
}
|
||||
|
||||
std::list<uint16_t> buddyGroupIDList;
|
||||
std::list<uint16_t> primaryTargetIDList;
|
||||
std::list<uint16_t> secondaryTargetIDList;
|
||||
|
||||
// update the storage buddy groups
|
||||
if (NodesTk::downloadMirrorBuddyGroups(*mgmtdNode, nodeType, &buddyGroupIDList,
|
||||
&primaryTargetIDList, &secondaryTargetIDList, false) )
|
||||
{
|
||||
buddyGroupMapper->syncGroupsFromLists(buddyGroupIDList, primaryTargetIDList,
|
||||
secondaryTargetIDList, NumNodeID());
|
||||
}
|
||||
}
|
||||
32
mon/source/components/worker/GetNodesWork.h
Normal file
32
mon/source/components/worker/GetNodesWork.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef GETNODESWORK_H_
|
||||
#define GETNODESWORK_H_
|
||||
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/nodes/MirrorBuddyGroupMapper.h>
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <common/nodes/NodeStoreServers.h>
|
||||
|
||||
class GetNodesWork : public Work
|
||||
{
|
||||
public:
|
||||
GetNodesWork(std::shared_ptr<Node> mgmtdNode, NodeStoreServers *nodes, NodeType nodeType,
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper, std::shared_ptr<Node> localNode)
|
||||
: mgmtdNode(std::move(mgmtdNode)),
|
||||
nodes(nodes),
|
||||
nodeType(nodeType),
|
||||
buddyGroupMapper(buddyGroupMapper),
|
||||
localNode(localNode)
|
||||
{}
|
||||
|
||||
virtual void process(char* bufIn, unsigned bufInLen,
|
||||
char* bufOut, unsigned bufOutLen) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Node> mgmtdNode;
|
||||
NodeStoreServers* nodes;
|
||||
NodeType nodeType;
|
||||
MirrorBuddyGroupMapper* buddyGroupMapper;
|
||||
std::shared_ptr<Node> localNode;
|
||||
};
|
||||
|
||||
#endif /*GETNODESWORK_H_*/
|
||||
69
mon/source/components/worker/RequestMetaDataWork.cpp
Normal file
69
mon/source/components/worker/RequestMetaDataWork.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
#include "RequestMetaDataWork.h"
|
||||
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
|
||||
#include <common/net/message/mon/RequestMetaDataMsg.h>
|
||||
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
|
||||
#include <components/StatsCollector.h>
|
||||
|
||||
void RequestMetaDataWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
|
||||
{
|
||||
|
||||
if (!node->getIsResponding())
|
||||
{
|
||||
HeartbeatRequestMsg heartbeatRequestMsg;
|
||||
if(MessagingTk::requestResponse(*node, heartbeatRequestMsg,
|
||||
NETMSGTYPE_Heartbeat))
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Node is responding again.",
|
||||
("NodeID", node->getNodeIDWithTypeStr()));
|
||||
node->setIsResponding(true);
|
||||
}
|
||||
}
|
||||
|
||||
Result result = {};
|
||||
result.data.isResponding = false;
|
||||
|
||||
if (node->getIsResponding())
|
||||
{
|
||||
// generate the RequestDataMsg with the lastStatsTime
|
||||
RequestMetaDataMsg requestDataMsg(node->getLastStatRequestTime().count());
|
||||
auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
|
||||
NETMSGTYPE_RequestMetaDataResp);
|
||||
|
||||
if (!respMsg)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
|
||||
node->setIsResponding(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
// get response and process it
|
||||
auto metaRspMsg = static_cast<RequestMetaDataRespMsg*>(respMsg.get());
|
||||
result.highResStatsList = std::move(metaRspMsg->getStatsList());
|
||||
|
||||
result.data.isResponding = true;
|
||||
result.data.indirectWorkListSize = metaRspMsg->getIndirectWorkListSize();
|
||||
result.data.directWorkListSize = metaRspMsg->getDirectWorkListSize();
|
||||
result.data.sessionCount = metaRspMsg->getSessionCount();
|
||||
result.data.hostnameid = metaRspMsg->gethostnameid();
|
||||
|
||||
if (!result.highResStatsList.empty())
|
||||
{
|
||||
auto lastStatsRequestTime = std::chrono::milliseconds(
|
||||
result.highResStatsList.front().rawVals.statsTimeMS);
|
||||
node->setLastStatRequestTime(lastStatsRequestTime);
|
||||
}
|
||||
|
||||
if (collectClientOpsByNode)
|
||||
result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);
|
||||
|
||||
if (collectClientOpsByUser)
|
||||
result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
|
||||
}
|
||||
}
|
||||
|
||||
result.node = std::move(node);
|
||||
|
||||
statsCollector->insertMetaData(std::move(result));
|
||||
}
|
||||
42
mon/source/components/worker/RequestMetaDataWork.h
Normal file
42
mon/source/components/worker/RequestMetaDataWork.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef REQUESTMETADATAWORK_H_
|
||||
#define REQUESTMETADATAWORK_H_
|
||||
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/nodes/ClientOps.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
|
||||
class StatsCollector;
|
||||
|
||||
class RequestMetaDataWork : public Work
|
||||
{
|
||||
public:
|
||||
struct Result
|
||||
{
|
||||
std::shared_ptr<MetaNodeEx> node;
|
||||
MetaNodeDataContent data;
|
||||
HighResStatsList highResStatsList;
|
||||
ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;
|
||||
ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap;
|
||||
};
|
||||
|
||||
RequestMetaDataWork(std::shared_ptr<MetaNodeEx> node,
|
||||
StatsCollector* statsCollector,
|
||||
bool collectClientOpsByNode, bool collectClientOpsByUser) :
|
||||
node(std::move(node)),
|
||||
statsCollector(statsCollector),
|
||||
collectClientOpsByNode(collectClientOpsByNode),
|
||||
collectClientOpsByUser(collectClientOpsByUser)
|
||||
{}
|
||||
|
||||
virtual void process(char* bufIn, unsigned bufInLen,
|
||||
char* bufOut, unsigned bufOutLen) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<MetaNodeEx> node;
|
||||
StatsCollector* statsCollector;
|
||||
bool collectClientOpsByNode;
|
||||
bool collectClientOpsByUser;
|
||||
};
|
||||
|
||||
#endif /*REQUESTMETADATAWORK_H_*/
|
||||
74
mon/source/components/worker/RequestStorageDataWork.cpp
Normal file
74
mon/source/components/worker/RequestStorageDataWork.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
#include "RequestStorageDataWork.h"
|
||||
|
||||
#include <common/toolkit/MessagingTk.h>
|
||||
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
|
||||
#include <common/net/message/mon/RequestStorageDataMsg.h>
|
||||
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
|
||||
#include <components/StatsCollector.h>
|
||||
|
||||
void RequestStorageDataWork::process(char* bufIn, unsigned bufInLen,
|
||||
char* bufOut, unsigned bufOutLen)
|
||||
{
|
||||
|
||||
if (!node->getIsResponding())
|
||||
{
|
||||
HeartbeatRequestMsg heartbeatRequestMsg;
|
||||
|
||||
if(MessagingTk::requestResponse(*node, heartbeatRequestMsg,
|
||||
NETMSGTYPE_Heartbeat))
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Node is responding again.",
|
||||
("NodeID", node->getNodeIDWithTypeStr()));
|
||||
node->setIsResponding(true);
|
||||
}
|
||||
}
|
||||
|
||||
Result result = {};
|
||||
result.data.isResponding = false;
|
||||
|
||||
if (node->getIsResponding())
|
||||
{
|
||||
// generate the RequestStorageDataMsg with the lastStatsTime
|
||||
RequestStorageDataMsg requestDataMsg(node->getLastStatRequestTime().count());
|
||||
auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
|
||||
NETMSGTYPE_RequestStorageDataResp);
|
||||
|
||||
if (!respMsg)
|
||||
{
|
||||
LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
|
||||
node->setIsResponding(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
// get response and process it
|
||||
auto storageRspMsg = static_cast<RequestStorageDataRespMsg*>(respMsg.get());
|
||||
result.highResStatsList = std::move(storageRspMsg->getStatsList());
|
||||
result.storageTargetList = std::move(storageRspMsg->getStorageTargets());
|
||||
|
||||
result.data.isResponding = true;
|
||||
result.data.indirectWorkListSize = storageRspMsg->getIndirectWorkListSize();
|
||||
result.data.directWorkListSize = storageRspMsg->getDirectWorkListSize();
|
||||
result.data.diskSpaceTotal = storageRspMsg->getDiskSpaceTotalMiB();
|
||||
result.data.diskSpaceFree = storageRspMsg->getDiskSpaceFreeMiB();
|
||||
result.data.sessionCount = storageRspMsg->getSessionCount();
|
||||
result.data.hostnameid = storageRspMsg->gethostnameid();
|
||||
|
||||
if (!result.highResStatsList.empty())
|
||||
{
|
||||
auto lastStatsRequestTime = std::chrono::milliseconds(
|
||||
result.highResStatsList.front().rawVals.statsTimeMS);
|
||||
node->setLastStatRequestTime(lastStatsRequestTime);
|
||||
}
|
||||
|
||||
if (collectClientOpsByNode)
|
||||
result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);
|
||||
|
||||
if (collectClientOpsByUser)
|
||||
result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
|
||||
}
|
||||
}
|
||||
|
||||
result.node = std::move(node);
|
||||
|
||||
statsCollector->insertStorageData(std::move(result));
|
||||
}
|
||||
44
mon/source/components/worker/RequestStorageDataWork.h
Normal file
44
mon/source/components/worker/RequestStorageDataWork.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#ifndef REQUESTSTORAGEDATAWORK_H_
|
||||
#define REQUESTSTORAGEDATAWORK_H_
|
||||
|
||||
#include <common/components/worker/Work.h>
|
||||
#include <common/nodes/ClientOps.h>
|
||||
#include <common/storage/StorageTargetInfo.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
|
||||
class StatsCollector;
|
||||
|
||||
class RequestStorageDataWork : public Work
|
||||
{
|
||||
public:
|
||||
struct Result
|
||||
{
|
||||
std::shared_ptr<StorageNodeEx> node;
|
||||
StorageNodeDataContent data;
|
||||
HighResStatsList highResStatsList;
|
||||
StorageTargetInfoList storageTargetList;
|
||||
ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;
|
||||
ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap;
|
||||
};
|
||||
|
||||
RequestStorageDataWork(std::shared_ptr<StorageNodeEx> node,
|
||||
StatsCollector* statsCollector, bool collectClientOpsByNode,
|
||||
bool collectClientOpsByUser) :
|
||||
node(std::move(node)),
|
||||
statsCollector(statsCollector),
|
||||
collectClientOpsByNode(collectClientOpsByNode),
|
||||
collectClientOpsByUser(collectClientOpsByUser)
|
||||
{}
|
||||
|
||||
void process(char* bufIn, unsigned bufInLen, char* bufOut,
|
||||
unsigned bufOutLen);
|
||||
|
||||
private:
|
||||
std::shared_ptr<StorageNodeEx> node;
|
||||
StatsCollector* statsCollector;
|
||||
bool collectClientOpsByNode;
|
||||
bool collectClientOpsByUser;
|
||||
};
|
||||
|
||||
#endif /*REQUESTSTORAGEDATAWORK_H_*/
|
||||
8
mon/source/exception/CurlException.h
Normal file
8
mon/source/exception/CurlException.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef CURLEXCEPTION_H_
|
||||
#define CURLEXCEPTION_H_
|
||||
|
||||
#include <common/toolkit/NamedException.h>
|
||||
|
||||
// Declares the exception type CurlException, named "CurlException", through the
// DECLARE_NAMEDEXCEPTION macro from common/toolkit/NamedException.h.
// NOTE(review): presumably thrown by the curl wrapper code - confirm against its users.
DECLARE_NAMEDEXCEPTION(CurlException, "CurlException")
|
||||
|
||||
#endif /*CURLEXCEPTION_H_*/
|
||||
8
mon/source/exception/DatabaseException.h
Normal file
8
mon/source/exception/DatabaseException.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef DATABASEEXCEPTION_H_
|
||||
#define DATABASEEXCEPTION_H_
|
||||
|
||||
#include <common/toolkit/NamedException.h>
|
||||
|
||||
// Declares the exception type DatabaseException, named "DatabaseException", through the
// DECLARE_NAMEDEXCEPTION macro from common/toolkit/NamedException.h.
// It is thrown by the Cassandra backend when connecting or running a query fails.
DECLARE_NAMEDEXCEPTION(DatabaseException, "DatabaseException")
|
||||
|
||||
#endif /*DATABASEEXCEPTION_H_*/
|
||||
348
mon/source/misc/Cassandra.cpp
Normal file
348
mon/source/misc/Cassandra.cpp
Normal file
@@ -0,0 +1,348 @@
|
||||
#include "Cassandra.h"
|
||||
|
||||
#include <common/storage/StorageTargetInfo.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <exception/DatabaseException.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
static const std::string libVersion = "2.9";

/**
 * Looks up a function in a library opened with dlopen() and wraps it in a std::function.
 *
 * @tparam T function type of the symbol, e.g. decltype(some_c_function)
 * @param libHandle handle returned by dlopen()
 * @param name name of the symbol to look up
 * @return callable wrapper around the symbol
 * @throws std::runtime_error if the symbol can't be resolved or resolves to a null address
 */
template<typename T>
std::function<T> loadSymbol(void* libHandle, const char* name)
{
   // clear any stale error, so that a non-null dlerror() below belongs to our dlsym() call
   dlerror();
   auto f = dlsym(libHandle, name);
   const char* error = dlerror();

   // A null address without an error message can't be called either. Reject it here instead
   // of handing out an empty std::function that would only fail when it is invoked.
   if (error != nullptr || f == nullptr)
      throw std::runtime_error("Couldn't load symbol: "
            + std::string(error != nullptr ? error : "symbol resolved to a null address")
            + "\nThe cassandra plugin requires the datastax client library version " + libVersion
            + ".");

   return reinterpret_cast<T*>(f);
}
|
||||
|
||||
/**
 * Loads the datastax client library at runtime, resolves the driver functions used by this
 * class, connects to the configured cluster and creates the keyspace and tables if they don't
 * exist yet.
 *
 * @param config connection settings (host, port, database name, ...)
 * @throws std::runtime_error if the library or one of its symbols can't be loaded
 * @throws DatabaseException if the connection can't be established within connectionRetries
 *    attempts or one of the setup queries fails
 */
Cassandra::Cassandra(Config config) :
   cluster(nullptr, [this](CassCluster* c){cluster_free(c);}),
   session(nullptr, [this](CassSession* s){session_free(s);}),
   batch(nullptr, [this](CassBatch* b){batch_free(b);}),
   config(std::move(config)),
   libHandle(nullptr, dlclose),
   numQueries(0)
{
   // Load datastax cassandra library
   dlerror();
   libHandle.reset(dlopen("libcassandra.so", RTLD_NOW));
   const char* error = dlerror();
   if (libHandle == nullptr || error != nullptr)
   {
      // error may be null here, so don't construct a std::string from it unchecked
      throw std::runtime_error("Couldn't load cassandra client library (libcassandra.so): "
            + std::string(error != nullptr ? error : "unknown error")
            + "\nThe cassandra plugin requires the datastax client library"
            + " version " + libVersion + ".");
   }

   // load used symbols
   cluster_new = loadSymbol<decltype(cass_cluster_new)>(
         libHandle.get(), "cass_cluster_new");
   cluster_free = loadSymbol<decltype(cass_cluster_free)>(
         libHandle.get(), "cass_cluster_free");
   session_new = loadSymbol<decltype(cass_session_new)>(
         libHandle.get(), "cass_session_new");
   session_free = loadSymbol<decltype(cass_session_free)>(
         libHandle.get(), "cass_session_free");
   batch_new = loadSymbol<decltype(cass_batch_new)>(
         libHandle.get(), "cass_batch_new");
   batch_free = loadSymbol<decltype(cass_batch_free)>(
         libHandle.get(), "cass_batch_free");
   batch_add_statement = loadSymbol<decltype(cass_batch_add_statement)>(
         libHandle.get(), "cass_batch_add_statement");
   cluster_set_contact_points = loadSymbol<decltype(cass_cluster_set_contact_points)>(
         libHandle.get(), "cass_cluster_set_contact_points");
   cluster_set_port = loadSymbol<decltype(cass_cluster_set_port)>(
         libHandle.get(), "cass_cluster_set_port");
   session_connect = loadSymbol<decltype(cass_session_connect)>(
         libHandle.get(), "cass_session_connect");
   session_execute = loadSymbol<decltype(cass_session_execute)>(
         libHandle.get(), "cass_session_execute");
   session_execute_batch = loadSymbol<decltype(cass_session_execute_batch)>(
         libHandle.get(), "cass_session_execute_batch");
   future_error_code = loadSymbol<decltype(cass_future_error_code)>(
         libHandle.get(), "cass_future_error_code");
   future_error_message = loadSymbol<decltype(cass_future_error_message)>(
         libHandle.get(), "cass_future_error_message");
   future_free = loadSymbol<decltype(cass_future_free)>(
         libHandle.get(), "cass_future_free");
   statement_new = loadSymbol<decltype(cass_statement_new)>(
         libHandle.get(), "cass_statement_new");
   statement_free = loadSymbol<decltype(cass_statement_free)>(
         libHandle.get(), "cass_statement_free");

   cluster.reset(cluster_new());
   session.reset(session_new());
   batch.reset(batch_new(CASS_BATCH_TYPE_LOGGED));

   cluster_set_contact_points(cluster.get(), this->config.host.c_str());
   cluster_set_port(cluster.get(), this->config.port);

   // connect, retrying every 10 seconds until connectionRetries attempts have failed
   unsigned tries = 0;
   while (true)
   {
      auto connectFuture = std::unique_ptr<CassFuture, decltype(future_free)>(
            session_connect(session.get(), cluster.get()), future_free);

      CassError err = future_error_code(connectFuture.get());
      if (err == CASS_OK)
         break;

      const char* message = nullptr;
      size_t length = 0;
      future_error_message(connectFuture.get(), &message, &length);

      // the driver reports the message together with its length, so use both instead of
      // relying on a terminating null byte
      const std::string errorMessage = (message != nullptr)
            ? std::string(message, length) : std::string();

      LOG(DATABASE, ERR, "Couldn't connect to cassandra database: " + errorMessage);
      tries++;
      if (tries >= connectionRetries)
         throw DatabaseException("Connection to cassandra database failed.");
      else
         LOG(DATABASE, WARNING, "Retrying in 10 seconds.");

      std::this_thread::sleep_for(std::chrono::seconds(10));
   }

   // Create and switch to keyspace
   query("CREATE KEYSPACE IF NOT EXISTS " + this->config.database + " WITH "
         + "replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};");
   query("USE " + this->config.database + ";");

   // Create tables
   query("CREATE TABLE IF NOT EXISTS meta ("
         "time timestamp, nodeNumID int, nodeID varchar, isResponding boolean, "
         "indirectWorkListSize int, directWorkListSize int, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS highResMeta ("
         "time timestamp, nodeNumID int, nodeID varchar, workRequests int, "
         "queuedRequests int, netSendBytes int, netRecvBytes int, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS storage ("
         "time timestamp, nodeNumID int, nodeID varchar, isResponding boolean, "
         "indirectWorkListSize int, directWorkListSize int, "
         "diskSpaceTotal bigint, diskSpaceFree bigint, PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS highResStorage ("
         "time timestamp, nodeNumID int, nodeID varchar, workRequests int, "
         "queuedRequests int, diskWriteBytes int, diskReadBytes int, "
         "netSendBytes int, netRecvBytes int, PRIMARY KEY(time, nodeNumID));");

   // NOTE(review): the primary key does not contain storageTargetID - confirm that rows of
   // different targets of the same node can't collide on (time, nodeNumID).
   query("CREATE TABLE IF NOT EXISTS storageTargetData ("
         "time timestamp, nodeNumID int, nodeID varchar, storageTargetID int, "
         "diskSpaceTotal bigint, diskSpaceFree bigint, inodesTotal int, inodesFree int, "
         "PRIMARY KEY(time, nodeNumID));");

   query("CREATE TABLE IF NOT EXISTS metaClientOpsByNode ("
         "time timestamp, node varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, node));");
   query("CREATE TABLE IF NOT EXISTS storageClientOpsByNode ("
         "time timestamp, node varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, node));");
   query("CREATE TABLE IF NOT EXISTS metaClientOpsByUser ("
         "time timestamp, user varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, user));");
   query("CREATE TABLE IF NOT EXISTS storageClientOpsByUser ("
         "time timestamp, user varchar, ops map<varchar,int> ,"
         "PRIMARY KEY(time, user));");
}
|
||||
|
||||
void Cassandra::query(const std::string& query, bool waitForResult)
|
||||
{
|
||||
CassStatement* statement = statement_new(query.c_str(), 0);
|
||||
auto queryFuture = std::unique_ptr<CassFuture, decltype(future_free)>(
|
||||
session_execute(session.get(), statement), future_free);
|
||||
statement_free(statement);
|
||||
|
||||
if (waitForResult)
|
||||
{
|
||||
CassError result = future_error_code(queryFuture.get());
|
||||
|
||||
if (result != CASS_OK)
|
||||
{
|
||||
const char* message;
|
||||
size_t length;
|
||||
future_error_message(queryFuture.get(), &message, &length);
|
||||
throw DatabaseException("Query '" + query + "' failed: " + std::string(message));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Cassandra::insertMetaNodeData(std::shared_ptr<Node> node, const MetaNodeDataContent& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO meta ";
|
||||
statement << "(time, nodeNumID, nodeID, isResponding";
|
||||
if (data.isResponding)
|
||||
statement << ", indirectWorkListSize, directWorkListSize) ";
|
||||
else
|
||||
statement << ") ";
|
||||
statement << "VALUES (";
|
||||
statement << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << std::boolalpha << data.isResponding;
|
||||
if (data.isResponding)
|
||||
statement << ", " << data.indirectWorkListSize << ", " << data.directWorkListSize << ") ";
|
||||
else
|
||||
statement << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
void Cassandra::insertStorageNodeData(std::shared_ptr<Node> node,
|
||||
const StorageNodeDataContent& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO storage ";
|
||||
statement << "(time, nodeNumID, nodeID, isResponding";
|
||||
if (data.isResponding)
|
||||
statement << ", indirectWorkListSize, directWorkListSize, diskSpaceTotal, diskSpaceFree) ";
|
||||
else
|
||||
statement << ") ";
|
||||
statement << "VALUES (";
|
||||
statement << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << std::boolalpha << data.isResponding;
|
||||
if (data.isResponding)
|
||||
statement << ", " << data.indirectWorkListSize << ", " << data.directWorkListSize << ", "
|
||||
<< data.diskSpaceTotal << ", " << data.diskSpaceFree << ") ";
|
||||
else
|
||||
statement << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
|
||||
}
|
||||
|
||||
void Cassandra::insertHighResMetaNodeData(std::shared_ptr<Node> node,
|
||||
const HighResolutionStats& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO highResMeta ";
|
||||
statement << "(time, nodeNumID, nodeID, workRequests, ";
|
||||
statement << "queuedRequests, netSendBytes, netRecvBytes) VALUES (";
|
||||
statement << data.rawVals.statsTimeMS << ", " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << data.incVals.workRequests << ", " << data.rawVals.queuedRequests << ", ";
|
||||
statement << data.incVals.netSendBytes << ", " << data.incVals.netRecvBytes << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
void Cassandra::insertHighResStorageNodeData(std::shared_ptr<Node> node,
|
||||
const HighResolutionStats& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO highResStorage ";
|
||||
statement << "(time, nodeNumID, nodeID, workRequests, ";
|
||||
statement << "queuedRequests, diskWriteBytes, diskReadBytes, netSendBytes, netRecvBytes) VALUES (";
|
||||
statement << data.rawVals.statsTimeMS << ", " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << data.incVals.workRequests << ", " << data.rawVals.queuedRequests << ", ";
|
||||
statement << data.incVals.diskWriteBytes << ", " << data.incVals.diskReadBytes << ", ";
|
||||
statement << data.incVals.netSendBytes << ", " << data.incVals.netRecvBytes << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
void Cassandra::insertStorageTargetsData(std::shared_ptr<Node> node,
|
||||
const StorageTargetInfo& data)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO storageTargetData ";
|
||||
statement << "(time, nodeNumID, nodeID, storageTargetID, ";
|
||||
statement << "diskSpaceTotal, diskSpaceFree, inodesTotal, inodesFree) VALUES (";
|
||||
statement << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', ";
|
||||
statement << data.getTargetID() << ", ";
|
||||
statement << data.getDiskSpaceTotal() << ", " << data.getDiskSpaceFree() << ", ";
|
||||
statement << data.getInodesTotal() << ", " << data.getInodesFree() << ") ";
|
||||
statement << "USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
void Cassandra::insertClientNodeData(const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser)
|
||||
{
|
||||
std::ostringstream statement;
|
||||
statement << "INSERT INTO ";
|
||||
if (perUser)
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
statement << "metaClientOpsByUser";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
statement << "storageClientOpsByUser";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
|
||||
statement << " (time, user, ops) VALUES (";
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
statement << "metaClientOpsByNode";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
statement << "storageClientOpsByNode";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
|
||||
statement << " (time, node, ops) VALUES (";
|
||||
}
|
||||
|
||||
statement << "TOTIMESTAMP(NOW()), '" << id << "', {";
|
||||
|
||||
bool first = true;
|
||||
|
||||
for (auto iter = opMap.begin(); iter != opMap.end(); iter++)
|
||||
{
|
||||
if (iter->second == 0)
|
||||
continue;
|
||||
|
||||
statement << (first ? "" : ",") << "'" << iter->first << "':" << iter->second;
|
||||
first = false;
|
||||
}
|
||||
|
||||
statement << "}) USING TTL " << config.TTLSecs << ";";
|
||||
|
||||
// if no fields are != 0, dont write anything
|
||||
if (!first)
|
||||
appendQuery(statement.str());
|
||||
}
|
||||
|
||||
void Cassandra::appendQuery(const std::string& query)
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(queryMutex);
|
||||
|
||||
CassStatement* statement = statement_new(query.c_str(), 0);
|
||||
batch_add_statement(batch.get(), statement);
|
||||
statement_free(statement);
|
||||
|
||||
numQueries++;
|
||||
|
||||
if (numQueries >= config.maxInsertsPerBatch)
|
||||
{
|
||||
writeUnlocked();
|
||||
}
|
||||
}
|
||||
|
||||
void Cassandra::write()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(queryMutex);
|
||||
|
||||
if(numQueries)
|
||||
writeUnlocked();
|
||||
}
|
||||
|
||||
void Cassandra::writeUnlocked()
|
||||
{
|
||||
CassFuture* batchFuture = session_execute_batch(session.get(), batch.get());
|
||||
batch.reset(batch_new(CASS_BATCH_TYPE_LOGGED));
|
||||
future_free(batchFuture);
|
||||
|
||||
LOG(DATABASE, DEBUG, "Sent queries to Cassandra.", numQueries);
|
||||
numQueries = 0;
|
||||
}
|
||||
|
||||
80
mon/source/misc/Cassandra.h
Normal file
80
mon/source/misc/Cassandra.h
Normal file
@@ -0,0 +1,80 @@
|
||||
#ifndef CASSANDRA_H_
|
||||
#define CASSANDRA_H_
|
||||
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
|
||||
#include <cassandra.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
class Cassandra : public TSDatabase
|
||||
{
|
||||
public:
|
||||
|
||||
struct Config
|
||||
{
|
||||
std::string host;
|
||||
int port;
|
||||
std::string database;
|
||||
unsigned maxInsertsPerBatch;
|
||||
unsigned TTLSecs;
|
||||
};
|
||||
|
||||
Cassandra(Config config);
|
||||
virtual ~Cassandra() {};
|
||||
|
||||
virtual void insertMetaNodeData(
|
||||
std::shared_ptr<Node> node, const MetaNodeDataContent& data) override;
|
||||
virtual void insertStorageNodeData(
|
||||
std::shared_ptr<Node> node, const StorageNodeDataContent& data) override;
|
||||
virtual void insertHighResMetaNodeData(
|
||||
std::shared_ptr<Node> node, const HighResolutionStats& data) override;
|
||||
virtual void insertHighResStorageNodeData(
|
||||
std::shared_ptr<Node> node, const HighResolutionStats& data) override;
|
||||
virtual void insertStorageTargetsData(
|
||||
std::shared_ptr<Node> node, const StorageTargetInfo& data) override;
|
||||
virtual void insertClientNodeData(
|
||||
const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser) override;
|
||||
virtual void write() override;
|
||||
|
||||
private:
|
||||
std::function<decltype(cass_cluster_new)> cluster_new;
|
||||
std::function<decltype(cass_cluster_free)> cluster_free;
|
||||
std::function<decltype(cass_session_new)> session_new;
|
||||
std::function<decltype(cass_session_free)> session_free;
|
||||
std::function<decltype(cass_batch_new)> batch_new;
|
||||
std::function<decltype(cass_batch_free)> batch_free;
|
||||
std::function<decltype(cass_batch_add_statement)> batch_add_statement;
|
||||
std::function<decltype(cass_cluster_set_contact_points)> cluster_set_contact_points;
|
||||
std::function<decltype(cass_cluster_set_port)> cluster_set_port;
|
||||
std::function<decltype(cass_session_connect)> session_connect;
|
||||
std::function<decltype(cass_session_execute)> session_execute;
|
||||
std::function<decltype(cass_session_execute_batch)> session_execute_batch;
|
||||
std::function<decltype(cass_future_error_code)> future_error_code;
|
||||
std::function<decltype(cass_future_error_message)> future_error_message;
|
||||
std::function<decltype(cass_future_free)> future_free;
|
||||
std::function<decltype(cass_statement_new)> statement_new;
|
||||
std::function<decltype(cass_statement_free)> statement_free;
|
||||
|
||||
std::unique_ptr<CassCluster, decltype(cluster_free)> cluster;
|
||||
std::unique_ptr<CassSession, decltype(session_free)> session;
|
||||
std::unique_ptr<CassBatch, decltype(batch_free)> batch;
|
||||
|
||||
const Config config;
|
||||
std::unique_ptr<void, int(*)(void*)> libHandle;
|
||||
|
||||
std::string queryBuffer;
|
||||
unsigned numQueries;
|
||||
|
||||
mutable Mutex queryMutex;
|
||||
|
||||
void appendQuery(const std::string& query);
|
||||
void query(const std::string& query, bool waitForResult = true);
|
||||
void writeUnlocked();
|
||||
};
|
||||
|
||||
#endif
|
||||
153
mon/source/misc/CurlWrapper.cpp
Normal file
153
mon/source/misc/CurlWrapper.cpp
Normal file
@@ -0,0 +1,153 @@
|
||||
#include "CurlWrapper.h"
|
||||
|
||||
#include <exception/CurlException.h>
|
||||
|
||||
/**
 * Creates the curl easy handle and applies the options shared by all requests.
 *
 * @param timeout used both as connect timeout and as timeout for the whole transfer.
 * @param checkSSLCertificates if false, peer and host verification are switched off.
 * @throws CurlException if the handle cannot be created or an option cannot be set.
 */
CurlWrapper::CurlWrapper(std::chrono::milliseconds timeout, bool checkSSLCertificates) :
   curlHandle(curl_easy_init(), &curl_easy_cleanup)
{
   // Make sure the buffer is a valid (empty) C string before it may be used as an exception
   // message below.
   errorBuffer[0] = '\0';

   if (curlHandle.get() == NULL)
      throw CurlException("Curl init failed.");

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_ERRORBUFFER, errorBuffer) != CURLE_OK)
      throw CurlException("Setting Curl error buffer failed.");

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_NOSIGNAL, 1L) != CURLE_OK)
      throw CurlException(errorBuffer);

   // curl_easy_setopt is variadic: numeric options must be passed as long, no matter which
   // integer type the caller's value has.
   const long timeoutMS = static_cast<long>(timeout.count());

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_TIMEOUT_MS, timeoutMS) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEFUNCTION, writeCallback) != CURLE_OK)
      throw CurlException(errorBuffer);

   // writeCallback() receives this pointer as its userdata argument
   if (curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEDATA, static_cast<void*>(this)) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_CONNECTTIMEOUT_MS, timeoutMS) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (!checkSSLCertificates)
   {
      if (curl_easy_setopt(curlHandle.get(), CURLOPT_SSL_VERIFYPEER, 0L) != CURLE_OK)
         throw CurlException(errorBuffer);

      if (curl_easy_setopt(curlHandle.get(), CURLOPT_SSL_VERIFYHOST, 0L) != CURLE_OK)
         throw CurlException(errorBuffer);
   }
}
|
||||
|
||||
void CurlWrapper::enableHttpAuth(const std::string& user, const std::string& password)
|
||||
{
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPAUTH, CURLAUTH_ANY))
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_USERNAME, user.c_str()))
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_PASSWORD, password.c_str()))
|
||||
throw CurlException(errorBuffer);
|
||||
}
|
||||
|
||||
|
||||
unsigned short CurlWrapper::sendGetRequest(const std::string& url, const ParameterMap& parameters)
|
||||
{
|
||||
std::string parameterStr = makeParameterStr(parameters);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_URL, (url + parameterStr).c_str()) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPGET, 1L) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
// replace with curl_multi_perform?
|
||||
if (curl_easy_perform(curlHandle.get()) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
long responseCode;
|
||||
if (curl_easy_getinfo(curlHandle.get(), CURLINFO_RESPONSE_CODE, &responseCode) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
return responseCode;
|
||||
}
|
||||
|
||||
unsigned short CurlWrapper::sendPostRequest(const std::string& url, const char* data,
|
||||
const ParameterMap& parameters, const std::vector<std::string>& headers)
|
||||
{
|
||||
std::string parameterStr = makeParameterStr(parameters);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_URL, (url + parameterStr).c_str()) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_POSTFIELDS, data) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
struct curl_slist* headerList = nullptr;
|
||||
for (const auto& header : headers) {
|
||||
headerList = curl_slist_append(headerList, header.c_str());
|
||||
}
|
||||
|
||||
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPHEADER, headerList) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
// replace with curl_multi_perform?
|
||||
if (curl_easy_perform(curlHandle.get()) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
long responseCode;
|
||||
if (curl_easy_getinfo(curlHandle.get(), CURLINFO_RESPONSE_CODE, &responseCode) != CURLE_OK)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
return responseCode;
|
||||
}
|
||||
|
||||
std::string CurlWrapper::makeParameterStr(const ParameterMap& parameters) const
|
||||
{
|
||||
if (!parameters.empty())
|
||||
{
|
||||
std::string parameterStr = "?";
|
||||
|
||||
for (auto iter = parameters.begin(); iter != parameters.end(); iter++)
|
||||
{
|
||||
{
|
||||
auto escaped = std::unique_ptr<char, void(*)(void*)> (
|
||||
curl_easy_escape(curlHandle.get(), (iter->first).c_str(),0),
|
||||
&curl_free);
|
||||
|
||||
if (!escaped)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
parameterStr += escaped.get();
|
||||
}
|
||||
|
||||
{
|
||||
auto escaped = std::unique_ptr<char, void(*)(void*)> (
|
||||
curl_easy_escape(curlHandle.get(), (iter->second).c_str(),0),
|
||||
&curl_free);
|
||||
|
||||
if (!escaped)
|
||||
throw CurlException(errorBuffer);
|
||||
|
||||
parameterStr += "=";
|
||||
parameterStr += escaped.get();
|
||||
parameterStr += "&";
|
||||
}
|
||||
}
|
||||
|
||||
parameterStr.resize(parameterStr.size() - 1);
|
||||
|
||||
return parameterStr;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
size_t CurlWrapper::writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata)
|
||||
{
|
||||
auto instance = static_cast<CurlWrapper*>(userdata);
|
||||
instance->setResponse(std::string(ptr, size*nmemb));
|
||||
|
||||
// Always signal success
|
||||
return size*nmemb;
|
||||
}
|
||||
57
mon/source/misc/CurlWrapper.h
Normal file
57
mon/source/misc/CurlWrapper.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef CURL_WRAPPER_H_
|
||||
#define CURL_WRAPPER_H_
|
||||
|
||||
#include <common/threading/Mutex.h>
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
class CurlWrapper
|
||||
{
|
||||
public:
|
||||
CurlWrapper(std::chrono::milliseconds timeout, bool checkSSLCertificates);
|
||||
|
||||
CurlWrapper(const CurlWrapper&) = delete;
|
||||
CurlWrapper& operator=(const CurlWrapper&) = delete;
|
||||
CurlWrapper(CurlWrapper&&) = delete;
|
||||
CurlWrapper& operator=(CurlWrapper&&) = delete;
|
||||
|
||||
~CurlWrapper() = default;
|
||||
|
||||
void enableHttpAuth(const std::string& user, const std::string& password);
|
||||
|
||||
typedef std::unordered_map<std::string, std::string> ParameterMap;
|
||||
|
||||
unsigned short sendGetRequest(const std::string& url,
|
||||
const ParameterMap& parameters);
|
||||
unsigned short sendPostRequest(const std::string& url, const char* data,
|
||||
const ParameterMap& parameters, const std::vector<std::string>& headers);
|
||||
|
||||
static size_t writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CURL, void(*)(void*)> curlHandle;
|
||||
std::string response;
|
||||
|
||||
char errorBuffer[CURL_ERROR_SIZE];
|
||||
|
||||
std::string makeParameterStr(const ParameterMap& parameters) const;
|
||||
|
||||
void setResponse(const std::string& response)
|
||||
{
|
||||
this->response = response;
|
||||
}
|
||||
|
||||
public:
|
||||
const std::string& getResponse() const
|
||||
{
|
||||
return response;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
344
mon/source/misc/InfluxDB.cpp
Normal file
344
mon/source/misc/InfluxDB.cpp
Normal file
@@ -0,0 +1,344 @@
|
||||
#include "InfluxDB.h"
|
||||
|
||||
#include <common/storage/StorageTargetInfo.h>
|
||||
#include <common/toolkit/StringTk.h>
|
||||
#include <exception/DatabaseException.h>
|
||||
#include <exception/CurlException.h>
|
||||
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
|
||||
static const std::string retentionPolicyName = "auto";
|
||||
|
||||
/**
 * Creates the HTTP client and, for dbVersion INFLUXDB only, enables HTTP authentication (if a
 * username is configured) and runs setupDatabase().
 */
InfluxDB::InfluxDB(Config cfg) :
   config(std::move(cfg))
{
   curlWrapper = boost::make_unique<CurlWrapper>(config.httpTimeout, config.curlCheckSSLCertificates);

   // the remaining setup is only done for dbVersion INFLUXDB
   if (config.dbVersion != INFLUXDB)
      return;

   const bool haveCredentials = !config.username.empty();
   if (haveCredentials)
      curlWrapper->enableHttpAuth(config.username, config.password);

   setupDatabase();
}
|
||||
|
||||
void InfluxDB::setupDatabase() const
|
||||
{
|
||||
// Wait for InfluxDB service being available
|
||||
unsigned tries = 0;
|
||||
while(!sendPing())
|
||||
{
|
||||
tries++;
|
||||
LOG(DATABASE, ERR, "Coudn't reach InfluxDB service.");
|
||||
if (tries >= connectionRetries)
|
||||
throw DatabaseException("Connection to InfluxDB failed.");
|
||||
else
|
||||
LOG(DATABASE, WARNING, "Retrying in 10 seconds.");
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(10));
|
||||
}
|
||||
|
||||
// these are called every time the service starts but is being ignored by influxdb if
|
||||
// the db and rp already exist
|
||||
sendQuery("create database " + config.database);
|
||||
if (config.setRetentionPolicy)
|
||||
{
|
||||
sendQuery("create retention policy " + retentionPolicyName + " on " + config.database
|
||||
+ " duration " + config.retentionDuration
|
||||
+ " replication 1 default");
|
||||
}
|
||||
}
|
||||
|
||||
void InfluxDB::insertMetaNodeData(std::shared_ptr<Node> node, const MetaNodeDataContent& data)
|
||||
{
|
||||
std::ostringstream point;
|
||||
point << "meta";
|
||||
point << ",nodeID=" << escapeStringForWrite(node->getAlias());
|
||||
point << ",nodeNumID=" << node->getNumID();
|
||||
|
||||
if(data.isResponding)
|
||||
{
|
||||
point << " isResponding=" << std::boolalpha << true;
|
||||
point << ",indirectWorkListSize=" << data.indirectWorkListSize;
|
||||
point << ",directWorkListSize=" << data.directWorkListSize;
|
||||
point << ",hostnameid=\"" << data.hostnameid << "\"";
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
point << " isResponding=" << std::boolalpha << false;
|
||||
}
|
||||
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
void InfluxDB::insertStorageNodeData(std::shared_ptr<Node> node,
|
||||
const StorageNodeDataContent& data)
|
||||
{
|
||||
std::ostringstream point;
|
||||
point << "storage";
|
||||
point << ",nodeID=" << escapeStringForWrite(node->getAlias());
|
||||
point << ",nodeNumID=" << node->getNumID();
|
||||
|
||||
if(data.isResponding)
|
||||
{
|
||||
point << " isResponding=" << std::boolalpha << true;
|
||||
point << ",indirectWorkListSize=" << data.indirectWorkListSize;
|
||||
point << ",directWorkListSize=" << data.directWorkListSize;
|
||||
point << ",diskSpaceTotal=" << data.diskSpaceTotal;
|
||||
point << ",diskSpaceFree=" << data.diskSpaceFree;
|
||||
point << ",hostnameid=\"" << data.hostnameid << "\"";
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
point << " isResponding=" << std::boolalpha << false;
|
||||
}
|
||||
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
void InfluxDB::insertHighResMetaNodeData(std::shared_ptr<Node> node,
|
||||
const HighResolutionStats& data)
|
||||
{
|
||||
std::ostringstream point;
|
||||
point << "highResMeta";
|
||||
point << ",nodeID=" << escapeStringForWrite(node->getAlias());
|
||||
point << ",nodeNumID=" << node->getNumID();
|
||||
|
||||
point << " workRequests=" << data.incVals.workRequests;
|
||||
point << ",queuedRequests=" << data.rawVals.queuedRequests;
|
||||
point << ",netSendBytes=" << data.incVals.netSendBytes;
|
||||
point << ",netRecvBytes=" << data.incVals.netRecvBytes;
|
||||
|
||||
// timestamp in ns
|
||||
point << " " << std::chrono::nanoseconds(
|
||||
std::chrono::milliseconds(data.rawVals.statsTimeMS)).count();
|
||||
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
void InfluxDB::insertHighResStorageNodeData(std::shared_ptr<Node> node,
|
||||
const HighResolutionStats& data)
|
||||
{
|
||||
std::ostringstream point;
|
||||
point << "highResStorage";
|
||||
point << ",nodeID=" << escapeStringForWrite(node->getAlias());
|
||||
point << ",nodeNumID=" << node->getNumID();
|
||||
|
||||
point << " workRequests=" << data.incVals.workRequests;
|
||||
point << ",queuedRequests=" << data.rawVals.queuedRequests;
|
||||
point << ",diskWriteBytes=" << data.incVals.diskWriteBytes;
|
||||
point << ",diskReadBytes=" << data.incVals.diskReadBytes;
|
||||
point << ",netSendBytes=" << data.incVals.netSendBytes;
|
||||
point << ",netRecvBytes=" << data.incVals.netRecvBytes;
|
||||
|
||||
// timestamp in ns
|
||||
point << " " << std::chrono::nanoseconds(
|
||||
std::chrono::milliseconds(data.rawVals.statsTimeMS)).count();
|
||||
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
void InfluxDB::insertStorageTargetsData(std::shared_ptr<Node> node,
|
||||
const StorageTargetInfo& data)
|
||||
{
|
||||
std::ostringstream point;
|
||||
point << "storageTargets";
|
||||
point << ",nodeID=" << escapeStringForWrite(node->getAlias());
|
||||
point << ",nodeNumID=" << node->getNumID();
|
||||
point << ",storageTargetID=" << data.getTargetID();
|
||||
|
||||
point << " diskSpaceTotal=" << data.getDiskSpaceTotal();
|
||||
point << ",diskSpaceFree=" << data.getDiskSpaceFree();
|
||||
point << ",inodesTotal=" << data.getInodesTotal();
|
||||
point << ",inodesFree=" << data.getInodesFree();
|
||||
|
||||
std::string t;
|
||||
if (data.getState() == TargetConsistencyState::TargetConsistencyState_GOOD)
|
||||
t = "GOOD";
|
||||
else if (data.getState() == TargetConsistencyState::TargetConsistencyState_NEEDS_RESYNC)
|
||||
t = "NEEDS_RESYNC";
|
||||
else
|
||||
t = "BAD";
|
||||
|
||||
point << ",targetConsistencyState=\"" << t << "\"";
|
||||
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
void InfluxDB::insertClientNodeData(const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser)
|
||||
{
|
||||
std::ostringstream point;
|
||||
if (perUser)
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
point << "metaClientOpsByUser";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
point << "storageClientOpsByUser";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nodeType == NODETYPE_Meta)
|
||||
point << "metaClientOpsByNode";
|
||||
else if (nodeType == NODETYPE_Storage)
|
||||
point << "storageClientOpsByNode";
|
||||
else
|
||||
throw DatabaseException("Invalid Nodetype given.");
|
||||
}
|
||||
|
||||
point << (perUser ? ",user=" : ",node=") << id;
|
||||
|
||||
bool first = true;
|
||||
|
||||
for (auto iter = opMap.begin(); iter != opMap.end(); iter++)
|
||||
{
|
||||
if (iter->second == 0)
|
||||
continue;
|
||||
|
||||
point << (first ? " " : ",") << iter->first << "=" << iter->second;
|
||||
first = false;
|
||||
}
|
||||
|
||||
// if no fields are != 0, dont write anything
|
||||
if (!first)
|
||||
appendPoint(point.str());
|
||||
}
|
||||
|
||||
|
||||
void InfluxDB::appendPoint(const std::string& point)
|
||||
{
|
||||
const std::lock_guard<Mutex> mutexLock(pointsMutex);
|
||||
|
||||
points += point + "\n";
|
||||
numPoints++;
|
||||
|
||||
// test also for size? make it an option?
|
||||
if (numPoints >= config.maxPointsPerRequest)
|
||||
{
|
||||
writePointsUnlocked();
|
||||
}
|
||||
}
|
||||
void InfluxDB::write()
|
||||
{
|
||||
const std::lock_guard<Mutex> mutexLock(pointsMutex);
|
||||
writePointsUnlocked();
|
||||
}
|
||||
|
||||
void InfluxDB::writePointsUnlocked()
|
||||
{
|
||||
sendWrite(points);
|
||||
points.clear();
|
||||
LOG(DATABASE, DEBUG, "Sent data to InfluxDB.", numPoints);
|
||||
numPoints = 0;
|
||||
}
|
||||
|
||||
void InfluxDB::sendWrite(const std::string& data) const
|
||||
{
|
||||
unsigned short responseCode = 0;
|
||||
CurlWrapper::ParameterMap params;
|
||||
std::string url;
|
||||
std::vector<std::string> headers;
|
||||
if (config.dbVersion == INFLUXDB)
|
||||
{
|
||||
params["db"] = config.database;
|
||||
url = config.host + ":" + StringTk::intToStr(config.port) + "/write";
|
||||
}
|
||||
else
|
||||
{
|
||||
params["org"] = config.organization;
|
||||
params["bucket"] = config.bucket;
|
||||
url = config.host + ":" + StringTk::intToStr(config.port) + "/api/v2/write";
|
||||
headers.push_back("Authorization: Token " + config.token);
|
||||
}
|
||||
|
||||
const std::lock_guard<Mutex> mutexLock(curlMutex);
|
||||
|
||||
try
|
||||
{
|
||||
responseCode = curlWrapper->sendPostRequest(url, data.c_str(), params, headers);
|
||||
}
|
||||
catch (const CurlException& e)
|
||||
{
|
||||
LOG(DATABASE, ERR, "Writing to InfluxDB failed due to Curl error.", ("Error", e.what()));
|
||||
return;
|
||||
}
|
||||
|
||||
if (responseCode < 200 || responseCode >= 300)
|
||||
{
|
||||
LOG(DATABASE, ERR, "Writing to InfluxDB failed.", responseCode,
|
||||
("responseMessage", curlWrapper->getResponse()));
|
||||
}
|
||||
}
|
||||
|
||||
void InfluxDB::sendQuery(const std::string& data) const
|
||||
{
|
||||
unsigned short responseCode = 0;
|
||||
CurlWrapper::ParameterMap params;
|
||||
params["db"] = config.database;
|
||||
params["q"] = data;
|
||||
|
||||
const std::lock_guard<Mutex> mutexLock(curlMutex);
|
||||
|
||||
try
|
||||
{
|
||||
responseCode = curlWrapper->sendPostRequest(config.host + ":"
|
||||
+ StringTk::intToStr(config.port)
|
||||
+ "/query", "", params, {});
|
||||
}
|
||||
catch (const CurlException& e)
|
||||
{
|
||||
LOG(DATABASE, ERR, "Querying InfluxDB failed due to Curl error.", ("Error", e.what()));
|
||||
return;
|
||||
}
|
||||
|
||||
if (responseCode < 200 || responseCode >= 300)
|
||||
{
|
||||
LOG(DATABASE, ERR, "Querying InfluxDB failed.", responseCode,
|
||||
("responseMessage", curlWrapper->getResponse()));
|
||||
}
|
||||
}
|
||||
|
||||
bool InfluxDB::sendPing() const
|
||||
{
|
||||
unsigned short responseCode = 0;
|
||||
|
||||
const std::lock_guard<Mutex> mutexLock(curlMutex);
|
||||
|
||||
try
|
||||
{
|
||||
responseCode = curlWrapper->sendGetRequest(config.host + ":"
|
||||
+ StringTk::intToStr(config.port) + "/ping", CurlWrapper::ParameterMap());
|
||||
}
|
||||
catch (const CurlException& e)
|
||||
{
|
||||
LOG(DATABASE, ERR, "Pinging InfluxDB failed due to Curl error.", ("Error", e.what()));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (responseCode < 200 || responseCode >= 300)
|
||||
{
|
||||
LOG(DATABASE, ERR, "Pinging InfluxDB failed.", responseCode,
|
||||
("responseMessage", curlWrapper->getResponse()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* According to InfluxDB documentation, spaces, "=" and "," need to be escaped for write.
|
||||
*/
|
||||
std::string InfluxDB::escapeStringForWrite(const std::string& str)
|
||||
{
|
||||
std::string result = str;
|
||||
boost::replace_all(result, " ", "\\ ");
|
||||
boost::replace_all(result, "=", "\\=");
|
||||
boost::replace_all(result, ",", "\\,");
|
||||
return result;
|
||||
}
|
||||
84
mon/source/misc/InfluxDB.h
Normal file
84
mon/source/misc/InfluxDB.h
Normal file
@@ -0,0 +1,84 @@
|
||||
#ifndef INFLUXDB_H_
|
||||
#define INFLUXDB_H_
|
||||
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <common/threading/Mutex.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
#include <misc/CurlWrapper.h>
|
||||
#include <misc/TSDatabase.h>
|
||||
#include <app/Config.h>
|
||||
|
||||
/**
 * Selects which InfluxDB HTTP API the InfluxDB class talks to.
 */
enum InfluxDBVersion
{
   INFLUXDB = 0,  // "/write" and "/query" endpoints, addressed by database name
   INFLUXDB2 = 1, // "/api/v2/write" endpoint, addressed by organization, bucket and token
};
|
||||
|
||||
class App;
|
||||
|
||||
class InfluxDB : public TSDatabase
|
||||
{
|
||||
public:
|
||||
|
||||
struct Config
|
||||
{
|
||||
std::string host;
|
||||
int port;
|
||||
std::string database;
|
||||
std::chrono::milliseconds httpTimeout;
|
||||
unsigned maxPointsPerRequest;
|
||||
bool setRetentionPolicy;
|
||||
std::string retentionDuration;
|
||||
bool curlCheckSSLCertificates;
|
||||
std::string username;
|
||||
std::string password;
|
||||
std::string bucket;
|
||||
std::string organization;
|
||||
std::string token;
|
||||
InfluxDBVersion dbVersion;
|
||||
|
||||
};
|
||||
|
||||
InfluxDB(Config cfg);
|
||||
virtual ~InfluxDB() {};
|
||||
|
||||
virtual void insertMetaNodeData(
|
||||
std::shared_ptr<Node> node, const MetaNodeDataContent& data) override;
|
||||
virtual void insertStorageNodeData(
|
||||
std::shared_ptr<Node> node, const StorageNodeDataContent& data) override;
|
||||
virtual void insertHighResMetaNodeData(
|
||||
std::shared_ptr<Node> node, const HighResolutionStats& data) override;
|
||||
virtual void insertHighResStorageNodeData(
|
||||
std::shared_ptr<Node> node, const HighResolutionStats& data) override;
|
||||
virtual void insertStorageTargetsData(
|
||||
std::shared_ptr<Node> node, const StorageTargetInfo& data) override;
|
||||
virtual void insertClientNodeData(
|
||||
const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser) override;
|
||||
virtual void write() override;
|
||||
|
||||
static std::string escapeStringForWrite(const std::string& str);
|
||||
|
||||
private:
|
||||
const Config config;
|
||||
|
||||
std::unique_ptr<CurlWrapper> curlWrapper;
|
||||
|
||||
std::string points;
|
||||
unsigned numPoints = 0;
|
||||
|
||||
mutable Mutex pointsMutex;
|
||||
mutable Mutex curlMutex;
|
||||
|
||||
void setupDatabase() const;
|
||||
void appendPoint(const std::string& point);
|
||||
void writePointsUnlocked();
|
||||
void sendWrite(const std::string& data) const;
|
||||
void sendQuery(const std::string& data) const;
|
||||
bool sendPing() const;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
34
mon/source/misc/TSDatabase.h
Normal file
34
mon/source/misc/TSDatabase.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef TS_DATABASE_H_
|
||||
#define TS_DATABASE_H_
|
||||
|
||||
#include <common/nodes/NodeType.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
#include <app/Config.h>
|
||||
|
||||
class TSDatabase
|
||||
{
|
||||
public:
|
||||
static const unsigned connectionRetries = 3;
|
||||
|
||||
TSDatabase() {};
|
||||
virtual ~TSDatabase() {};
|
||||
|
||||
virtual void insertMetaNodeData(
|
||||
std::shared_ptr<Node> node, const MetaNodeDataContent& data) = 0;
|
||||
virtual void insertStorageNodeData(
|
||||
std::shared_ptr<Node> node, const StorageNodeDataContent& data) = 0;
|
||||
virtual void insertHighResMetaNodeData(
|
||||
std::shared_ptr<Node> node, const HighResolutionStats& data) = 0;
|
||||
virtual void insertHighResStorageNodeData(
|
||||
std::shared_ptr<Node> node, const HighResolutionStats& data) = 0;
|
||||
virtual void insertStorageTargetsData(
|
||||
std::shared_ptr<Node> node, const StorageTargetInfo& data) = 0;
|
||||
virtual void insertClientNodeData(
|
||||
const std::string& id, const NodeType nodeType,
|
||||
const std::map<std::string, uint64_t>& opMap, bool perUser) = 0;
|
||||
|
||||
virtual void write() = 0;
|
||||
};
|
||||
|
||||
#endif
|
||||
49
mon/source/net/message/NetMessageFactory.cpp
Normal file
49
mon/source/net/message/NetMessageFactory.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <common/net/message/SimpleMsg.h>
|
||||
#include <common/net/message/NetMessageTypes.h>
|
||||
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
|
||||
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
|
||||
#include <common/net/message/control/DummyMsg.h>
|
||||
#include <common/net/message/control/GenericResponseMsg.h>
|
||||
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
|
||||
#include <common/net/message/nodes/GetNodesRespMsg.h>
|
||||
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
|
||||
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
|
||||
|
||||
#include <net/message/nodes/HeartbeatMsgEx.h>
|
||||
|
||||
#include "NetMessageFactory.h"
|
||||
|
||||
/**
|
||||
* @return NetMessage that must be deleted by the caller
|
||||
* (msg->msgType is NETMSGTYPE_Invalid on error)
|
||||
*/
|
||||
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
|
||||
{
|
||||
NetMessage* msg;
|
||||
|
||||
switch(msgType)
|
||||
{
|
||||
// The following lines shoudle be grouped by "type of the message" and ordered alphabetically
|
||||
// inside the groups. There should always be one message per line to keep a clear layout
|
||||
// (although this might lead to lines that are longer than usual)
|
||||
|
||||
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
|
||||
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
|
||||
case NETMSGTYPE_GetClientStatsResp: { msg = new GetClientStatsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
|
||||
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
|
||||
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
|
||||
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
|
||||
case NETMSGTYPE_RequestMetaDataResp: { msg = new RequestMetaDataRespMsg(); } break;
|
||||
case NETMSGTYPE_RequestStorageDataResp: { msg = new RequestStorageDataRespMsg(); } break;
|
||||
|
||||
default:
|
||||
{
|
||||
msg = new SimpleMsg(NETMSGTYPE_Invalid);
|
||||
} break;
|
||||
}
|
||||
|
||||
return std::unique_ptr<NetMessage>(msg);
|
||||
}
|
||||
|
||||
13
mon/source/net/message/NetMessageFactory.h
Normal file
13
mon/source/net/message/NetMessageFactory.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef NETMESSAGEFACTORY_H_
|
||||
#define NETMESSAGEFACTORY_H_
|
||||
|
||||
#include <common/Common.h>
|
||||
#include <common/net/message/AbstractNetMessageFactory.h>
|
||||
|
||||
class NetMessageFactory : public AbstractNetMessageFactory
|
||||
{
|
||||
protected:
|
||||
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
|
||||
} ;
|
||||
|
||||
#endif /*NETMESSAGEFACTORY_H_*/
|
||||
11
mon/source/net/message/nodes/HeartbeatMsgEx.h
Normal file
11
mon/source/net/message/nodes/HeartbeatMsgEx.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef HEARTBEATMSGEX_H_
|
||||
#define HEARTBEATMSGEX_H_
|
||||
|
||||
#include <common/net/message/nodes/HeartbeatMsg.h>
|
||||
|
||||
// This is only a dummy so the mgmt download doesn't fail
|
||||
|
||||
class HeartbeatMsgEx : public HeartbeatMsg
|
||||
{};
|
||||
|
||||
#endif /*HEARTBEATMSGEX_H_*/
|
||||
17
mon/source/nodes/MetaNodeEx.cpp
Normal file
17
mon/source/nodes/MetaNodeEx.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "MetaNodeEx.h"
|
||||
|
||||
/**
 * Builds a meta node entry for a node that is not in the store yet.
 *
 * Alias, numeric ID, ports and NIC list are taken from the received node; the new entry starts
 * out marked as responding.
 *
 * @param receivedNode node info to copy from; must not be null (it is dereferenced)
 */
MetaNodeEx::MetaNodeEx(std::shared_ptr<Node> receivedNode)
   : Node(NODETYPE_Meta,
        receivedNode->getAlias(),
        receivedNode->getNumID(),
        receivedNode->getPortUDP(),
        receivedNode->getPortTCP(),
        receivedNode->getConnPool()->getNicList()),
     isResponding(true)
{
}
|
||||
|
||||
/**
 * Builds a replacement entry for a meta node that is already stored.
 *
 * Alias, numeric ID, ports and NIC list come from the received node, while the last stat
 * request time and the responding flag are carried over from the old entry.
 *
 * @param receivedNode node info to copy from; must not be null (it is dereferenced)
 * @param oldNode previously stored entry; must not be null (it is dereferenced)
 */
MetaNodeEx::MetaNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<MetaNodeEx> oldNode)
   : Node(NODETYPE_Meta,
        receivedNode->getAlias(),
        receivedNode->getNumID(),
        receivedNode->getPortUDP(),
        receivedNode->getPortTCP(),
        receivedNode->getConnPool()->getNicList())
{
   // keep the monitoring state of the entry that is being replaced
   const std::chrono::milliseconds previousRequestTime = oldNode->getLastStatRequestTime();
   setLastStatRequestTime(previousRequestTime);

   const bool wasResponding = oldNode->getIsResponding();
   setIsResponding(wasResponding);
}
|
||||
55
mon/source/nodes/MetaNodeEx.h
Normal file
55
mon/source/nodes/MetaNodeEx.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#ifndef METANODEEX_H_
|
||||
#define METANODEEX_H_
|
||||
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/threading/RWLockGuard.h>
|
||||
|
||||
/**
 * Plain value bundle with the data of one meta node; this is the payload type taken by
 * TSDatabase::insertMetaNodeData().
 */
struct MetaNodeDataContent
{
   bool isResponding;

   // work list sizes
   unsigned indirectWorkListSize;
   unsigned directWorkListSize;

   unsigned sessionCount;
   std::string hostnameid;
};
|
||||
|
||||
class MetaNodeEx: public Node
|
||||
{
|
||||
public:
|
||||
MetaNodeEx(std::shared_ptr<Node> receivedNode);
|
||||
MetaNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<MetaNodeEx> oldNode);
|
||||
|
||||
private:
|
||||
mutable RWLock lock;
|
||||
bool isResponding;
|
||||
std::chrono::milliseconds lastStatRequestTime{0};
|
||||
|
||||
public:
|
||||
std::chrono::milliseconds getLastStatRequestTime() const
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
return lastStatRequestTime;
|
||||
}
|
||||
|
||||
void setLastStatRequestTime(const std::chrono::milliseconds& time)
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_WRITE);
|
||||
lastStatRequestTime = time;
|
||||
}
|
||||
|
||||
bool getIsResponding() const
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
return isResponding;
|
||||
}
|
||||
|
||||
void setIsResponding(bool isResponding)
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_WRITE);
|
||||
this->isResponding = isResponding;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif /*METANODEEX_H_*/
|
||||
6
mon/source/nodes/MgmtNodeEx.cpp
Normal file
6
mon/source/nodes/MgmtNodeEx.cpp
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "MgmtNodeEx.h"
|
||||
|
||||
/**
 * Creates a management node entry; all arguments are passed straight on to the Node base
 * class together with NODETYPE_Mgmt.
 */
MgmtNodeEx::MgmtNodeEx(std::string nodeID, NumNodeID nodeNumID, unsigned short portUDP,
   unsigned short portTCP, NicAddressList& nicList)
   : Node(NODETYPE_Mgmt, nodeID, nodeNumID, portUDP, portTCP, nicList)
{
}
|
||||
37
mon/source/nodes/MgmtNodeEx.h
Normal file
37
mon/source/nodes/MgmtNodeEx.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef MGMTNODEEX_H_
|
||||
#define MGMTNODEEX_H_
|
||||
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/Common.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
/**
 * Data kept per management node (see MgmtNodeEx::getContent() / setContent()); currently only
 * the responding flag.
 */
struct MgmtdNodeDataContent
{
   bool isResponding;
};
|
||||
|
||||
class MgmtNodeEx : public Node
|
||||
{
|
||||
public:
|
||||
MgmtNodeEx(std::string nodeID, NumNodeID nodeNumID, unsigned short portUDP,
|
||||
unsigned short portTCP, NicAddressList& nicList);
|
||||
|
||||
private:
|
||||
MgmtdNodeDataContent data;
|
||||
|
||||
public:
|
||||
MgmtdNodeDataContent getContent()
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(mutex);
|
||||
return this->data;
|
||||
}
|
||||
|
||||
void setContent(MgmtdNodeDataContent content)
|
||||
{
|
||||
const std::lock_guard<Mutex> lock(mutex);
|
||||
this->data = content;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /*MGMTNODEEX_H_*/
|
||||
38
mon/source/nodes/NodeStoreMetaEx.cpp
Normal file
38
mon/source/nodes/NodeStoreMetaEx.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "NodeStoreMetaEx.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <nodes/MetaNodeEx.h>
|
||||
|
||||
/**
 * Creates an empty store for meta nodes.
 *
 * NOTE(review): the meaning of the second NodeStoreServers argument (false) is not visible
 * here - see NodeStoreServers.
 */
NodeStoreMetaEx::NodeStoreMetaEx()
   : NodeStoreServers(NODETYPE_Meta, false)
{
}
|
||||
|
||||
/**
 * Adds a received meta node to the store, or replaces the stored entry with the same numeric
 * ID while keeping that entry's monitoring state.
 *
 * @param receivedNode node info as received; a MetaNodeEx is built from it
 * @param outNodeNumID forwarded to addOrUpdateNodeUnlocked() so that callers receive the
 *    node's numeric ID, as in NodeStoreStorageEx::addOrUpdateNodeEx()
 * @return NodeStoreResult::Error if the received numeric ID is 0, otherwise the result of
 *    addOrUpdateNodeUnlocked()
 */
NodeStoreResult NodeStoreMetaEx::addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
   NumNodeID* outNodeNumID)
{
   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!receivedNode->getNumID())
      return NodeStoreResult::Error;

   std::shared_ptr<MetaNodeEx> newNode;
   auto storedNode =
      std::static_pointer_cast<MetaNodeEx>(referenceNode(receivedNode->getNumID()));
   if (!storedNode)
   {
      // new node, create MetaNodeEx object with the parameters of the received node info
      newNode = std::make_shared<MetaNodeEx>(receivedNode);
      LOG(GENERAL, DEBUG, "Received new meta node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }
   else
   {
      // already stored node, create MetaNodeEx object with the parameters of the
      // received node info and keep the internal data
      newNode = std::make_shared<MetaNodeEx>(receivedNode, storedNode);
      LOG(GENERAL, DEBUG, "Received update for meta node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }

   const std::lock_guard<Mutex> lock(mutex);

   // hand the caller's out-parameter through instead of dropping it
   return addOrUpdateNodeUnlocked(std::move(newNode), outNodeNumID);
}
|
||||
16
mon/source/nodes/NodeStoreMetaEx.h
Normal file
16
mon/source/nodes/NodeStoreMetaEx.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef NODESTOREMETAEX_H_
|
||||
#define NODESTOREMETAEX_H_
|
||||
|
||||
#include <common/nodes/NodeStore.h>
|
||||
|
||||
class NodeStoreMetaEx : public NodeStoreServers
|
||||
{
|
||||
public:
|
||||
NodeStoreMetaEx();
|
||||
|
||||
virtual NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
|
||||
NumNodeID* outNodeNumID) override;
|
||||
|
||||
};
|
||||
|
||||
#endif /*NODESTOREMETAEX_H_*/
|
||||
29
mon/source/nodes/NodeStoreMgmtEx.cpp
Normal file
29
mon/source/nodes/NodeStoreMgmtEx.cpp
Normal file
@@ -0,0 +1,29 @@
|
||||
#include "NodeStoreMgmtEx.h"
|
||||
|
||||
/**
 * Creates an empty store for management nodes.
 *
 * NOTE(review): the meaning of the second NodeStoreServers argument (false) is not visible
 * here - see NodeStoreServers.
 */
NodeStoreMgmtEx::NodeStoreMgmtEx()
   : NodeStoreServers(NODETYPE_Mgmt, false)
{
}
|
||||
|
||||
/**
 * Adds or updates a management node.
 *
 * If no active node with the same numeric ID exists, the given node is replaced by a freshly
 * built MgmtNodeEx carrying the same alias, ID, ports and NIC list before it is handed to
 * addOrUpdateNodeUnlocked(); otherwise the node is passed on as received.
 *
 * @param node node info as received; must not be null (it is dereferenced)
 * @param outNodeNumID forwarded to addOrUpdateNodeUnlocked()
 * @return NodeStoreResult::Error if the numeric ID is 0, otherwise the result of
 *    addOrUpdateNodeUnlocked()
 */
NodeStoreResult NodeStoreMgmtEx::addOrUpdateNodeEx(std::shared_ptr<Node> node, NumNodeID* outNodeNumID)
{
   std::string alias(node->getAlias());
   NumNodeID numID = node->getNumID();

   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!node->getNumID())
      return NodeStoreResult::Error;

   const std::lock_guard<Mutex> guard(mutex);

   // an ID that is not among the active nodes means this is a new node
   const bool isNewNode = (activeNodes.find(numID) == activeNodes.end());

   if (isNewNode)
   {
      NicAddressList nics = node->getNicList();

      node = boost::make_unique<MgmtNodeEx>(alias, numID, node->getPortUDP(),
         node->getPortTCP(), nics);
   }

   return addOrUpdateNodeUnlocked(std::move(node), outNodeNumID);
}
|
||||
15
mon/source/nodes/NodeStoreMgmtEx.h
Normal file
15
mon/source/nodes/NodeStoreMgmtEx.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef NODESTOREMGMTDEX_H_
|
||||
#define NODESTOREMGMTDEX_H_
|
||||
|
||||
#include <common/nodes/NodeStore.h>
|
||||
#include <nodes/MgmtNodeEx.h>
|
||||
|
||||
class NodeStoreMgmtEx : public NodeStoreServers
|
||||
{
|
||||
public:
|
||||
NodeStoreMgmtEx();
|
||||
|
||||
virtual NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> node, NumNodeID* outNodeNumID) override;
|
||||
};
|
||||
|
||||
#endif /*NODESTOREMGMTDEX_H_*/
|
||||
38
mon/source/nodes/NodeStoreStorageEx.cpp
Normal file
38
mon/source/nodes/NodeStoreStorageEx.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "NodeStoreStorageEx.h"
|
||||
|
||||
#include <common/app/log/Logger.h>
|
||||
#include <nodes/StorageNodeEx.h>
|
||||
|
||||
/**
 * Creates an empty store for storage nodes.
 *
 * NOTE(review): the meaning of the second NodeStoreServers argument (false) is not visible
 * here - see NodeStoreServers.
 */
NodeStoreStorageEx::NodeStoreStorageEx()
   : NodeStoreServers(NODETYPE_Storage, false)
{
}
|
||||
|
||||
/**
 * Adds a received storage node to the store, or replaces the stored entry with the same
 * numeric ID while keeping that entry's monitoring state.
 *
 * @param receivedNode node info as received; a StorageNodeEx is built from it
 * @param outNodeNumID forwarded to addOrUpdateNodeUnlocked()
 * @return NodeStoreResult::Error if the received numeric ID is 0, otherwise the result of
 *    addOrUpdateNodeUnlocked()
 */
NodeStoreResult NodeStoreStorageEx::addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
   NumNodeID* outNodeNumID)
{
   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!receivedNode->getNumID())
      return NodeStoreResult::Error;

   auto existingNode =
      std::static_pointer_cast<StorageNodeEx>(referenceNode(receivedNode->getNumID()));

   std::shared_ptr<StorageNodeEx> replacement;

   if (existingNode)
   {
      // known node: take the parameters of the received node info, but keep the internal
      // data of the stored entry
      replacement = std::make_shared<StorageNodeEx>(receivedNode, existingNode);
      LOG(GENERAL, DEBUG, "Received update for storage node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }
   else
   {
      // unknown node: build the entry from the received node info alone
      replacement = std::make_shared<StorageNodeEx>(receivedNode);
      LOG(GENERAL, DEBUG, "Received new storage node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }

   const std::lock_guard<Mutex> guard(mutex);
   return addOrUpdateNodeUnlocked(std::move(replacement), outNodeNumID);
}
|
||||
15
mon/source/nodes/NodeStoreStorageEx.h
Normal file
15
mon/source/nodes/NodeStoreStorageEx.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef NODESTORESTORAGEEX_H_
|
||||
#define NODESTORESTORAGEEX_H_
|
||||
|
||||
#include <common/nodes/NodeStore.h>
|
||||
|
||||
class NodeStoreStorageEx : public NodeStoreServers
|
||||
{
|
||||
public:
|
||||
NodeStoreStorageEx();
|
||||
|
||||
virtual NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
|
||||
NumNodeID* outNodeNumID) override;
|
||||
};
|
||||
|
||||
#endif /*NODESTORESTORAGEEX_H_*/
|
||||
18
mon/source/nodes/StorageNodeEx.cpp
Normal file
18
mon/source/nodes/StorageNodeEx.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "StorageNodeEx.h"
|
||||
|
||||
/**
 * Builds a storage node entry for a node that is not in the store yet.
 *
 * Alias, numeric ID, ports and NIC list are taken from the received node; the new entry starts
 * out marked as responding.
 *
 * @param receivedNode node info to copy from; must not be null (it is dereferenced)
 */
StorageNodeEx::StorageNodeEx(std::shared_ptr<Node> receivedNode)
   : Node(NODETYPE_Storage,
        receivedNode->getAlias(),
        receivedNode->getNumID(),
        receivedNode->getPortUDP(),
        receivedNode->getPortTCP(),
        receivedNode->getConnPool()->getNicList()),
     isResponding(true)
{
}
|
||||
|
||||
/**
 * Builds a replacement entry for a storage node that is already stored.
 *
 * Alias, numeric ID, ports and NIC list come from the received node, while the last stat
 * request time and the responding flag are carried over from the old entry.
 *
 * @param receivedNode node info to copy from; must not be null (it is dereferenced)
 * @param oldNode previously stored entry; must not be null (it is dereferenced)
 */
StorageNodeEx::StorageNodeEx(std::shared_ptr<Node> receivedNode,
   std::shared_ptr<StorageNodeEx> oldNode)
   : Node(NODETYPE_Storage,
        receivedNode->getAlias(),
        receivedNode->getNumID(),
        receivedNode->getPortUDP(),
        receivedNode->getPortTCP(),
        receivedNode->getConnPool()->getNicList())
{
   // keep the monitoring state of the entry that is being replaced
   const std::chrono::milliseconds previousRequestTime = oldNode->getLastStatRequestTime();
   setLastStatRequestTime(previousRequestTime);

   const bool wasResponding = oldNode->getIsResponding();
   setIsResponding(wasResponding);
}
|
||||
61
mon/source/nodes/StorageNodeEx.h
Normal file
61
mon/source/nodes/StorageNodeEx.h
Normal file
@@ -0,0 +1,61 @@
|
||||
#ifndef STORAGENODEEX_H_
|
||||
#define STORAGENODEEX_H_
|
||||
|
||||
#include <common/nodes/Node.h>
|
||||
#include <common/Common.h>
|
||||
#include <common/threading/RWLockGuard.h>
|
||||
|
||||
/**
 * Plain value bundle with the data of one storage node; this is the payload type taken by
 * TSDatabase::insertStorageNodeData().
 */
struct StorageNodeDataContent
{
   bool isResponding;

   // work list sizes
   unsigned indirectWorkListSize;
   unsigned directWorkListSize;

   // disk figures; NOTE(review): units are not visible here - confirm against the producer
   int64_t diskSpaceTotal;
   int64_t diskSpaceFree;
   int64_t diskRead;
   int64_t diskWrite;

   unsigned sessionCount;
   std::string hostnameid;
};
|
||||
|
||||
class StorageNodeEx : public Node
|
||||
{
|
||||
public:
|
||||
StorageNodeEx(std::shared_ptr<Node> receivedNode);
|
||||
StorageNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<StorageNodeEx> oldNode);
|
||||
|
||||
private:
|
||||
mutable RWLock lock;
|
||||
bool isResponding;
|
||||
std::chrono::milliseconds lastStatRequestTime{0};
|
||||
|
||||
public:
|
||||
std::chrono::milliseconds getLastStatRequestTime() const
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
return lastStatRequestTime;
|
||||
}
|
||||
|
||||
void setLastStatRequestTime(const std::chrono::milliseconds& time)
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
lastStatRequestTime = time;
|
||||
}
|
||||
|
||||
bool getIsResponding() const
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
return isResponding;
|
||||
}
|
||||
|
||||
void setIsResponding(bool isResponding)
|
||||
{
|
||||
RWLockGuard safeLock(lock, SafeRWLock_READ);
|
||||
this->isResponding = isResponding;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /*STORAGENODEEX_H_*/
|
||||
Reference in New Issue
Block a user