New upstream version 8.1.0

This commit is contained in:
geos_one
2025-08-10 01:34:16 +02:00
commit c891bb7105
4398 changed files with 838833 additions and 0 deletions

122
mon/CMakeLists.txt Normal file
View File

@@ -0,0 +1,122 @@
# Put the mon source root on the include path so sources can use
# root-relative #include paths.
include_directories(
   source
)

# Static library containing all beegfs-mon code except the program entry point.
# source/app/Main.cpp is deliberately NOT listed here: it is compiled exactly
# once, directly into the beegfs-mon executable. The Makefile-based build
# filters Main.cpp out of the library in the same way.
add_library(
   mon STATIC
   ./source/exception/CurlException.h
   ./source/exception/DatabaseException.h
   ./source/net/message/NetMessageFactory.h
   ./source/net/message/NetMessageFactory.cpp
   ./source/net/message/nodes/HeartbeatMsgEx.h
   ./source/components/NodeListRequestor.cpp
   ./source/components/StatsCollector.h
   ./source/components/StatsCollector.cpp
   ./source/components/NodeListRequestor.h
   ./source/components/worker/GetNodesWork.cpp
   ./source/components/worker/RequestMetaDataWork.cpp
   ./source/components/worker/RequestStorageDataWork.cpp
   ./source/components/worker/RequestStorageDataWork.h
   ./source/components/worker/RequestMetaDataWork.h
   ./source/components/worker/GetNodesWork.h
   ./source/components/CleanUp.cpp
   ./source/components/CleanUp.h
   ./source/app/Config.h
   ./source/app/App.h
   ./source/app/Config.cpp
   ./source/app/App.cpp
   ./source/app/SignalHandler.cpp
   ./source/app/SignalHandler.h
   ./source/misc/CurlWrapper.cpp
   ./source/misc/InfluxDB.cpp
   ./source/misc/CurlWrapper.h
   ./source/misc/Cassandra.h
   ./source/misc/InfluxDB.h
   ./source/misc/Cassandra.cpp
   ./source/misc/TSDatabase.h
   ./source/nodes/NodeStoreMgmtEx.cpp
   ./source/nodes/NodeStoreStorageEx.cpp
   ./source/nodes/NodeStoreMetaEx.h
   ./source/nodes/StorageNodeEx.h
   ./source/nodes/NodeStoreMetaEx.cpp
   ./source/nodes/MetaNodeEx.cpp
   ./source/nodes/MgmtNodeEx.cpp
   ./source/nodes/NodeStoreStorageEx.h
   ./source/nodes/StorageNodeEx.cpp
   ./source/nodes/MetaNodeEx.h
   ./source/nodes/MgmtNodeEx.h
   ./source/nodes/NodeStoreMgmtEx.h
)
# Header search path for the bundled DataStax (Cassandra client) headers.
# PRIVATE: only used while compiling the mon library itself.
target_include_directories(
mon PRIVATE
../thirdparty/source/datastax
)
# Libraries the mon library is linked with.
target_link_libraries(
mon
beegfs-common
pthread
dl
curl
)
# The beegfs-mon daemon binary: the entry-point source plus the mon library.
add_executable(
beegfs-mon
source/app/Main.cpp
)
target_link_libraries(
beegfs-mon
mon
)
# if(NOT BEEGFS_SKIP_TESTS)
# add_executable(
# test-mon
# # no tests yet
# )
#
# target_link_libraries(
# test-mon
# mon
# gtest
# )
#
# # required for a test
# file(
# COPY ${CMAKE_CURRENT_SOURCE_DIR}/build/dist/etc/beegfs-mon.conf
# DESTINATION dist/etc/
# )
#
# add_test(
# NAME test-mon
# COMMAND test-mon --compiler
# )
# endif()
# Installation rules. Every artifact is assigned to the "mon" install component.
# Relative DESTINATION paths are interpreted relative to the install prefix.

# The beegfs-mon daemon binary.
install(
TARGETS beegfs-mon
DESTINATION "usr/sbin"
COMPONENT "mon"
)
# systemd units: the single-instance service and the "@" template unit.
# NOTE(review): CMAKE_INSTALL_LIBDIR is not set in this file; presumably it is
# provided by GNUInstallDirs in a parent CMakeLists - confirm.
install(
FILES "build/dist/usr/lib/systemd/system/beegfs-mon.service" "build/dist/usr/lib/systemd/system/beegfs-mon@.service"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/systemd/system"
COMPONENT "mon"
)
# Default daemon configuration file.
install(
FILES "build/dist/etc/beegfs-mon.conf"
DESTINATION "etc/beegfs"
COMPONENT "mon"
)
# Template for the database credentials file.
install(
FILES "build/dist/etc/beegfs-mon.auth"
DESTINATION "etc/beegfs"
COMPONENT "mon"
)

104
mon/README.txt Normal file
View File

@@ -0,0 +1,104 @@
BeeGFS monitoring service README
================================
Introduction
------------
The BeeGFS monitoring service (beegfs-mon) collects statistical data from the
various BeeGFS nodes and stores it into a time series database (at the moment InfluxDB and Apache
Cassandra are supported).
Prerequisites and dependencies
------------------------------
We highly recommend using InfluxDB as the backend unless you already have a Cassandra cluster in use
that you want to utilize for mon. The next sections only refer to InfluxDB, if you want to use
Cassandra, please refer to the last paragraph.
InfluxDB and Grafana are NOT included within this package for several reasons:
* The user might want to run the InfluxDB server on another machine and/or wants
to integrate the beegfs-mon into an already existing setup.
* The user might want to use his own or other thirdparty tools to evaluate the
collected data
* They can be updated independently by the user whenever he wants to.
So, to use beegfs-mon, a working and reachable InfluxDB setup is required. Installing
InfluxDB should be simple in most cases since there are prebuilt packages available
for all of the distributions that are supported by BeeGFS.
The installation instructions can be found at
https://docs.influxdata.com/influxdb/v1.3/introduction/installation/ .
Grafana, on the other hand, is completely optional. It's completely up to the user what
he wants to do with the data stored in the database. However, for the sake of simplicity,
we provide some prebuilt Grafana dashboards that can be easily imported into the
Grafana setup and used for monitoring. The installation instructions can be
found at
http://docs.grafana.org/installation/ .
Installation
------------
### Meet the prerequisites
If there isn't an already running InfluxDB service that you want to use, install and start
InfluxDB first (look above for the link to the installation documentation).
If the service runs on another host, make sure it is reachable via HTTP.
### Grafana Dashboards
If you want an out of the box solution, you should use the provided Grafana panels
for visualization. So, install Grafana (again, look above for
the installation instructions) and make sure it can reach the InfluxDB service via network.
#### Default installation
You can then use the provided installation script which can be found under
scripts/import-dashboards. For the out-of-the-box setup with InfluxDB and Grafana being
on the same host, just use
import-dashboards default
#### Custom installation
In any other case, either provide the script with the URLs to InfluxDB and Grafana
(call the script without arguments for usage instruction) or install them manually.
The latter can be done from within Grafana's web interface:
First, the datasource must be defined. In the main menu, click on "Data Sources" and
then "Add Data Source". Enter a name, hostname and port where your InfluxDB is running. Save.
To add the dashboards, select "Dashboards/Import" in the main menu. Navigate to [...] and select
one of the dashboard .json files. Select the datasource you created before in the dropdown and
click "Import". Repeat for the rest of the panels.
You can now click on "Dashboards" in the main menu and then on the Button to the right of it.
A list of the installed dashboards should pop up, in which you can select the one you want to watch.
If your BeeGFS setup, the beegfs-mon daemon and InfluxDB are already running and are configured
properly, you should already see some data being collected.
For more documentation and help in using Grafana, please visit the official website
http://docs.grafana.org.
Apache Cassandra
----------------
If you want to use Cassandra, please be aware that currently there are no Grafana panels for it
available.
To configure beegfs-mon to use Cassandra, you need to install the datastax cassandra client library
on your system which you can find here: https://github.com/datastax/cpp-driver.
It has to be version 2.9. beegfs-mon loads the library dynamically, so no recompilation is
required. The beegfs-mon config file needs to be edited to use the cassandra plugin. The available
options are explained over there.

27
mon/build/Makefile Normal file
View File

@@ -0,0 +1,27 @@
# Build rules for beegfs-mon.
# The build-static-library, define-dep-lib, build-executable and build-test
# macros used below are not defined in this file; presumably they come from
# the shared Makefile included here (TODO confirm).
include ../../build/Makefile
# Entry-point source. It is kept out of the library and handed to the
# executable rule instead.
main := ../source/app/Main.cpp
# Every .cpp file under ../source (case-insensitive name match) except the
# entry-point source.
sources := $(filter-out $(main), $(shell find ../source -iname '*.cpp'))
# Static library "Mon" built from the sources above. The third argument names
# common, dl, curl, cassandra and nl3-route - presumably the libraries it
# depends on; the fourth is presumably the include directory (verify against
# the macro definition).
$(call build-static-library,\
Mon,\
$(sources),\
common dl curl cassandra nl3-route,\
../source)
# Registers "Mon" as a dependency library with its include flag and the path
# of the built archive.
$(call define-dep-lib,\
Mon,\
-I ../source,\
$(build_dir)/libMon.a)
# The beegfs-mon executable: the entry-point source plus Mon and the other
# listed libraries.
$(call build-executable,\
beegfs-mon,\
$(main),\
Mon common dl curl cassandra nl3-route)
# Test runner built from every .cpp file under ../tests.
$(call build-test,\
test-runner,\
$(shell find ../tests -name '*.cpp'),\
Mon common dl curl cassandra nl3-route,\
../tests)

9
mon/build/dist/etc/beegfs-mon.auth vendored Normal file
View File

@@ -0,0 +1,9 @@
# This file configures the credentials needed to connect to your monitoring database instance.
# This currently only works with InfluxDB.
username =
password =
# used by influxdb V2 only
organization =
token =

345
mon/build/dist/etc/beegfs-mon.conf vendored Normal file
View File

@@ -0,0 +1,345 @@
# This is a config file for the BeeGFS Mon daemon.
# http://www.beegfs.com
# --- [Table of Contents] ---
#
# 1) Settings
# 2) Command Line Arguments
# 3) Basic Settings Documentation
# 4) Advanced Settings Documentation
#
# --- Section 1.1: [Basic Settings] ---
#
sysMgmtdHost =
#
# --- Section 1.2: [Advanced Settings] ---
#
dbType = influxdb
dbHostName = localhost
dbHostPort = 8086
dbAuthFile =
# used by influxdb only
dbDatabase = beegfs_mon
dbMaxPointsPerRequest = 5000
dbSetRetentionPolicy = true
dbRetentionDuration = 1d
# used by influxdb V2 only
dbBucket =
# used by cassandra only
cassandraMaxInsertsPerBatch = 25
cassandraTTLSecs = 86400
collectClientOpsByNode = true
collectClientOpsByUser = true
statsRequestIntervalSecs = 5
httpTimeoutMSecs = 1000
nodelistRequestIntervalSecs = 30
curlCheckSSLCertificates = true
connMgmtdPort = 8008
connPortShift = 0
connAuthFile = /etc/beegfs/conn.auth
connDisableAuthentication = false
connFallbackExpirationSecs = 900
connMaxInternodeNum = 3
connInterfacesFile =
connNetFilterFile =
connTcpOnlyFilterFile =
logType = syslog
logLevel = 3
logNoDate = false
logNumLines = 50000
logNumRotatedFiles = 2
logStdFile = /var/log/beegfs-mon.log
runDaemonized = true
tuneNumWorkers = 4
#
# --- Section 2: [Command Line Arguments] ---
#
# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
# specify a different config file for beegfs-mon.
# All other options in this file can also be used as command line
# arguments, overriding the corresponding config file values.
#
# --- Section 3: [Basic Settings Documentation] ---
#
# [sysMgmtdHost]
# Hostname (or IP) of the host running the management service.
# (See also "connMgmtdPort")
# Default: <none>
#
# --- Section 4: [Advanced Settings Documentation] ---
#
#
# --- Section 4.1: [Mon] ---
#
# [dbType]
# The time series database engine to use. Currently, influxdb and cassandra are supported.
# For most use cases, using InfluxDB is recommended because it is easier to use and more
# lightweight.
# [dbHostName]
# The hostname where the database backend runs. Can also be given as an URL including
# protocol. The protocol can be HTTP (default), or, if an SSL encrypted connection
# is required, HTTPS. Example: https://localhost.
# Default: localhost
# [dbHostPort]
# The port on which the database backend listens for connections.
# Default: 8086 (which is the default port used by InfluxDB)
# [dbDatabase]
# The database/keyspace name in which the data is stored.
# Default: beegfs_mon
# [dbAuthFile]
# Defines a file where the authentication credentials for the database are stored.
# This file should be set to be readable by root only. When mon was installed via
# package, the file was already created and placed at /etc/beegfs/beegfs-mon.auth
# Default: <none>
# [dbMaxPointsPerRequest]
# Sets the max number of points that will be cached until the whole
# set is sent via HTTP to the database backend. After a whole set of statistics has been
# collected, the cached points will be sent in any case. Small values lead to
# many requests (and thus many packets) being sent; values that are too big can exceed
# certain limits and may cause failures. A few thousand is a sensible value here. Only used for InfluxDB.
# Default: 5000
# [dbSetRetentionPolicy]
# Determines whether the service shall automatically apply a retention policy
# to the database at startup. This should only be set to false if the user wants
# to configure the database by himself and wants to have a more sophisticated
# retention policy. Only used for InfluxDB.
# Default: true
# [dbRetentionDuration]
# Defines how long the data points shall be stored until dropped by InfluxDB.
# This is only relevant if dbSetRetentionPolicy is set to true.
# Valid values are in the form ^[0-9]+[smhdw]$, while the suffixes mean
# seconds, minutes, hours, days, weeks. 2d, for example, means two days.
# Only used for InfluxDB. For more details please consult the InfluxDB documentation.
# Default: 1d (one day)
# [dbBucket]
# The bucket name in which the data is stored. Only used for InfluxDB V2.
# [cassandraMaxInsertsPerBatch]
# Sets the max number of INSERT statements that will be batched together for execution
# using the thirdparty client library for cassandra. It only accepts a maximum of a few
# thousand bytes by default, so a sensible order of magnitude is around 20 to 30. If
# you get warnings in the log like "Batch for [beegfs_mon.meta,
# beegfs_mon.highresmeta] is of size X, exceeding specified threshold of 5120 by Y.",
# you can try to reduce this number. Only used for Cassandra.
# Default: 25
# [cassandraTTLSecs]
# Defines the number of seconds the data rows shall be stored until marked for removal
# by the database engine. Only used for Cassandra.
# Default: 86400 (one day)
# [collectClientOpsByNode]
# Sets whether mon collects the client ops stats from the nodes, grouped by the client node IP.
# Default: true
# [collectClientOpsByUser]
# Sets whether mon collects the client ops stats from the nodes, grouped by the client's user ID.
# Default: true
# [statsRequestIntervalSecs]
# Sets the waiting interval in seconds between the stats query operations.
# This does not affect the high resolution stats (which are always measured in
# one second intervals).
# Default: 5
# [httpTimeoutMSecs]
# Defines the timeout for the http requests that are sent to the InfluxDB daemon
# in milliseconds.
# Default: 1000
# [nodelistRequestIntervalSecs]
# Sets the waiting interval in seconds between the nodelist request operations.
# This defines how often the service pulls the newest node lists from
# the management daemon.
# Default: 30
# [curlCheckSSLCertificates]
# Decides whether the server's certificate and hostname shall be checked to be valid when using
# an SSL encrypted connection to an InfluxDB host.
# Disable when using self signed certificates without proper CA certificates.
# Default: true
#
# --- Section 4.2: [Connections & Communication] ---
#
# [connMgmtdPort]
# The UDP and TCP port of the management node.
# Default: 8008
# [connPortShift]
# Shifts all following UDP and TCP ports according to the specified value.
# Intended to make port configuration easier in case you do not want to
# configure each port individually.
# Default: 0
# [connAuthFile]
# The path to a file that contains a shared secret for connection based
# authentication. Only peers that use the same shared secret will be able to
# connect.
# Default: <none>
# [connDisableAuthentication]
# If set to true, explicitly disables connection authentication and allows the
# service to run without a connAuthFile. Running BeeGFS without connection
# authentication is considered insecure and is not recommended.
# Default: false
# [connFallbackExpirationSecs]
# The time in seconds after which a connection to a fallback interface expires.
# When a fallback connection expires, the system will try to establish a new
# connection to the other host's primary interface (falling back to another
# interface again if necessary).
# Note: The priority of node interfaces can be configured using the
# "connInterfacesFile" parameter.
# Default: 900
# [connMaxInternodeNum]
# The maximum number of simultaneous connections to the same node.
# Default: 3
# [connInterfacesFile]
# The path to a text file that specifies the names of the interfaces which
# may be used for communication by other nodes. One interface per line. The
# line number also defines the priority of an interface.
# Example: "ib0" in the first line, "eth0" in the second line.
# Values: This setting is optional. If unspecified, all available interfaces
# will be published and priorities will be assigned automatically.
# Note: This information is sent to other hosts to inform them about possible
# communication paths. See connRestrictOutboundInterfaces for this
# configuration's potential effect on outbound connections.
# Default: <none>
# [connRestrictOutboundInterfaces]
# The default behavior of BeeGFS is to use any available network interface
# to establish an outbound connection to a node, according to the TCP/IP
# configuration of the operating system. When connRestrictOutboundInterfaces
# is set to true, the network interfaces used for outbound connections are
# limited to the values specified by connInterfacesFile or connInterfacesList.
# The operating system routing tables are consulted to determine which
# interface to use for a particular node's IP address. If there is no
# route from the configured interfaces that is suitable for a node's IP
# address then the connection will fail to be established.
# Default: false
# [connNoDefaultRoute]
# When connRestrictOutboundInterfaces is true, the routing logic will use
# the default route for a Node's IP address when no specific route for that
# address is found in the routing tables. This can be problematic during a
# failure situation, as the default route is not appropriate to use for a
# subnet that is accessible from an interface that has failed.
# connNoDefaultRoute is a comma-separated list of CIDRs that should never
# be accessed via the default route.
# Default: 0.0.0.0/0. This prevents the default route from ever being used.
# [connNetFilterFile]
# The path to a text file that specifies allowed IP subnets, which may be used
# for outgoing communication. One subnet per line in classless notation (IP
# address and number of significant bits).
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional. If unspecified, all addresses are allowed
# for outgoing communication.
# Default: <none>
# [connTcpOnlyFilterFile]
# The path to a text file that specifies IP address ranges to which no RDMA connection should be
# established. This is useful e.g. for environments where all hosts support RDMA, but some hosts
# cannot connect via RDMA to some other hosts.
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional.
# Default: <none>
#
# --- Section 4.3: [Logging] ---
#
# [logType]
# Defines the logger type. This can either be "syslog" to send log messages to
# the general system logger or "logfile". If set to logfile, logs will be written
# to logStdFile.
# Default: logfile
# [logLevel]
# Defines the amount of output messages. The higher this level, the more
# detailed the log messages will be.
# Note: Levels above 2 might decrease performance.
# Default: 2 (Max: 5)
# [logNoDate]
# Defines whether "date & time" (=false) or the current "time only" (=true)
# should be logged.
# Default: false
# [logNumLines]
# The maximum number of lines per log file.
# Default: 50000
# [logNumRotatedFiles]
# The number of old files to keep when "logNumLines" is reached and the log file
# is rewritten. (Log rotation)
# Default: 2
# [logStdFile]
# The path and filename of the log file for standard log messages. If no name
# is specified, the messages will be written to the console.
# Default: /var/log/beegfs-mon.log
#
# --- Section 4.4: [Startup] ---
#
# [runDaemonized]
# Detach the process from its parent (and from stdin/-out/-err).
# Default: true
#
# --- Section 4.5: [Tuning] ---
#
# [tuneNumWorkers]
# The number of worker threads. Should be at least 3. A value of up to twice the
# number of CPU cores of your machine is the recommended choice.
# Default: 4

29
mon/build/dist/etc/default/beegfs-mon vendored Normal file
View File

@@ -0,0 +1,29 @@
# BeeGFS mon service configuration.
# Note: This file is only used together with sysV init scripts.
# If your system uses systemd, this file is ignored.
# In this case:
#
# - use `systemctl enable / disable` to activate / deactivate a service
#
# - systemd service templates are used for multimode
# (See https://www.beegfs.io/wiki/MultiMode)
#
#
# Set to "NO" to disable start of the BeeGFS mon daemon via the init
# script.
START_SERVICE="YES"
# Set to "YES" if you want to start multiple mon daemons with different
# configuration files on this machine.
#
# Create a subdirectory with the ending ".d" in "/etc/beegfs/" for every config
# file. The subdirectory name will be used to identify a particular server
# instance for init script start/stop.
#
# Note: The original config file in /etc/beegfs will not be used when multi-mode
# is enabled.
#
# Example: /etc/beegfs/scratch.d/beegfs-mon.conf
# $ /etc/init.d/beegfs-mon start scratch
MULTI_MODE="NO"

22
mon/build/dist/etc/init.d/beegfs-mon.init vendored Executable file
View File

@@ -0,0 +1,22 @@
#!/bin/bash
#
# SysV init script for the BeeGFS monitoring daemon (beegfs-mon).
# This file only declares the LSB header and the service identity and then
# sources the shared BeeGFS init helper files at the bottom.
#
### BEGIN INIT INFO
# Provides: beegfs-mon
# Required-Start:
# Should-Start: $network
# Required-Stop:
# Should-Stop: $network
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# chkconfig: 35 95 9
# Short-Description: BeeGFS Mon
# Description: Start BeeGFS Mon
### END INIT INFO
# Service identity; presumably consumed by the helper files sourced below
# (verify against those files).
APP_NAME="BeeGFS Mon"
SERVICE_NAME=beegfs-mon
# source function library
. /etc/beegfs/lib/start-stop-functions
. /etc/beegfs/lib/init-multi-mode

View File

@@ -0,0 +1,14 @@
# systemd unit for a single beegfs-mon instance using the default config file.
[Unit]
Description=BeeGFS Mon Server
Documentation=http://www.beegfs.com/content/documentation/
Requires=network-online.target
# We disable the wants service, because it spams the log files
#Wants=beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service
# Ordering only: start after these units when they are part of the same
# transaction. After= does not pull any of them in.
After=network-online.target beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service zfs.target
[Service]
# runDaemonized=false keeps the process in the foreground, matching Type=simple.
# NOTE(review): the binary path is /opt/beegfs/sbin here, while the CMake
# install rule uses the relative destination "usr/sbin" - confirm that the
# packaging reconciles the two.
ExecStart=/opt/beegfs/sbin/beegfs-mon cfgFile=/etc/beegfs/beegfs-mon.conf runDaemonized=false
Type=simple
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,14 @@
# systemd template unit for multi-mode: one beegfs-mon instance per
# configuration directory /etc/beegfs/<instance>.d/.
[Unit]
Description=BeeGFS Mon Server (multimode)
Documentation=http://www.beegfs.com/content/documentation/
Requires=network-online.target
# We disable the wants service, because it spams the log files
#Wants=beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service
# Ordering only: start after these units when they are part of the same
# transaction. After= does not pull any of them in.
After=network-online.target beegfs-mgmtd.service beegfs-storage.service openibd.service openib.service rdma.service opensmd.service opensm.service zfs.target
[Service]
# %I is replaced by the (unescaped) instance name, i.e. the part between "@"
# and ".service" of the started unit. runDaemonized=false keeps the process in
# the foreground, matching Type=simple.
ExecStart=/opt/beegfs/sbin/beegfs-mon cfgFile=/etc/beegfs/%I.d/beegfs-mon.conf runDaemonized=false
Type=simple
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,155 @@
{
"id": 2,
"uid": "cf53330f-49cf-4b1e-bb59-e4580d32e707",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "CPU Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"groupBy": [
{
"params": [
"host::tag"
],
"type": "tag"
}
],
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"measurement": "cpu",
"orderByTime": "ASC",
"policy": "auto",
"query": "SELECT mean(\"usage_system\") FROM \"auto\".\"cpu\" WHERE $timeFilter GROUP BY \"host\"::tag",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"usage_system"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B > 80",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "math"
}
}
],
"updated": "2023-10-17T18:28:40+05:30",
"noDataState": "OK",
"execErrState": "Error",
"for": "30m",
"annotations": {
"summary": "CPU usage is above thershold set",
"description": "Please check host \"{{ $labels.host }}\" its cpu usage is above thershold"
},
"labels": {
"cpu-severity": "{{if gt $values.B.Value 90.0}}critical{{else if gt $values.B.Value 80.0}}warning{{else}}info{{end}}"
},
"isPaused": true
}

View File

@@ -0,0 +1,125 @@
{
"id": 2,
"uid": "c1ec4ef2-dae2-4c85-b478-8119bb4326e6",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "CPU Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"query": "from(bucket: \"${BUCKET}\") \r\n|> range(start: v.timeRangeStart, stop: v.timeRangeStop) \r\n|> filter(fn: (r) => r[\"_measurement\"] == \"cpu\") \r\n|> filter(fn: (r) => r[\"_field\"] == \"usage_system\")\r\n|> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\") \r\n|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false) \r\n|> yield(name: \"mean\")",
"refId": "A"
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B > 80",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "math"
}
}
],
"updated": "2023-10-17T12:42:56Z",
"noDataState": "OK",
"execErrState": "Error",
"for": "30m",
"annotations": {
"description": "Please check host \"{{ $labels.host }}\" its cpu usage is above thershold",
"summary": "CPU usage is above thershold set"
},
"labels": {
"cpu-severity": "{{ if gt $values.B.Value 90.0 }}critical{{ else if gt $values.B.Value 80.0 }}warning{{ else }}info{{ end }}"
},
"isPaused": true
}

View File

@@ -0,0 +1,156 @@
{
"id": 3,
"uid": "af36a69e-fd32-4ebc-94cd-474ea6c9edb2",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Disk Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"groupBy": [
{
"params": [
"storageTargetID::tag"
],
"type": "tag"
}
],
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"measurement": "storageTargets",
"orderByTime": "ASC",
"policy": "auto",
"query": "SELECT (diskSpaceFree / diskSpaceTotal) * 100 FROM \"auto\".\"storageTargets\" WHERE $timeFilter GROUP BY \"storageTargetID\"::tag",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"diskSpaceFree"
],
"type": "field"
},
{
"params": [
" / "
],
"type": "math"
}
]
],
"tags": []
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B < 30",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "math"
}
}
],
"updated": "2023-10-10T16:06:31+05:30",
"noDataState": "OK",
"execErrState": "Error",
"for": "10m",
"annotations": {
"description": "Please check, as the Disk Space for Storage Target ID '{{ $labels.storageTargetID }}' is only {{ humanize $values.B.Value }}%.",
"summary": "BeeGFS Storage Target disk space is low"
},
"labels": {
"disk-severity": "{{if lt $values.B.Value 20.0}}critical{{else if lt $values.B.Value 30.0}}warning{{else}}info{{end}}"
},
"isPaused": true
}

View File

@@ -0,0 +1,125 @@
{
"id": 3,
"uid": "c0008edf-2473-47be-b0ff-ab50bad831c5",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Disk Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"query": "from(bucket: \"${BUCKET}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r[\"_measurement\"] == \"storageTargets\")\r\n |> filter(fn: (r) => r._field == \"diskSpaceTotal\" or r._field == \"diskSpaceFree\")\r\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> map(fn: (r) => ({ r with _value:(r.diskSpaceFree/ r.diskSpaceTotal) * 100.0 }))\r\n |> rename(columns: {_value: \"DiskFreePercent\"})\r\n |> drop(columns:[\"_start\",\"_stop\",\"_measurement\",\"diskSpaceTotal\",\"diskSpaceFree\"])\r\n",
"refId": "A"
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B < 30",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "math"
}
}
],
"updated": "2023-10-16T18:16:45Z",
"noDataState": "OK",
"execErrState": "Error",
"for": "10m",
"annotations": {
"description": "Please check, as the Disk Space for Storage Target ID '{{ $labels.storageTargetID }}' is only {{ humanize $values.B.Value }}%.",
"summary": "BeeGFS Storage Target disk space is low"
},
"labels": {
"disk-severity": "{{if lt $values.B.Value 20.0}}critical{{else if lt $values.B.Value 30.0}}warning{{else}}info{{end}}"
},
"isPaused": true
}

View File

@@ -0,0 +1,156 @@
{
"id": 4,
"uid": "e2ad5c16-110f-43df-a784-829561fe3317",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Inodes Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"groupBy": [
{
"params": [
"storageTargetID::tag"
],
"type": "tag"
}
],
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"measurement": "storageTargets",
"orderByTime": "ASC",
"policy": "auto",
"query": "SELECT (inodesFree / inodesTotal) * 100 FROM \"auto\".\"storageTargets\" WHERE $timeFilter GROUP BY \"storageTargetID\"::tag",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"diskSpaceFree"
],
"type": "field"
},
{
"params": [
" / "
],
"type": "math"
}
]
],
"tags": []
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B < 20",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "math"
}
}
],
"updated": "2023-10-10T16:06:31+05:30",
"noDataState": "OK",
"execErrState": "Error",
"for": "10m",
"annotations": {
"description": "Please check, as the free inodes for Storage Target ID '{{ $labels.storageTargetID }}' are only at {{ humanize $values.B.Value }}%.",
"summary": "BeeGFS Storage Target Inodes are below the threshold."
},
"labels": {
"inodes": "free"
},
"isPaused": true
}

View File

@@ -0,0 +1,125 @@
{
"id": 4,
"uid": "be096d59-9dc4-4821-9530-8447e7261d9c",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Inodes Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"query": "from(bucket: \"${BUCKET}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r[\"_measurement\"] == \"storageTargets\")\r\n |> filter(fn: (r) => r._field == \"inodesTotal\" or r._field == \"inodesFree\")\r\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n |> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> map(fn: (r) => ({ r with _value:(r.inodesFree/ r.inodesTotal) * 100.0 }))\r\n |> rename(columns: {_value: \"InodesFreePercent\"})\r\n |> drop(columns:[\"_start\",\"_stop\",\"_measurement\",\"inodesFree\",\"inodesTotal\"])",
"refId": "A"
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B < 20",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "math"
}
}
],
"updated": "2023-10-16T18:16:45Z",
"noDataState": "OK",
"execErrState": "Error",
"for": "10m",
"annotations": {
"description": "Please check, as the free inodes for Storage Target ID '{{ $labels.storageTargetID }}' are only at {{ humanize $values.B.Value }}%.",
"summary": "BeeGFS Storage Target Inodes are below the threshold."
},
"labels": {
"inodes": "free"
},
"isPaused": true
}

View File

@@ -0,0 +1,151 @@
{
"id": 5,
"uid": "bc49ff76-3db9-4f8b-b88a-947c7717fc18",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Meta Queued Request Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"groupBy": [
{
"params": [
"nodeID::tag"
],
"type": "tag"
}
],
"intervalMs": 1000,
"maxDataPoints": 43200,
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "auto",
"query": "SELECT max(\"queuedRequests\") FROM \"auto\".\"highResMeta\" WHERE $timeFilter GROUP BY \"nodeID\"::tag",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"queuedRequests"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": []
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
50
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "B",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"updated": "2023-10-17T18:57:50+05:30",
"noDataState": "OK",
"execErrState": "Error",
"for": "2m",
"annotations": {
"description": "Queued requests of BeeGFS meta server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
"summary": "Meta server queued requests is above threshold"
},
"labels": {
"queued": "request"
},
"isPaused": true
}

View File

@@ -0,0 +1,126 @@
{
"id": 5,
"uid": "a5a9072e-a8c2-46c1-b3a0-88608956e83e",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Meta Queued Request Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"query": "from(bucket: \"${BUCKET}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r._measurement == \"highResMeta\" and r._field == \"queuedRequests\") \r\n|> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) \r\n|> yield(name: \"max\")",
"refId": "A"
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
50
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "B",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"updated": "2023-10-16T18:16:45Z",
"noDataState": "OK",
"execErrState": "Error",
"for": "2m",
"annotations": {
"description": "Queued requests of BeeGFS meta server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
"summary": "Meta server queued requests is above threshold"
},
"labels": {
"queued": "request"
},
"isPaused": true
}

View File

@@ -0,0 +1,158 @@
{
"id": 1,
"uid": "d9a3e5ba-b5bc-4ede-989b-c605547eb2d",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Services Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"groupBy": [
{
"params": [
"host"
],
"type": "tag"
},
{
"params": [
"systemd_unit"
],
"type": "tag"
}
],
"intervalMs": 1000,
"maxDataPoints": 43200,
"measurement": "procstat_lookup",
"orderByTime": "ASC",
"policy": "auto",
"query": "SELECT last(\"running\") FROM \"auto\".\"procstat_lookup\" WHERE $timeFilter GROUP BY \"host\", \"systemd_unit\"",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"running"
],
"type": "field"
},
{
"params": [],
"type": "last"
}
]
]
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0,
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
1,
0
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": []
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "B",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"updated": "2023-10-17T09:19:39+05:30",
"noDataState": "OK",
"execErrState": "Error",
"for": "1m",
"annotations": {
"description": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down. Please check host \"{{ $labels.host }}\"",
"summary": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down"
},
"labels": {
"service_status": "down"
},
"isPaused": true
}

View File

@@ -0,0 +1,126 @@
{
"id": 1,
"uid": "a96d9b2e-2a6b-4ab3-9858-200da324672f",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Service Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"query": "from(bucket: \"${BUCKET}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r[\"_measurement\"] == \"procstat_lookup\")\r\n |> filter(fn: (r) => r._field == \"running\")\r\n |> group(columns: [\"host\", \"systemd_unit\"], mode: \"by\")\r\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n |> keep(columns: [\"_time\", \"_value\", \"host\", \"systemd_unit\"])\r\n |> sort(columns: [\"_time\"])",
"refId": "A"
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
1
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "B",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"updated": "2023-10-17T11:33:42Z",
"noDataState": "OK",
"execErrState": "Error",
"for": "1m",
"annotations": {
"description": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down. Please check host \"{{ $labels.host }}\"",
"summary": "BeeGFS Service \"{{ $labels.systemd_unit }}\" is Down"
},
"labels": {
"service_status": "down"
},
"isPaused": true
}

View File

@@ -0,0 +1,151 @@
{
"id": 6,
"uid": "c81b9c61-d553-4240-aff1-e92627a40a11",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Storage Queued Request Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"groupBy": [
{
"params": [
"nodeID::tag"
],
"type": "tag"
}
],
"intervalMs": 1000,
"maxDataPoints": 43200,
"measurement": "highResStorage",
"orderByTime": "ASC",
"policy": "auto",
"query": "SELECT max(\"queuedRequests\") FROM \"auto\".\"highResStorage\" WHERE $timeFilter GROUP BY \"nodeID\"::tag",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"queuedRequests"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": []
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
50
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "B",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"updated": "2023-10-17T19:13:11+05:30",
"noDataState": "OK",
"execErrState": "Error",
"for": "2m",
"annotations": {
"description": "Queued requests of BeeGFS Storage Server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
"summary": "Storage server queued requests is above threshold"
},
"labels": {
"queued": "request"
},
"isPaused": true
}

View File

@@ -0,0 +1,126 @@
{
"id": 6,
"uid": "e0a4e911-6602-4adc-993b-d65672e7f431",
"orgID": 1,
"folderUID": "beegfsalertfolder",
"ruleGroup": "evaluate",
"title": "Storage Queued Request Alert",
"condition": "C",
"data": [
{
"refId": "A",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "${DS_UID}",
"model": {
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"query": "from(bucket: \"${BUCKET}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r._measurement == \"highResStorage\" and r._field == \"queuedRequests\") \r\n|> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) \r\n|> yield(name: \"max\")",
"refId": "A"
}
},
{
"refId": "B",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"B"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"reducer": "last",
"refId": "B",
"type": "reduce"
}
},
{
"refId": "C",
"queryType": "",
"relativeTimeRange": {
"from": 600,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
50
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "B",
"hide": false,
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"updated": "2023-10-16T18:16:45Z",
"noDataState": "OK",
"execErrState": "Error",
"for": "2m",
"annotations": {
"description": "Queued requests of BeeGFS storage server with nodeID - \"{{ $labels.nodeID }}\" is {{ $values.B }}",
"summary": "Storage server queued requests is above threshold"
},
"labels": {
"queued": "request"
},
"isPaused": true
}

View File

@@ -0,0 +1,121 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "panel",
"id": "alertlist",
"name": "Alert list",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "10.1.4"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"gridPos": {
"h": 22,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"alertInstanceLabelFilter": "",
"alertName": "",
"dashboardAlerts": false,
"folder": {
"title": "BeeGFS-Alert",
"uid": "beegfsalertfolder"
},
"groupBy": [],
"groupMode": "default",
"maxItems": 20,
"sortOrder": 1,
"stateFilter": {
"error": true,
"firing": true,
"noData": false,
"normal": true,
"pending": true
},
"viewMode": "list"
},
"pluginVersion": "10.1.4",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"refId": "A"
}
],
"title": "Alert List",
"type": "alertlist"
}
],
"refresh": "",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Alerts List",
"uid": "c4a31d8f-4dc6-4023-bc7a-1b06167a6f74",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,12 @@
{
"uid": "d5c51f44-07d047ca-a580-5a66f643e",
"name": "beegfs-email",
"type": "email",
"settings": {
"addresses": "beegfsalert@example.com",
"message": "{{ template \"beegfs.message\" . }}",
"singleEmail": false,
"subject": "{{ template \"beegfs.title\" . }}"
},
"disableResolveMessage": false
}

View File

@@ -0,0 +1,4 @@
{
"name": "BeeGFS-Email-Template",
"template": "{{ define \"alert_severity_prefix_emoji\" -}}\n\t{{- if eq .Status \"firing\" -}}\n\t\t🔥\n\t{{- else -}}\n\t\t✅\n\t{{- end -}}\n{{- end -}}\n\n{{ define \"beegfs_subject\" }}\n{{ template \"alert_severity_prefix_emoji\" . }}\n[{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ if gt (.Alerts.Resolved | len) 0 }}, RESOLVED:{{ .Alerts.Resolved | len }}{{ end }}{{ end }} | {{ .CommonLabels.alertname -}}] \n{{ end }}\n\n{{ define \"beegfs_text_alert_list\" }}{{ range . }}\nSummary:\n\t{{ .Annotations.summary }}\n\t\nDescription:\n\t{{ .Annotations.description }}\n\nLabels:\n {{ range .Labels.SortedPairs -}}\n {{ .Name }} = {{ .Value }}\n\t{{ end }}\n{{ end }}\n{{ range . }}\n{{ if gt (len .SilenceURL) 0 }}Silence: {{ .SilenceURL }}\n{{ end }}\n{{ end }}\n{{ end }}\n\n\n{{ define \"beegfs.title\" }}{{ template \"beegfs_subject\" . }}{{ end }}\n\n{{ define \"beegfs.message\" }}{{ if gt (len .Alerts.Firing) 0 }}*Firing 🔥*\n{{ template \"beegfs_text_alert_list\" .Alerts.Firing }}{{ if gt (len .Alerts.Resolved) 0 }}\n\n{{ end }}{{ end }}\n\n{{ if gt (len .Alerts.Resolved) 0 }}*Resolved ✅*\n\nBelow alert is resolved:\n{{ template \"beegfs_text_alert_list\" .Alerts.Resolved }}\n{{ end }}\n{{ end }}"
}

View File

@@ -0,0 +1,97 @@
{
"receiver": "grafana-default-email",
"routes": [
{
"receiver": "beegfs-email",
"object_matchers": [
[
"disk-severity",
"=",
"warning"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"disk-severity",
"=",
"critical"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"service_status",
"=",
"down"
]
],
"group_wait": "30s",
"group_interval": "3m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"inodes",
"=",
"free"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"cpu-severity",
"=",
"critical"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"cpu-severity",
"=",
"warning"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"queued",
"=",
"request"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
}
]
}

View File

@@ -0,0 +1,58 @@
{
"receiver": "grafana-default-email",
"routes": [
{
"receiver": "beegfs-email",
"object_matchers": [
[
"disk-severity",
"=",
"warning"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"disk-severity",
"=",
"critical"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"inodes",
"=",
"free"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
},
{
"receiver": "beegfs-email",
"object_matchers": [
[
"queued",
"=",
"request"
]
],
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "6h"
}
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,386 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "SELECT sum(*) FROM \"metaClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY node",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "Meta Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {
"Time": 0,
"node": 1,
"sum_close": 2,
"sum_createLI": 3,
"sum_getXA": 16,
"sum_hardlnk": 18,
"sum_listXA": 19,
"sum_mdsInf": 4,
"sum_mkdir": 5,
"sum_open": 6,
"sum_rddir": 7,
"sum_ren": 8,
"sum_revalLI": 9,
"sum_rmdir": 10,
"sum_sAttr": 11,
"sum_sChDrct": 12,
"sum_stat": 13,
"sum_statLI": 14,
"sum_sum": 15,
"sum_trunc": 17,
"sum_unlnk": 20
},
"renameByName": {
"node": "",
"sum_close": "close",
"sum_create": "create",
"sum_createLI": "createLI",
"sum_dirparent": "dirparent",
"sum_entInf": "entInf",
"sum_flckAp": "flckAp",
"sum_flckEn": "flckEn",
"sum_flckRg": "flckRg",
"sum_fndOwn": "fndOwn",
"sum_getXA": "getXA",
"sum_hardlnk": "hardlnk",
"sum_listXA": "listXA",
"sum_lookLI": "lookLI",
"sum_mdsInf": "mdsInf",
"sum_mirror": "mirror",
"sum_mkdir": "mkdir",
"sum_mvDirIns": "mvDirIns",
"sum_mvFiIns": "mvFiIns",
"sum_open": "open",
"sum_openLI": "openLI",
"sum_rddir": "rddir",
"sum_refrEnt": "refrEnt",
"sum_ren": "ren",
"sum_revalLI": "revalLI",
"sum_rmLnk": "rmLnk",
"sum_rmXA": "rmXA",
"sum_rmdir": "rmdir",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_sDirPat": "sDirPat",
"sum_setXA": "setXA",
"sum_stat": "stat",
"sum_statLI": "statLI",
"sum_statfs": "statfs",
"sum_sum": "sum",
"sum_symlnk": "symlnk",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "sum_B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "sum_B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"node"
],
"type": "tag"
}
],
"measurement": "metaClientOpsByNode",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(*) FROM \"storageClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY node",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
"select": [
[
{
"params": [
"*"
],
"type": "field"
}
]
],
"tags": []
}
],
"title": "Storage Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {},
"renameByName": {
"sum_B-rd": "B-rd",
"sum_B-wr": "B-wr",
"sum_close": "close",
"sum_getFSize": "getFSize",
"sum_ops-rd": "ops-rd",
"sum_ops-wr": "ops-wr",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_statfs": "statfs",
"sum_storInf": "storInf",
"sum_sum": "sum",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk"
}
}
}
],
"type": "table"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from metaClientOpsByNode with key = \"node\"",
"hide": 0,
"includeAll": true,
"label": "nodeID",
"multi": true,
"name": "nodeID",
"options": [],
"query": "show tag values from metaClientOpsByNode with key = \"node\"",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by Node)",
"uid": "HHI9d8UO",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,273 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
"refId": "A"
}
],
"title": "Meta Operation List",
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "^B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 9
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
"refId": "A"
}
],
"title": "Storage Operation List",
"type": "table"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
"hide": 0,
"includeAll": true,
"label": "nodeID",
"multi": true,
"name": "nodeID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-2d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by Node)",
"uid": "V5Me2Vk",
"version": 2,
"weekStart": ""
}

View File

@@ -0,0 +1,386 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "SELECT sum(*) FROM \"metaClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY \"node\"",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "Meta Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {
"Time": 0,
"node": 1,
"sum_close": 2,
"sum_createLI": 3,
"sum_getXA": 16,
"sum_hardlnk": 18,
"sum_listXA": 19,
"sum_mdsInf": 4,
"sum_mkdir": 5,
"sum_open": 6,
"sum_rddir": 7,
"sum_ren": 8,
"sum_revalLI": 9,
"sum_rmdir": 10,
"sum_sAttr": 11,
"sum_sChDrct": 12,
"sum_stat": 13,
"sum_statLI": 14,
"sum_sum": 15,
"sum_trunc": 17,
"sum_unlnk": 20
},
"renameByName": {
"node": "",
"sum_close": "close",
"sum_create": "create",
"sum_createLI": "createLI",
"sum_dirparent": "dirparent",
"sum_entInf": "entInf",
"sum_flckAp": "flckAp",
"sum_flckEn": "flckEn",
"sum_flckRg": "flckRg",
"sum_fndOwn": "fndOwn",
"sum_getXA": "getXA",
"sum_hardlnk": "hardlnk",
"sum_listXA": "listXA",
"sum_lookLI": "lookLI",
"sum_mdsInf": "mdsInf",
"sum_mirror": "mirror",
"sum_mkdir": "mkdir",
"sum_mvDirIns": "mvDirIns",
"sum_mvFiIns": "mvFiIns",
"sum_open": "open",
"sum_openLI": "openLI",
"sum_rddir": "rddir",
"sum_refrEnt": "refrEnt",
"sum_ren": "ren",
"sum_revalLI": "revalLI",
"sum_rmLnk": "rmLnk",
"sum_rmXA": "rmXA",
"sum_rmdir": "rmdir",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_sDirPat": "sDirPat",
"sum_setXA": "setXA",
"sum_stat": "stat",
"sum_statLI": "statLI",
"sum_statfs": "statfs",
"sum_sum": "sum",
"sum_symlnk": "symlnk",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "sum_B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "sum_B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"node"
],
"type": "tag"
}
],
"measurement": "storageClientOpsByNode",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(*) FROM \"storageClientOpsByNode\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"node\" =~ /^$nodeID$/ GROUP BY node",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
"select": [
[
{
"params": [
"*"
],
"type": "field"
}
]
],
"tags": []
}
],
"title": "Storage Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {},
"renameByName": {
"sum_B-rd": "B-rd",
"sum_B-wr": "B-wr",
"sum_close": "close",
"sum_getFSize": "getFSize",
"sum_ops-rd": "ops-rd",
"sum_ops-wr": "ops-wr",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_statfs": "statfs",
"sum_storInf": "storInf",
"sum_sum": "sum",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk"
}
}
}
],
"type": "table"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from metaClientOpsByNode with key = \"node\"",
"hide": 0,
"includeAll": true,
"label": "Node ID",
"multi": true,
"name": "nodeID",
"options": [],
"query": "show tag values from metaClientOpsByNode with key = \"node\"",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by Node)",
"uid": "HHI9dJV4k",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,273 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
"refId": "A"
}
],
"title": "Meta Operation List",
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "^B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 9
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByNode\")\r\n |> filter(fn: (r) => r.node =~ /${nodeID:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"node\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n \r\n",
"refId": "A"
}
],
"title": "Storage Operation List",
"type": "table"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
"hide": 0,
"includeAll": true,
"label": "nodeID",
"multi": true,
"name": "nodeID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"\r\n\r\nschema.measurementTagValues(\r\n bucket: \"${bucket}\",\r\n tag: \"node\",\r\n measurement: \"metaClientOpsByNode\"\r\n)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-2d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by Node)",
"uid": "V5Me2Vk",
"version": 3,
"weekStart": ""
}

View File

@@ -0,0 +1,635 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 6,
"panels": [],
"title": "Operation List",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"filterable": false,
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "sum_mdsInf"
}
]
},
"pluginVersion": "9.3.0",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(*) FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"title": "Meta Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {},
"renameByName": {
"sum_close": "close",
"sum_create": "create",
"sum_createLI": "createLI",
"sum_mdsInf": "mdsInf",
"sum_mkdir": "mkdir",
"sum_open": "open",
"sum_rddir": "rddir",
"sum_ren": "ren",
"sum_revalLI": "revalLI",
"sum_rmdir": "rmdir",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_stat": "stat",
"sum_statLI": "statLI",
"sum_sum": "sum",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk",
"user": "user"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
},
{
"matcher": {
"id": "byName",
"options": "sum_B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "sum_B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 10
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "SELECT sum(*) FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "Storage Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {},
"renameByName": {
"sum_B-rd": "B-rd",
"sum_B-wr": "B-wr",
"sum_close": "close",
"sum_getFSize": "getFSize",
"sum_ops-rd": "ops-rd",
"sum_ops-wr": "ops-wr",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_statfs": "statfs",
"sum_storInf": "storInf",
"sum_sum": "sum",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk",
"user": "user"
}
}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 19
},
"id": 17,
"panels": [],
"title": "Meta Operation Per User",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 0,
"y": 20
},
"id": 24,
"maxPerRow": 4,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "SELECT sum(\"close\") AS \"close\", sum(\"getXA\") AS \"getXA\", sum(\"hardlnk\") AS \"hardlnk\", sum(\"listXA\") AS \"listXA\", sum(\"mkdir\") AS \"mkdir\", sum(\"open\") AS \"open\", sum(\"rddir\") AS \"rddir\", sum(\"ren\") AS \"ren\", sum(\"rmXA\") AS \"rmXA\", sum(\"rmdir\") AS \"rmdir\", sum(\"setXA\") AS \"setXA\", sum(\"stat\") AS \"stat\", sum(\"statfs\") AS \"statfs\", sum(\"symlnk\") AS \"symlnk\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\", sum(\"ack\") AS \"ack\", sum(\"create\") AS \"create\", sum(\"createLI\") AS \"createLI\", sum(\"dirparent\") AS \"dirparent\", sum(\"entInf\") AS \"entInf\", sum(\"flckAp\") AS \"flckAp\",sum(\"flckEn\") AS \"flckEn\", sum(\"flckRg\") AS \"flckRg\", sum(\"fndOwn\") AS \"fndOwn\", sum(\"lookLI\") AS \"lookLI\", sum(\"mdsInf\") AS \"mdsInf\", sum(\"mirror\") AS \"mirror\", sum(\"mvDirIns\") AS \"mvDirIns\", sum(\"mvFiIns\") AS \"mvFiIns\", sum(\"openLI\") AS \"openLI\", sum(\"refrEnt\") AS \"refrEnt\", sum(\"revalLI\") AS \"revalLI\", sum(\"rmLnk\") AS \"rmLnk\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"sDirPat\") AS \"sDirPat\", sum(\"statLI\") AS \"statLI\" FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "User ID $userid",
"type": "piechart"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 28
},
"id": 8,
"panels": [],
"title": "Storage Operation Per User",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 0,
"y": 29
},
"id": 10,
"maxPerRow": 4,
"options": {
"displayLabels": [
"name",
"percent"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.0.8",
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"user"
],
"type": "tag"
}
],
"measurement": "storageClientOpsByUser",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"ack\") AS \"ack\", sum(\"close\") AS \"close\", sum(\"fsync\") AS \"fsync\", sum(\"gendbg\") AS \"gendbg\", sum(\"getFSize\") AS \"getFSize\", sum(\"hrtbeat\") AS \"hrtbeat\", sum(\"ops-rd\") AS \"ops-rd\", sum(\"ops-wr\") AS \"ops-wr\", sum(\"remNode\") AS \"remNode\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"statfs\") AS \"statfs\", sum(\"storInf\") AS \"storInf\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\" FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
"select": [
[
{
"params": [
"B-wr"
],
"type": "field"
},
{
"params": [],
"type": "sum"
},
{
"params": [
"write"
],
"type": "alias"
}
],
[
{
"params": [
"B-rd"
],
"type": "field"
},
{
"params": [],
"type": "sum"
},
{
"params": [
"read"
],
"type": "alias"
}
]
],
"tags": [
{
"key": "user",
"operator": "=~",
"value": "/^$userid$/"
}
]
}
],
"title": "User ID $userid",
"transparent": true,
"type": "piechart"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from storageClientOpsByUser with key = \"user\"",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "userid",
"options": [],
"query": "show tag values from storageClientOpsByUser with key = \"user\"",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by User)",
"uid": "RYuIR1V4k",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,582 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 13,
"panels": [],
"title": "Operation List",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 1
},
"id": 8,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 0,
"showHeader": true,
"sortBy": []
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n",
"refId": "A"
}
],
"title": "Meta Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": true
},
"indexByName": {},
"renameByName": {
"mdsInf {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "mdsInf",
"sChDrct {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "sChDrct",
"stat {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "stat",
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "",
"user {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "User"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 8
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 0,
"showHeader": true,
"sortBy": [
{
"desc": false,
"displayName": "_value"
}
]
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")",
"refId": "A"
}
],
"title": "Storage Operation List",
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 18,
"panels": [],
"title": "Meta Operation Per User",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 16
},
"id": 4,
"maxPerRow": 4,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /^$userid$/)",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "User ID $userid",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"sum": true,
"user": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "piechart"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 24
},
"id": 23,
"panels": [],
"title": "Storage Operation Per User",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "^B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 25
},
"id": 33,
"maxPerRow": 4,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /^$userid$/)",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "User ID $userid",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"sum": true,
"user": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "piechart"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
"hide": 0,
"includeAll": true,
"label": "User ID",
"multi": true,
"name": "userid",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by User)",
"uid": "RBCm2Vk",
"version": 2,
"weekStart": ""
}

View File

@@ -0,0 +1,641 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 6,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"cellOptions": {
"type": "auto"
},
"filterable": false,
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "sum_mdsInf"
}
]
},
"pluginVersion": "9.5.0",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(*) FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"title": "Meta Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {},
"renameByName": {
"sum_close": "close",
"sum_create": "create",
"sum_createLI": "createLI",
"sum_mdsInf": "mdsInf",
"sum_mkdir": "mkdir",
"sum_open": "open",
"sum_rddir": "rddir",
"sum_ren": "ren",
"sum_revalLI": "revalLI",
"sum_rmdir": "rmdir",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_stat": "stat",
"sum_statLI": "statLI",
"sum_sum": "sum",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk",
"user": "user"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
},
{
"matcher": {
"id": "byName",
"options": "sum_B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "sum_B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 10
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.5.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "SELECT sum(*) FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND \"user\" =~ /^$userid$/ GROUP BY \"user\"\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "Storage Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"indexByName": {},
"renameByName": {
"sum_B-rd": "B-rd",
"sum_B-wr": "B-wr",
"sum_close": "close",
"sum_getFSize": "getFSize",
"sum_ops-rd": "ops-rd",
"sum_ops-wr": "ops-wr",
"sum_sAttr": "sAttr",
"sum_sChDrct": "sChDrct",
"sum_statfs": "statfs",
"sum_storInf": "storinf",
"sum_sum": "sum",
"sum_trunc": "trunc",
"sum_unlnk": "unlnk",
"user": "user"
}
}
}
],
"type": "table"
}
],
"title": "Operation List",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 1
},
"id": 17,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 0,
"y": 1
},
"id": 24,
"maxPerRow": 4,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "SELECT sum(\"close\") AS \"close\", sum(\"getXA\") AS \"getXA\", sum(\"hardlnk\") AS \"hardlnk\", sum(\"listXA\") AS \"listXA\", sum(\"mkdir\") AS \"mkdir\", sum(\"open\") AS \"open\", sum(\"rddir\") AS \"rddir\", sum(\"ren\") AS \"ren\", sum(\"rmXA\") AS \"rmXA\", sum(\"rmdir\") AS \"rmdir\", sum(\"setXA\") AS \"setXA\", sum(\"stat\") AS \"stat\", sum(\"statfs\") AS \"statfs\", sum(\"symlnk\") AS \"symlnk\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\", sum(\"ack\") AS \"ack\", sum(\"create\") AS \"create\", sum(\"createLI\") AS \"createLI\", sum(\"dirparent\") AS \"dirparent\", sum(\"entInf\") AS \"entInf\", sum(\"flckAp\") AS \"flckAp\",sum(\"flckEn\") AS \"flckEn\", sum(\"flckRg\") AS \"flckRg\", sum(\"fndOwn\") AS \"fndOwn\", sum(\"lookLI\") AS \"lookLI\", sum(\"mdsInf\") AS \"mdsInf\", sum(\"mirror\") AS \"mirror\", sum(\"mvDirIns\") AS \"mvDirIns\", sum(\"mvFiIns\") AS \"mvFiIns\", sum(\"openLI\") AS \"openLI\", sum(\"refrEnt\") AS \"refrEnt\", sum(\"revalLI\") AS \"revalLI\", sum(\"rmLnk\") AS \"rmLnk\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"sDirPat\") AS \"sDirPat\", sum(\"statLI\") AS \"statLI\" FROM \"metaClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "User ID $userid",
"type": "piechart"
}
],
"title": "Meta Operation Per User",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 2
},
"id": 8,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 0,
"y": 18
},
"id": 10,
"maxPerRow": 4,
"options": {
"displayLabels": [
"name",
"percent"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.0.8",
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"user"
],
"type": "tag"
}
],
"measurement": "storageClientOpsByUser",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"ack\") AS \"ack\", sum(\"close\") AS \"close\", sum(\"fsync\") AS \"fsync\", sum(\"gendbg\") AS \"gendbg\", sum(\"getFSize\") AS \"getFSize\", sum(\"hrtbeat\") AS \"hrtbeat\", sum(\"ops-rd\") AS \"ops-rd\", sum(\"ops-wr\") AS \"ops-wr\", sum(\"remNode\") AS \"remNode\", sum(\"sAttr\") AS \"sAttr\", sum(\"sChDrct\") AS \"sChDrct\", sum(\"statfs\") AS \"statfs\", sum(\"storInf\") AS \"storInf\", sum(\"trunc\") AS \"trunc\", sum(\"unlnk\") AS \"unlnk\" FROM \"storageClientOpsByUser\" WHERE time > ${__from:date:seconds}s AND time < ${__to:date:seconds}s AND (\"user\" =~ /^$userid$/) ",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
"select": [
[
{
"params": [
"B-wr"
],
"type": "field"
},
{
"params": [],
"type": "sum"
},
{
"params": [
"write"
],
"type": "alias"
}
],
[
{
"params": [
"B-rd"
],
"type": "field"
},
{
"params": [],
"type": "sum"
},
{
"params": [
"read"
],
"type": "alias"
}
]
],
"tags": [
{
"key": "user",
"operator": "=~",
"value": "/^$userid$/"
}
]
}
],
"title": "User ID $userid",
"transparent": true,
"type": "piechart"
}
],
"title": "Storage Operation Per User",
"type": "row"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from storageClientOpsByUser with key = \"user\"",
"hide": 0,
"includeAll": true,
"label": "User ID",
"multi": true,
"name": "userid",
"options": [],
"query": "show tag values from storageClientOpsByUser with key = \"user\"",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by User)",
"uid": "RYuIR1V4k",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,582 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 13,
"panels": [],
"title": "Operation List",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 1
},
"id": 8,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 0,
"showHeader": true,
"sortBy": []
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": " from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n",
"refId": "A"
}
],
"title": "Meta Operation List",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": true
},
"indexByName": {},
"renameByName": {
"mdsInf {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "mdsInf",
"sChDrct {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "sChDrct",
"stat {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "stat",
"sum {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "",
"user {_start=\"2023-01-11 23:29:59.44 +0000 UTC\", _stop=\"2023-01-12 05:30:59.646295406 +0000 UTC\"}": "User"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "user"
},
"properties": [
{
"id": "unit",
"value": "none"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 8
},
"id": 2,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 0,
"showHeader": true,
"sortBy": [
{
"desc": false,
"displayName": "_value"
}
]
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> filter(fn: (r) => r.user =~ /${userid:regex}/)\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")",
"refId": "A"
}
],
"title": "Storage Operation List",
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 18,
"panels": [],
"title": "Meta Operation Per User",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 16
},
"id": 4,
"maxPerRow": 4,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"metaClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /$userid$/)",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "User ID $userid",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"sum": true,
"user": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "piechart"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 24
},
"id": 23,
"panels": [],
"title": "Storage Operation Per User",
"type": "row"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "^B-wr"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^B-rd"
},
"properties": [
{
"id": "unit",
"value": "bytes"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 25
},
"id": 33,
"maxPerRow": 4,
"options": {
"displayLabels": [
"percent",
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"repeat": "userid",
"repeatDirection": "h",
"targets": [
{
"alias": "",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\")\r\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\r\n |> filter(fn: (r) => r._measurement == \"storageClientOpsByUser\")\r\n |> sum()\r\n |> group()\r\n |> pivot(rowKey:[\"user\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n |> filter(fn: (r) => r.user =~ /$userid$/)",
"rawQuery": true,
"refId": "A",
"resultFormat": "table"
}
],
"title": "User ID $userid",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"sum": true,
"user": true
},
"indexByName": {},
"renameByName": {}
}
}
],
"type": "piechart"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
"hide": 0,
"includeAll": true,
"label": "User ID",
"multi": true,
"name": "userid",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"user\", measurement: \"storageClientOpsByUser\")",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Client Operations (by User)",
"uid": "RBCm2Vk",
"version": 3,
"weekStart": ""
}

163
mon/scripts/grafana/import-alerts Executable file
View File

@@ -0,0 +1,163 @@
#!/bin/bash
# Creates one Grafana alert rule from the JSON file passed as $1.
# Every ${DS_UID} placeholder in the file is replaced with $DATASOURCE_UID before the
# rule is POSTed to the alert-rules provisioning endpoint of $HOST.
addAlert() {
    alert_json=$(cat "$1")
    # The here-string feeds the file content (plus a final newline) to sed.
    modified_json=$(sed -e "s/\${DS_UID}/$DATASOURCE_UID/g" <<< "$alert_json")
    # "X-Disable-Provenance;" - the trailing semicolon tells curl to send the header
    # with an empty value.
    curl --silent --request POST "$HOST/api/v1/provisioning/alert-rules" \
        --header "Content-type: application/json" \
        --header "X-Disable-Provenance;" \
        --data "$modified_json"
}
# Creates one Grafana alert rule (InfluxDB 2.x variant) from the JSON file passed as $1.
# ${DS_UID} placeholders are replaced with $DATASOURCE_UID and ${BUCKET} placeholders
# with $BUCKET_NAME before the rule is POSTed to the provisioning endpoint of $HOST.
function addAlertV2() {
    local ds_uid bucket alert_json modified_json
    # Escape the characters that are special inside a '/'-delimited sed replacement
    # ('\', '&' and '/'). Without this a bucket name such as "beegfs_mon/autogen"
    # terminates the s command early and the substitution fails.
    ds_uid=$(printf '%s' "$DATASOURCE_UID" | sed -e 's|[\\&/]|\\&|g')
    bucket=$(printf '%s' "$BUCKET_NAME" | sed -e 's|[\\&/]|\\&|g')
    alert_json=$(cat "$1")
    modified_json=$(echo "$alert_json" | sed -e "s/\${DS_UID}/$ds_uid/g; s/\${BUCKET}/$bucket/g")
    # "X-Disable-Provenance;" - the trailing semicolon makes curl send an empty header value.
    curl -s -X POST "$HOST/api/v1/provisioning/alert-rules" \
        --header "Content-type: application/json" \
        --header "X-Disable-Provenance;" \
        --data "$modified_json"
}
# Imports the dashboard JSON file passed as $1 into the Grafana folder with uid
# "beegfsalertfolder". ${DS_BEEGFS_MON_INFLUXDB} placeholders in the file are replaced
# with $DATASOURCE_NAME before the wrapped document is POSTed to $HOST.
function addDashboard() {
    # printf '%s' inserts the file content verbatim. The previous "echo -e" expanded
    # backslash escapes, turning JSON escapes such as \r\n or \t inside query strings
    # into raw control characters. "$1" is quoted so paths containing spaces work.
    printf '{"dashboard": %s, "folderUid": "beegfsalertfolder"}\n' "$(cat "$1")" | \
        sed -e "s,\${DS_BEEGFS_MON_INFLUXDB},$DATASOURCE_NAME,g" | \
        curl -s -X POST "$HOST/api/dashboards/db" \
            --header "Content-type: application/json" \
            --data @-
}
# Creates the Grafana folder titled "BeeGFS-Alert" with the fixed uid
# "beegfsalertfolder" on $HOST.
addFolder() {
    local folder_payload='{"uid": "beegfsalertfolder", "title": "BeeGFS-Alert"}'
    curl --silent --request POST "$HOST/api/folders" \
        --header "Content-type: application/json" \
        --data "$folder_payload"
}
# Uploads $alert_path/email-template.json as the notification template
# "BeeGFS-Email-Template" using a PUT request against $HOST.
addTemplate() {
    local template_file="$alert_path/email-template.json"
    # "@file" makes curl read the request body from that file.
    curl --silent --request PUT "$HOST/api/v1/provisioning/templates/BeeGFS-Email-Template" \
        --header "X-Disable-Provenance;" \
        --header "Content-Type: application/json" \
        --data "@$template_file"
}
# Creates a contact point on $HOST from $alert_path/contact-point.json.
addContactPoint() {
    local contact_point_file="$alert_path/contact-point.json"
    # "@file" makes curl read the request body from that file.
    curl --silent --request POST "$HOST/api/v1/provisioning/contact-points" \
        --header "X-Disable-Provenance;" \
        --header "Content-Type: application/json" \
        --data "@$contact_point_file"
}
# Replaces the notification policies on $HOST with the content of the JSON file
# passed as $1 (sent with a PUT request).
function addPolicies() {
    local update_policies
    # "$1" is quoted so that a script directory containing spaces does not split the path.
    update_policies=$(cat "$1")
    curl -s -X PUT "$HOST/api/v1/provisioning/policies" \
        --header "X-Disable-Provenance;" \
        --header "Content-Type: application/json" \
        --data "$update_policies"
}
# Main flow of the alert import: parse the argument, ask for the monitoring flavour and
# the InfluxDB version, look up the "beegfs_mon_influxdb" datasource in Grafana and then
# create the folder, alert rules, alert dashboard, e-mail template, contact point and
# notification policies.

# Default Grafana URL (including credentials); replaced by $1 unless $1 is "default".
HOST="http://admin:admin@localhost:3000"

# Print the usage text unless called with "default" or with exactly one argument.
if [[ $1 != "default" ]] && [[ ! $# -eq 1 ]]; then
    echo "This script imports the default beegfs-mon Alerts into Grafana using its HTTP API."
    echo ""
    echo "Usage: "
    echo "Default installation to localhost: $(basename "$0") default"
    echo "Custom installation: $(basename "$0") <grafana url>"
    echo ""
    echo "Default:"
    echo "$(basename "$0") $HOST"
    exit 0
fi

command -v curl > /dev/null 2>&1 || \
{
    echo "This script requires curl, but it doesn't seem to be installed. Aborting."
    exit 1
}

if [[ $1 != "default" ]]; then
    HOST="$1"
fi

echo "Select an option:"
echo "1. Using BeeGFS Monitoring with Telegraf"
echo "2. Using BeeGFS Monitoring without Telegraf"
read -r -p "Enter your Option: " option
case "$option" in
    1) monType="wtelegraf" ;;
    2) monType="wotelegraf" ;;
    *)
        echo "*** Please select correct option ***"
        exit 1
        ;;
esac

echo "Select an option:"
echo "Please select influxdb version:"
echo "1) Influxdb 1.x"
echo "2) Influxdb 2.x"
read -r -p "Enter your influxdb Version: " influxdb_version
# Reject an unsupported version before anything is created in Grafana.
if [[ "$influxdb_version" != "1" ]] && [[ "$influxdb_version" != "2" ]]; then
    echo "*** Please select correct version of InfluxDB ***"
    exit 1
fi

# Fetch the datasource description once and extract the needed fields from it.
datasource_json=$(curl -s "$HOST/api/datasources/name/beegfs_mon_influxdb")
DATASOURCE_UID=$(printf '%s\n' "$datasource_json" | grep -o '"uid": *"[^"]*"' | cut -d'"' -f4)
DATASOURCE_NAME=$(printf '%s\n' "$datasource_json" | grep -o '"name": *"[^"]*"' | cut -d'"' -f4)
if [[ -z "$DATASOURCE_UID" ]]; then
    # Without a UID every alert rule would reference a non-existing datasource.
    echo "*** Could not look up the datasource beegfs_mon_influxdb in Grafana. Aborting. ***"
    exit 1
fi
if [[ "$influxdb_version" == "2" ]]; then
    BUCKET_NAME=$(printf '%s\n' "$datasource_json" | grep -o '"defaultBucket": *"[^"]*"' | cut -d'"' -f4)
    if [[ -z "$BUCKET_NAME" ]]; then
        echo "*** The datasource beegfs_mon_influxdb has no default bucket. Aborting. ***"
        exit 1
    fi
fi

ALERT_DIR=$(dirname "$0")
alert_path="$ALERT_DIR/alerts"

addFolder

# Rule files end in -v1.json / -v2.json; the v2 files additionally carry ${BUCKET}.
if [[ "$influxdb_version" == "1" ]]; then
    add_rule="addAlert"
else
    add_rule="addAlertV2"
fi
rule_suffix="v${influxdb_version}"

if [[ "$monType" == "wtelegraf" ]]; then
    # With Telegraf: import every rule file of the selected version.
    for alert_file in "$alert_path"/*-"$rule_suffix".json; do
        if [ -f "$alert_file" ]; then
            "$add_rule" "$alert_file"
        fi
    done
else
    # Without Telegraf: only this fixed set of rules is imported.
    for alert_name in Disk Inodes MetaQueuedrequest StorageQueuedrequest; do
        "$add_rule" "$alert_path/${alert_name}-alert-${rule_suffix}.json"
    done
fi

addDashboard "$alert_path/alert-dashboard.json"
addTemplate
addContactPoint

# monType was validated above, so exactly one of the two policy files applies.
if [[ "$monType" == "wotelegraf" ]]; then
    addPolicies "$alert_path/policies.json"
else
    addPolicies "$alert_path/policies-telegraf.json"
fi

echo -e "\n\n\n######### Alert is configured. Next step: update email address in contact point of beegfs-email. #########"

View File

@@ -0,0 +1,146 @@
#!/bin/bash
# Imports the dashboard JSON file passed as $1 into Grafana at $HOST.
# ${DS_BEEGFS_MON_INFLUXDB} placeholders in the file are replaced with
# $DATASOURCE_NAME before the wrapped document is POSTed.
function addDashboard() {
    # printf '%s' inserts the file content verbatim. The previous "echo -e" expanded
    # backslash escapes, turning JSON escapes such as \r\n or \t inside query strings
    # into raw control characters. "$1" is quoted so paths containing spaces work.
    printf '{"dashboard": %s }\n' "$(cat "$1")" | \
        sed -e "s,\${DS_BEEGFS_MON_INFLUXDB},$DATASOURCE_NAME,g" | \
        curl -s -X POST "$HOST/api/dashboards/db" \
            --header "Content-type: application/json" \
            --data @-
}
# Prints $1 prepared for insertion into a JSON string literal through a ','-delimited
# sed replacement: first '\' and '"' are JSON-escaped, then the characters special in
# the sed replacement ('\', '&' and the delimiter ',') are escaped.
function escapeTemplateValue() {
    printf '%s' "$1" | sed -e 's/[\\"]/\\&/g' -e 's/[\\&,]/\\&/g'
}

# Creates the InfluxDB 1.x datasource in Grafana at $HOST from the template file $1.
# The %DATABASE_NAME%, %DATABASE_USER%, %DATASOURCE_URL%, %DATASOURCE_NAME% and
# %PASSWORD% placeholders are filled from the variables of the same name.
function addDatasource() {
    # Values are escaped so that e.g. a password containing ',', '&' or '"' neither
    # breaks the sed expression nor corrupts the generated JSON.
    sed -e "s,%DATABASE_NAME%,$(escapeTemplateValue "$DATABASE_NAME"),g" \
        -e "s,%DATABASE_USER%,$(escapeTemplateValue "$DATABASE_USER"),g" \
        -e "s,%DATASOURCE_URL%,$(escapeTemplateValue "$DATASOURCE_URL"),g" \
        -e "s,%DATASOURCE_NAME%,$(escapeTemplateValue "$DATASOURCE_NAME"),g" \
        -e "s,%PASSWORD%,$(escapeTemplateValue "$PASSWORD"),g" \
        "$1" | \
        curl -s -X POST "$HOST/api/datasources" \
            --header "Content-type: application/json" \
            --data @-
}

# Creates the InfluxDB 2.x datasource in Grafana at $HOST from the template file $1.
# The %BUCKET_NAME%, %ORG_NAME%, %DATASOURCE_URL%, %DATASOURCE_NAME% and %TOKEN%
# placeholders are filled from $BUCKET_NAME, $ORG_NAME, $DATASOURCE_URL,
# $DATASOURCE_NAME and $TOKEN.
function addDatasourceV2() {
    sed -e "s,%BUCKET_NAME%,$(escapeTemplateValue "$BUCKET_NAME"),g" \
        -e "s,%ORG_NAME%,$(escapeTemplateValue "$ORG_NAME"),g" \
        -e "s,%DATASOURCE_URL%,$(escapeTemplateValue "$DATASOURCE_URL"),g" \
        -e "s,%DATASOURCE_NAME%,$(escapeTemplateValue "$DATASOURCE_NAME"),g" \
        -e "s,%TOKEN%,$(escapeTemplateValue "$TOKEN"),g" \
        "$1" | \
        curl -s -X POST "$HOST/api/datasources" \
            --header "Content-type: application/json" \
            --data @-
}
# Main flow of the dashboard import: parse the arguments, ask for the monitoring
# flavour, the InfluxDB version and its credentials, then create the datasource and
# import the six default dashboards matching that combination.

# Defaults used when the script is called with "default".
DATASOURCE_NAME="beegfs_mon_influxdb"
HOST="http://admin:admin@localhost:3000"
DATASOURCE_URL="http://localhost:8086"

# Print the usage text unless called with "default" or with exactly two arguments.
if [[ $1 != "default" ]] && [[ ! $# -eq 2 ]]; then
    echo "This script imports the default beegfs-mon Dashboards into Grafana using its HTTP API."
    echo "Curl is required."
    echo ""
    echo "Usage: "
    echo "Default installation to localhost: $(basename "$0") default"
    echo "Custom installation: $(basename "$0") <grafana url> <datasource url>"
    echo ""
    echo "Default:"
    # Only the two documented arguments are shown; the database name is asked for
    # interactively and is not set at this point.
    echo "$(basename "$0") $HOST $DATASOURCE_URL"
    exit 0
fi

command -v curl > /dev/null 2>&1 || \
{
    echo "This script requires curl, but it doesn't seem to be installed. Aborting."
    exit 1
}

echo "Select an option:"
echo "1. Using BeeGFS Monitoring with Telegraf"
echo "2. Using BeeGFS Monitoring without Telegraf"
read -r -p "Enter your Option: " option
case "$option" in
    1) monType="wtelegraf" ;;
    2) monType="wotelegraf" ;;
    *)
        echo "*** Please select correct option ***"
        exit 1
        ;;
esac

if [[ $1 != "default" ]]; then
    HOST="$1"
    DATASOURCE_URL="$2"
fi
GRAFANA_DIR=$(dirname "$0")

echo "Please select influxdb version:"
echo "1) Influxdb 1.x"
echo "2) Influxdb 2.x"
read -r -p "Enter your influxdb Version: " influxdb_version

# read -r keeps backslashes in names, passwords and tokens intact. The extra echo
# ends the line after a silent (-s) read, which does not echo the user's newline.
if [[ "$influxdb_version" == "1" ]]; then
    read -r -p "Enter Database Name: " DATABASE_NAME
    read -r -p "Enter Database User: " DATABASE_USER
    read -r -s -p "Enter Database Password: " PASSWORD
    echo ""
    addDatasource "$GRAFANA_DIR/influxdb.json"
elif [[ "$influxdb_version" == "2" ]]; then
    read -r -p "Enter Bucket Name:" BUCKET_NAME
    read -r -p "Enter Organizations: " ORG_NAME
    read -r -s -p "Enter Token: " TOKEN
    echo ""
    addDatasourceV2 "$GRAFANA_DIR/influxdbV2.json"
else
    echo "*** Please select correct version of InfluxDB ***"
    exit 1
fi

# Dashboard files are named <name>[_telegraf]_influxdbv<version>.json.
if [[ "$monType" == "wtelegraf" ]]; then
    flavour="_telegraf"
else
    flavour=""
fi
for dashboard in beegfs_overview meta storage storage_targets client_ops_node client_ops_user; do
    addDashboard "$GRAFANA_DIR/${dashboard}${flavour}_influxdbv${influxdb_version}.json"
done

View File

@@ -0,0 +1,10 @@
{
"name":"%DATASOURCE_NAME%",
"type":"influxdb",
"url":"%DATASOURCE_URL%",
"access":"proxy",
"user":"%DATABASE_USER%",
"database":"%DATABASE_NAME%",
"secureJsonData":{
"password":"%PASSWORD%"}
}

View File

@@ -0,0 +1,12 @@
{
"name":"%DATASOURCE_NAME%",
"type":"influxdb",
"url":"%DATASOURCE_URL%",
"access":"proxy",
"jsonData":{
"organization":"%ORG_NAME%",
"defaultBucket":"%BUCKET_NAME%",
"version":"Flux"},
"secureJsonData":{
"token":"%TOKEN%"}
}

View File

@@ -0,0 +1,876 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-YlBl",
"seriesBy": "max"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "decbytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Received"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#3274d9",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Sent"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-green",
"mode": "fixed"
}
},
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 30,
"options": {
"legend": {
"calcs": [
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "asc"
}
},
"targets": [
{
"alias": "Received",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"10s"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"netRecvBytes"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
},
{
"alias": "Sent",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"10s"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"netSendBytes"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Network Traffic",
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Processed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#36bdbc",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Queued"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#ffb357",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 32,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"alias": "Processed",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"workRequests"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
},
{
"alias": "Queued",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"queuedRequests"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Work Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 46,
"links": [],
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"alias": "Responding",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"dsType": "influxdb",
"groupBy": [
{
"params": [
"1m"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "meta",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"isResponding\") FROM \"meta\" WHERE \"nodeID\" =~ /^$metaID$/ AND $timeFilter GROUP BY time($__interval) fill(previous)",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"isResponding"
],
"type": "field"
},
{
"params": [],
"type": "last"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Availability",
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Direct"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "super-light-yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Indirect"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "super-light-blue",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 34,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"alias": "Direct",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "meta",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"directWorkListSize"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
},
{
"alias": "Indirect",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "meta",
"orderByTime": "ASC",
"policy": "default",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"indirectWorkListSize"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeNumID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Worklist Size",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from meta with key = \"nodeNumID\" ",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "metaID",
"options": [],
"query": "show tag values from meta with key = \"nodeNumID\" ",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "BeeGFS Meta Server",
"uid": "OUJBUPQW",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,903 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-YlBl",
"seriesBy": "max"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "decbytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Received"
},
"properties": [
{
"id": "displayName",
"value": "Received"
},
{
"id": "color",
"value": {
"fixedColor": "#3274d9",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Send"
},
"properties": [
{
"id": "displayName",
"value": "Sent"
},
{
"id": "color",
"value": {
"fixedColor": "#96d98d",
"mode": "fixed"
}
},
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 22,
"options": {
"legend": {
"calcs": [
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "asc"
}
},
"targets": [
{
"alias": "Received",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"10s"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"netRecvBytes\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Received\"})",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"netRecvBytes"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
},
{
"alias": "Sent",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"10s"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"netSendBytes\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Send\"})",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"netSendBytes"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Network Traffic",
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Processed"
},
"properties": [
{
"id": "displayName",
"value": "Processed"
},
{
"id": "color",
"value": {
"fixedColor": "#36bdbc",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Queued"
},
"properties": [
{
"id": "displayName",
"value": "Queued"
},
{
"id": "color",
"value": {
"fixedColor": "#ffb357",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 24,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"alias": "Processed",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"workRequests\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Processed\"})",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"workRequests"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
},
{
"alias": "Queued",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "highResMeta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"highResMeta\" and r._field == \"queuedRequests\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Queued\"})",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"queuedRequests"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Work Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#9340cc8f",
"mode": "fixed"
},
"custom": {
"align": "center",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "semi-dark-purple",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "displayName",
"value": "isResponding"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 8,
"links": [],
"maxDataPoints": 100,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 0,
"showHeader": true
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"dsType": "influxdb",
"groupBy": [],
"measurement": "meta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"meta\" and r._field == \"isResponding\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> keep(columns: [\"_time\", \"_value\"]) |> aggregateWindow(every: 1m, fn: last, createEmpty: false) |> yield(name: \"last\") ",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"isResponding"
],
"type": "field"
},
{
"params": [],
"type": "last"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Availability",
"transformations": [],
"type": "table"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Direct"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "super-light-yellow",
"mode": "fixed"
}
},
{
"id": "displayName",
"value": "Direct"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Indirect"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "super-light-blue",
"mode": "fixed"
}
},
{
"id": "displayName",
"value": "Indirect"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 26,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"alias": "Direct",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "meta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"meta\" and r._field == \"directWorkListSize\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Direct\"})",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"directWorkListSize"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
},
{
"alias": "Indirect",
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"hide": false,
"measurement": "meta",
"orderByTime": "ASC",
"policy": "default",
"query": "from(bucket: \"${bucket}\") |> range(start: v.timeRangeStart, stop:v.timeRangeStop) |> filter(fn: (r) => r.nodeNumID == \"${metaID}\" and r._measurement == \"meta\" and r._field == \"indirectWorkListSize\") |> group(columns: [\"nodeNumID\"], mode: \"by\") |> aggregateWindow(every: v.windowPeriod, fn: max, createEmpty: false) |> rename(columns: {_value: \"Indirect\"})",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"indirectWorkListSize"
],
"type": "field"
},
{
"params": [],
"type": "max"
}
]
],
"tags": [
{
"key": "nodeID",
"operator": "=~",
"value": "/^$metaID$/"
}
]
}
],
"title": "Worklist Size",
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"meta\")",
"hide": 0,
"includeAll": false,
"label": "metaID",
"multi": false,
"name": "metaID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"meta\")",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Meta Server",
"uid": "OTSb6z",
"version": 2,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,633 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "storageTargets.Total"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-purple",
"mode": "fixed"
}
},
{
"id": "unit",
"value": "bytes"
},
{
"id": "displayName",
"value": "Disk Total"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Used"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-blue",
"mode": "fixed"
}
},
{
"id": "unit",
"value": "bytes"
},
{
"id": "displayName",
"value": "Disk Used"
}
]
},
{
"matcher": {
"id": "byName",
"options": "storageTargets.Free"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"links": [],
"maxPerRow": 2,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.3.2",
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "storageTargets",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT max(\"diskSpaceTotal\"), last(\"diskSpaceFree\"), difference(\"diskSpaceTotal\"), difference(\"diskSpaceTotal\") FROM \"storageTargets\" WHERE (\"storageTargetID\" =~ /^$storageTargetID$/) AND $timeFilter GROUP BY time($__interval) fill(none)",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"diskSpaceTotal"
],
"type": "field"
},
{
"params": [],
"type": "max"
},
{
"params": [
"Total"
],
"type": "alias"
}
],
[
{
"params": [
"diskSpaceFree"
],
"type": "field"
},
{
"params": [],
"type": "last"
},
{
"params": [
"Free"
],
"type": "alias"
}
]
],
"tags": [
{
"key": "storageTargetID",
"operator": "=~",
"value": "/^$storageTargetID$/"
}
]
}
],
"title": "Disk Space ($storageTargetID)",
"transformations": [
{
"id": "joinByField",
"options": {}
},
{
"id": "calculateField",
"options": {
"alias": "Used",
"binary": {
"left": "storageTargets.Total",
"operator": "-",
"reducer": "sum",
"right": "storageTargets.Free"
},
"mode": "binary",
"reduce": {
"reducer": "sum"
}
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "storageTargets.Inodes Total"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#73ffe4",
"mode": "fixed"
}
},
{
"id": "displayName",
"value": "Inodes Total"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Inodes Used"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "storageTargets.Inodes Free"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 5,
"maxPerRow": 2,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "storageTargets",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"inodesTotal"
],
"type": "field"
},
{
"params": [],
"type": "max"
},
{
"params": [
"Inodes Total"
],
"type": "alias"
}
],
[
{
"params": [
"inodesFree"
],
"type": "field"
},
{
"params": [],
"type": "last"
},
{
"params": [
"Inodes Free"
],
"type": "alias"
}
]
],
"tags": [
{
"key": "storageTargetID",
"operator": "=~",
"value": "/^$storageTargetID$/"
}
]
}
],
"title": "Inodes ($storageTargetID)",
"transformations": [
{
"id": "joinByField",
"options": {}
},
{
"id": "calculateField",
"options": {
"alias": "Inodes Used",
"binary": {
"left": "storageTargets.Inodes Total",
"operator": "-",
"reducer": "sum",
"right": "storageTargets.Inodes Free"
},
"mode": "binary",
"reduce": {
"reducer": "sum"
}
}
}
],
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from storage with key IN ( \"nodeNumID\") ",
"hide": 0,
"includeAll": false,
"label": "Storage ID",
"multi": false,
"name": "storageID",
"options": [],
"query": "show tag values from storage with key IN ( \"nodeNumID\") ",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
"hide": 0,
"includeAll": true,
"label": "Storage TargetID",
"multi": true,
"name": "storageTargetID",
"options": [],
"query": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "BeeGFS Storage Targets",
"uid": "NyuGiE04k",
"version": 2,
"weekStart": ""
}

View File

@@ -0,0 +1,445 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "diskSpaceTotal"
},
"properties": [
{
"id": "displayName",
"value": "Disk Space Total"
},
{
"id": "color",
"value": {
"fixedColor": "semi-dark-purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "diskSpaceUsed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-blue",
"mode": "fixed"
}
},
{
"id": "displayName",
"value": "Disk Space Used"
}
]
},
{
"matcher": {
"id": "byName",
"options": "diskSpaceFree"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.3.2",
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"diskSpaceTotal\" or r._field == \"diskSpaceFree\")\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.diskSpaceTotal - r.diskSpaceFree }))\r\n|> rename(columns: {_value: \"diskSpaceUsed\"})",
"refId": "A"
}
],
"title": "Disk Usage ($storageTargetID)",
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-YlBl"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "inodesTotal"
},
"properties": [
{
"id": "displayName",
"value": "Inodes Total"
},
{
"id": "color",
"value": {
"fixedColor": "#73ffe4",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "inodesUsed"
},
"properties": [
{
"id": "displayName",
"value": "Inodes Used"
},
{
"id": "color",
"value": {
"fixedColor": "semi-dark-yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "inodesFree"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"hide": false,
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"inodesTotal\" or r._field == \"inodesFree\" )\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.inodesTotal - r.inodesFree }))\r\n|> rename(columns: {_value: \"inodesUsed\"})",
"refId": "A"
}
],
"title": "Inodes ($storageTargetID)",
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
"hide": 0,
"includeAll": false,
"label": "Storage ID",
"multi": false,
"name": "storageID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
"hide": 0,
"includeAll": true,
"label": "Storage TargetID",
"multi": true,
"name": "storageTargetID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Storage Targets",
"uid": "CtdY1AVzy",
"version": 2,
"weekStart": ""
}

View File

@@ -0,0 +1,633 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "storageTargets.Total"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-purple",
"mode": "fixed"
}
},
{
"id": "unit",
"value": "bytes"
},
{
"id": "displayName",
"value": "Disk Total"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Used"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-blue",
"mode": "fixed"
}
},
{
"id": "unit",
"value": "bytes"
},
{
"id": "displayName",
"value": "Disk Used"
}
]
},
{
"matcher": {
"id": "byName",
"options": "storageTargets.Free"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"links": [],
"maxPerRow": 2,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.3.2",
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"dsType": "influxdb",
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"none"
],
"type": "fill"
}
],
"measurement": "storageTargets",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT max(\"diskSpaceTotal\"), last(\"diskSpaceFree\"), difference(\"diskSpaceTotal\"), difference(\"diskSpaceTotal\") FROM \"storageTargets\" WHERE (\"storageTargetID\" =~ /^$storageTargetID$/) AND $timeFilter GROUP BY time($__interval) fill(none)",
"rawQuery": false,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"diskSpaceTotal"
],
"type": "field"
},
{
"params": [],
"type": "max"
},
{
"params": [
"Total"
],
"type": "alias"
}
],
[
{
"params": [
"diskSpaceFree"
],
"type": "field"
},
{
"params": [],
"type": "last"
},
{
"params": [
"Free"
],
"type": "alias"
}
]
],
"tags": [
{
"key": "storageTargetID",
"operator": "=~",
"value": "/^$storageTargetID$/"
}
]
}
],
"title": "Disk Space ($storageTargetID)",
"transformations": [
{
"id": "joinByField",
"options": {}
},
{
"id": "calculateField",
"options": {
"alias": "Used",
"binary": {
"left": "storageTargets.Total",
"operator": "-",
"reducer": "sum",
"right": "storageTargets.Free"
},
"mode": "binary",
"reduce": {
"reducer": "sum"
}
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "storageTargets.Inodes Total"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#73ffe4",
"mode": "fixed"
}
},
{
"id": "displayName",
"value": "Inodes Total"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Inodes Used"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "storageTargets.Inodes Free"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 5,
"maxPerRow": 2,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "storageTargets",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"inodesTotal"
],
"type": "field"
},
{
"params": [],
"type": "max"
},
{
"params": [
"Inodes Total"
],
"type": "alias"
}
],
[
{
"params": [
"inodesFree"
],
"type": "field"
},
{
"params": [],
"type": "last"
},
{
"params": [
"Inodes Free"
],
"type": "alias"
}
]
],
"tags": [
{
"key": "storageTargetID",
"operator": "=~",
"value": "/^$storageTargetID$/"
}
]
}
],
"title": "Inodes ($storageTargetID)",
"transformations": [
{
"id": "joinByField",
"options": {}
},
{
"id": "calculateField",
"options": {
"alias": "Inodes Used",
"binary": {
"left": "storageTargets.Inodes Total",
"operator": "-",
"reducer": "sum",
"right": "storageTargets.Inodes Free"
},
"mode": "binary",
"reduce": {
"reducer": "sum"
}
}
}
],
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from storage with key IN ( \"nodeNumID\") ",
"hide": 0,
"includeAll": false,
"label": "Storage ID",
"multi": false,
"name": "storageID",
"options": [],
"query": "show tag values from storage with key IN ( \"nodeNumID\") ",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
"hide": 0,
"includeAll": true,
"label": "Storage TargetID",
"multi": true,
"name": "storageTargetID",
"options": [],
"query": "show tag values from storageTargets with key = \"storageTargetID\" where nodeNumID =~ /^$storageID$/",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "BeeGFS Storage Targets",
"uid": "NyuGiE04k",
"version": 2,
"weekStart": ""
}

View File

@@ -0,0 +1,445 @@
{
"__inputs": [
{
"name": "DS_BEEGFS_MON_INFLUXDB",
"label": "beegfs_mon_influxdb",
"description": "",
"type": "datasource",
"pluginId": "influxdb",
"pluginName": "InfluxDB"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "datasource",
"id": "influxdb",
"name": "InfluxDB",
"version": "1.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "diskSpaceTotal"
},
"properties": [
{
"id": "displayName",
"value": "Disk Space Total"
},
{
"id": "color",
"value": {
"fixedColor": "semi-dark-purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "diskSpaceUsed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-blue",
"mode": "fixed"
}
},
{
"id": "displayName",
"value": "Disk Space Used"
}
]
},
{
"matcher": {
"id": "byName",
"options": "diskSpaceFree"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.3.2",
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"diskSpaceTotal\" or r._field == \"diskSpaceFree\")\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.diskSpaceTotal - r.diskSpaceFree }))\r\n|> rename(columns: {_value: \"diskSpaceUsed\"})",
"refId": "A"
}
],
"title": "Disk Usage ($storageTargetID)",
"type": "timeseries"
},
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-YlBl"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "hue",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "inodesTotal"
},
"properties": [
{
"id": "displayName",
"value": "Inodes Total"
},
{
"id": "color",
"value": {
"fixedColor": "#73ffe4",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "inodesUsed"
},
"properties": [
{
"id": "displayName",
"value": "Inodes Used"
},
{
"id": "color",
"value": {
"fixedColor": "semi-dark-yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "inodesFree"
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"repeat": "storageTargetID",
"repeatDirection": "v",
"targets": [
{
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"hide": false,
"query": "from(bucket: \"${bucket}\") \r\n|> range(start: v.timeRangeStart, stop:v.timeRangeStop) \r\n|> filter(fn: (r) => r.storageTargetID == \"${storageTargetID}\") \r\n|> filter(fn: (r) => r._measurement == \"storageTargets\")\r\n|> filter(fn: (r) => r._field == \"inodesTotal\" or r._field == \"inodesFree\" )\r\n|> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\r\n|> pivot(rowKey: [\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\r\n|> map(fn: (r) => ({ r with _value: r.inodesTotal - r.inodesFree }))\r\n|> rename(columns: {_value: \"inodesUsed\"})",
"refId": "A"
}
],
"title": "Inodes ($storageTargetID)",
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "buckets()",
"hide": 0,
"includeAll": false,
"label": "Bucket",
"multi": false,
"name": "bucket",
"options": [],
"query": "buckets()",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
"hide": 0,
"includeAll": false,
"label": "Storage ID",
"multi": false,
"name": "storageID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.measurementTagValues( bucket: \"${bucket}\", tag: \"nodeNumID\", measurement: \"storage\")",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "influxdb",
"uid": "${DS_BEEGFS_MON_INFLUXDB}"
},
"definition": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
"hide": 0,
"includeAll": true,
"label": "Storage TargetID",
"multi": true,
"name": "storageTargetID",
"options": [],
"query": "import \"influxdata/influxdb/schema\"schema.tagValues( bucket: \"${bucket}\", tag: \"storageTargetID\", predicate: (r) => r._measurement == \"storageTargets\" and r.nodeNumID == \"${storageID:\"\"}\")",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "BeeGFS Storage Targets",
"uid": "CtdY1AVzy",
"version": 2,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

324
mon/source/app/App.cpp Normal file
View File

@@ -0,0 +1,324 @@
#include "App.h"
#include <app/SignalHandler.h>
#include <common/components/ComponentInitException.h>
#include <common/components/worker/DummyWork.h>
#include <misc/Cassandra.h>
#include <misc/InfluxDB.h>
/**
 * Stores the command line arguments. Nothing else happens here; the Config object is built from
 * these arguments later, in run().
 */
App::App(int argc, char** argv) :
argc(argc), argv(argv)
{}
/**
 * Builds the Config from the stored command line arguments, executes runNormal() and records the
 * outcome in appResult.
 *
 * Exceptions caught here are reported through printOrLogError() and translated into an AppCode;
 * anything derived from std::exception is handled by the last catch clause.
 */
void App::run()
{
   try
   {
      cfg = boost::make_unique<Config>(argc, argv);

      runNormal();

      appResult = AppCode::NO_ERROR;
   }
   catch (const InvalidConfigException& e)
   {
      // multi-line hint that points the user to the config file and the website
      std::ostringstream msg;
      msg << "Config error: " << e.what() << '\n';
      msg << "[BeeGFS Mon Version: " << BEEGFS_VERSION << '\n';
      msg << "Refer to the default config file (/etc/beegfs/beegfs-mon.conf)" << '\n';
      msg << "or visit http://www.beegfs.com to find out about configuration options.]";

      printOrLogError(msg.str());
      appResult = AppCode::INVALID_CONFIG;
   }
   catch (const ComponentInitException& e)
   {
      printOrLogError(std::string("Component initialization error: ") + e.what());
      appResult = AppCode::INITIALIZATION_ERROR;
   }
   catch (const std::runtime_error& e)
   {
      printOrLogError(std::string("Runtime error: ") + e.what());
      appResult = AppCode::RUNTIME_ERROR;
   }
   catch (const std::exception& e)
   {
      // fallback for every other standard exception; reported as a runtime error as well
      printOrLogError(std::string("Generic error: ") + e.what());
      appResult = AppCode::RUNTIME_ERROR;
   }
}
/**
 * Reports an error text: through the logger if it has been initialized, otherwise on stderr
 * (surrounded by blank lines).
 *
 * @param text the message to report
 */
void App::printOrLogError(const std::string& text) const
{
   if (!Logger::isInitialized())
   {
      // no logger yet (e.g. the error happened before createLogger), so fall back to stderr
      std::cerr << std::endl << text << std::endl << std::endl;
      return;
   }

   LOG(GENERAL, ERR, text);
}
/**
 * Regular startup-to-shutdown sequence of the application: creates the logger and the locked PID
 * file, initializes data objects, local node info, workers and components, optionally daemonizes,
 * then starts worker and component threads and waits until they have terminated.
 *
 * Errors are reported via exceptions, which are handled by run().
 */
void App::runNormal()
{
// the logger is set up first, using the log settings from the config
Logger::createLogger(cfg->getLogLevel(), cfg->getLogType(), cfg->getLogNoDate(),
cfg->getLogStdFile(), cfg->getLogNumLines(), cfg->getLogNumRotatedFiles());
// keep the fd of the locked PID file; daemonize() passes it to updateLockedPIDFile()
pidFileLockFD = createAndLockPIDFile(cfg->getPIDFile());
initDataObjects();
SignalHandler::registerSignalHandler(this);
initLocalNodeInfo();
initWorkers();
initComponents();
RDMASocket::rdmaForkInitOnce();
// detaching happens before any worker/component thread is started
if (cfg->getRunDaemonized())
daemonize();
logInfos();
// make sure components don't receive SIGINT/SIGTERM (blocked signals are inherited)
PThread::blockInterruptSignals();
startWorkers();
startComponents();
// all threads are started; interrupt signals are unblocked again for this thread
PThread::unblockInterruptSignals();
// wait for the component threads first, then for the workers
joinComponents();
joinWorkers();
}
/**
 * Collects the local network configuration (usable NICs, their capabilities, the list of
 * networks without default route, routing table) and creates the localNode object.
 *
 * @throw InvalidConfigException if no usable NIC was found or connNoDefaultRoute could not be
 *    parsed
 */
void App::initLocalNodeInfo()
{
   const bool useRDMA = cfg->getConnUseRDMA();
   const unsigned portUDP = cfg->getConnMonPort();

   // optional restriction of the interfaces, read from the configured interfaces file
   StringList allowedInterfaces;
   const std::string interfacesFilename = cfg->getConnInterfacesFile();

   if (!interfacesFilename.empty())
   {
      cfg->loadStringListFile(interfacesFilename.c_str(), allowedInterfaces);
   }

   NetworkInterfaceCard::findAll(&allowedInterfaces, useRDMA, &localNicList);

   if (localNicList.empty())
   {
      throw InvalidConfigException("Couldn't find any usable NIC");
   }

   localNicList.sort(NetworkInterfaceCard::NicAddrComp{&allowedInterfaces});
   NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);

   noDefaultRouteNets = std::make_shared<NetVector>();

   const bool noDefaultRouteParsed = initNoDefaultRouteList(noDefaultRouteNets.get());
   if (!noDefaultRouteParsed)
   {
      throw InvalidConfigException("Failed to parse connNoDefaultRoute");
   }

   initRoutingTable();
   updateRoutingTable();

   // the hostname serves as node ID of the local node
   const std::string nodeID = System::getHostname();

   // TODO add a Mon nodetype at some point
   localNode = std::make_shared<LocalNode>(NODETYPE_Client, nodeID, NumNodeID(1), portUDP, 0,
      localNicList);
}
void App::initDataObjects()
{
netFilter = boost::make_unique<NetFilter>(cfg->getConnNetFilterFile());
tcpOnlyFilter = boost::make_unique<NetFilter>(cfg->getConnTcpOnlyFilterFile());
netMessageFactory = boost::make_unique<NetMessageFactory>();
workQueue = boost::make_unique<MultiWorkQueue>();
targetMapper = boost::make_unique<TargetMapper>();
metaNodes = boost::make_unique<NodeStoreMetaEx>();
storageNodes = boost::make_unique<NodeStoreStorageEx>();
mgmtNodes = boost::make_unique<NodeStoreMgmtEx>();
metaBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
storageBuddyGroupMapper = boost::make_unique<MirrorBuddyGroupMapper>();
if (cfg->getDbType() == Config::DbTypes::CASSANDRA)
{
Cassandra::Config cassandraConfig;
cassandraConfig.host = cfg->getDbHostName();
cassandraConfig.port = cfg->getDbHostPort();
cassandraConfig.database = cfg->getDbDatabase();
cassandraConfig.maxInsertsPerBatch = cfg->getCassandraMaxInsertsPerBatch();
cassandraConfig.TTLSecs = cfg->getCassandraTTLSecs();
tsdb = boost::make_unique<Cassandra>(std::move(cassandraConfig));
}
else // Config::DbTypes::INFLUXDB OR Config::DbTypes::INFLUXDB2
{
InfluxDB::Config influxdbConfig;
influxdbConfig.host = cfg->getDbHostName();
influxdbConfig.port = cfg->getDbHostPort();
influxdbConfig.maxPointsPerRequest = cfg->getInfluxdbMaxPointsPerRequest();
influxdbConfig.httpTimeout = cfg->getHttpTimeout();
influxdbConfig.curlCheckSSLCertificates = cfg->getCurlCheckSSLCertificates();
if (cfg->getDbType() == Config::DbTypes::INFLUXDB2)
{
influxdbConfig.bucket = cfg->getDbBucket();
influxdbConfig.organization = cfg->getDbAuthOrg();
influxdbConfig.token = cfg->getDbAuthToken();
influxdbConfig.dbVersion = INFLUXDB2;
}
else
{
influxdbConfig.database = cfg->getDbDatabase();
influxdbConfig.setRetentionPolicy = cfg->getInfluxDbSetRetentionPolicy();
influxdbConfig.retentionDuration = cfg->getInfluxDbRetentionDuration();
influxdbConfig.username = cfg->getDbAuthUsername();
influxdbConfig.password = cfg->getDbAuthPassword();
influxdbConfig.dbVersion = INFLUXDB;
}
tsdb = boost::make_unique<InfluxDB>(std::move(influxdbConfig));
}
}
/**
 * Creates the three component objects (node list requestor, stats collector, clean up). Each one
 * receives a pointer to this App. They are only constructed here; startComponents() starts them.
 */
void App::initComponents()
{
nodeListRequestor = boost::make_unique<NodeListRequestor>(this);
statsCollector = boost::make_unique<StatsCollector>(this);
cleanUp = boost::make_unique<CleanUp>(this);
}
/**
 * Starts the components created by initComponents(), in the order node list requestor, stats
 * collector, clean up. The component pointers are used unchecked, so initComponents() must have
 * run before.
 */
void App::startComponents()
{
LOG(GENERAL, DEBUG, "Starting components...");
nodeListRequestor->start();
statsCollector->start();
cleanUp->start();
LOG(GENERAL, DEBUG, "Components running.");
}
/**
 * Requests termination of all components, the workers and the app itself.
 *
 * Every component pointer is checked before use, so this is safe to call when some (or all)
 * components have not been created yet.
 */
void App::stopComponents()
{
   if (nodeListRequestor)
   {
      nodeListRequestor->selfTerminate();
   }

   if (statsCollector)
   {
      statsCollector->selfTerminate();
   }

   if (cleanUp)
   {
      cleanUp->selfTerminate();
   }

   stopWorkers();

   // finally flag the app itself for termination
   selfTerminate();
}
/**
 * Joins the three component threads one after another. The component pointers are used
 * unchecked here (unlike in stopComponents()), so the components must have been created.
 */
void App::joinComponents()
{
LOG(GENERAL, DEBUG, "Joining Component threads...");
nodeListRequestor->join();
statsCollector->join();
cleanUp->join();
// logged at CRITICAL level although it is not an error message
LOG(GENERAL, CRITICAL, "All components stopped. Exiting now.");
}
void App::initWorkers()
{
const unsigned numDirectWorkers = 1;
const unsigned workersBufSize = 1024*1024;
unsigned numWorkers = cfg->getTuneNumWorkers();
for (unsigned i=0; i < numWorkers; i++)
{
auto worker = boost::make_unique<Worker>("Worker" + StringTk::intToStr(i+1),
workQueue.get(), QueueWorkType_INDIRECT);
worker->setBufLens(workersBufSize, workersBufSize);
workerList.push_back(std::move(worker));
}
for (unsigned i=0; i < numDirectWorkers; i++)
{
auto worker = boost::make_unique<Worker>("DirectWorker" + StringTk::intToStr(i+1),
workQueue.get(), QueueWorkType_DIRECT);
worker->setBufLens(workersBufSize, workersBufSize);
workerList.push_back(std::move(worker));
}
}
/**
 * Starts every worker in workerList, in list order.
 */
void App::startWorkers()
{
   for (auto& worker : workerList)
      worker->start();
}
void App::stopWorkers()
{
// need two loops because we don't know if the worker that handles the work will be the same that
// received the self-terminate-request
for (auto worker = workerList.begin(); worker != workerList.end(); worker++)
{
(*worker)->selfTerminate();
// add dummy work to wake up the worker immediately for faster self termination
PersonalWorkQueue* personalQ = (*worker)->getPersonalWorkQueue();
workQueue->addPersonalWork(new DummyWork(), personalQ);
}
}
/**
 * Waits for the termination of every worker in workerList, in list order.
 */
void App::joinWorkers()
{
   for (auto& worker : workerList)
      waitForComponentTermination(worker.get());
}
void App::logInfos()
{
LOG(GENERAL, CRITICAL, std::string("Version: ") + BEEGFS_VERSION);
#ifdef BEEGFS_DEBUG
LOG(GENERAL, DEBUG, "--DEBUG VERSION--");
#endif
// list usable network interfaces
NicAddressList nicList = getLocalNicList();
logUsableNICs(NULL, nicList);
// print net filters
if (netFilter->getNumFilterEntries() )
{
LOG(GENERAL, WARNING, std::string("Net filters: ")
+ StringTk::uintToStr(netFilter->getNumFilterEntries() ) );
}
if (tcpOnlyFilter->getNumFilterEntries() )
{
LOG(GENERAL, WARNING, std::string("TCP-only filters: ")
+ StringTk::uintToStr(tcpOnlyFilter->getNumFilterEntries() ) );
}
}
/**
 * Detaches the process from the controlling terminal via daemon() and afterwards updates the
 * locked PID file.
 *
 * @throw std::runtime_error if daemon() fails; the message contains the system error string.
 */
void App::daemonize()
{
   const int keepWorkingDir = 1; // non-zero: keep the current working directory
   const int keepStdStreams = 0; // zero: do not keep stdin/-out/-err open

   LOG(GENERAL, CRITICAL, "Detaching process...");

   if (daemon(keepWorkingDir, keepStdStreams) == -1)
   {
      throw std::runtime_error(std::string("Unable to detach process: ")
         + System::getErrString());
   }

   // note from the original author: ignored if pidFileFD is -1
   updateLockedPIDFile(pidFileLockFD);
}
// Called by the components' run() methods when an exception escapes their main loop.
// Logs the exception text and then asks all components to stop via stopComponents().
// NOTE(review): "sysErr" is passed to LOG as an extra item - presumably it adds the current
// system error to the log line; confirm against the Logger macros.
void App::handleComponentException(std::exception& e)
{
LOG(GENERAL, CRITICAL, "This component encountered an unrecoverable error.", sysErr,
("Exception", e.what()));
LOG(GENERAL, WARNING, "Shutting down...");
stopComponents();
}
// Reports a failed network interface. Only logs the device name; no recovery is attempted.
// @param devname name of the affected network device (only used for the log message)
void App::handleNetworkInterfaceFailure(const std::string& devname)
{
// Nothing to do. This App has no internodeSyncer that would rescan the
// netdevs.
LOG(GENERAL, ERR, "Network interface failure.",
("Device", devname));
}

184
mon/source/app/App.h Normal file
View File

@@ -0,0 +1,184 @@
#ifndef APP_H_
#define APP_H_
#include <app/Config.h>
#include <common/app/AbstractApp.h>
#include <common/app/log/Logger.h>
#include <common/Common.h>
#include <common/components/worker/Worker.h>
#include <common/nodes/LocalNode.h>
#include <common/nodes/NodeStoreClients.h>
#include <common/nodes/Node.h>
#include <common/toolkit/MessagingTk.h>
#include <common/toolkit/NetFilter.h>
#include <common/toolkit/NodesTk.h>
#include <misc/TSDatabase.h>
#include <components/CleanUp.h>
#include <components/StatsCollector.h>
#include <components/NodeListRequestor.h>
#include <net/message/NetMessageFactory.h>
#include <nodes/NodeStoreMetaEx.h>
#include <nodes/NodeStoreStorageEx.h>
#include <nodes/NodeStoreMgmtEx.h>
// Application object of the mon service. It owns the configuration, the node stores, the work
// queue with its workers and the three components (NodeListRequestor, StatsCollector, CleanUp)
// and hands out non-owning pointers to them through the getters below.
class App : public AbstractApp
{
public:
// Result codes; getAppResult() returns the stored appResult value.
enum AppCode
{
NO_ERROR = 0,
INVALID_CONFIG = 1,
INITIALIZATION_ERROR = 2,
RUNTIME_ERROR = 3
};
App(int argc, char** argv);
virtual void run() override;
// Asks the components and workers to terminate themselves.
virtual void stopComponents() override;
// Logs the exception and calls stopComponents().
virtual void handleComponentException(std::exception& e) override;
// Only logs the failing device name.
virtual void handleNetworkInterfaceFailure(const std::string& devname) override;
private:
// Value handed back to main() through getAppResult().
int appResult;
// Command line as passed to the constructor.
int argc;
char** argv;
LockFD pidFileLockFD;
// NOTE: the members below are destroyed in reverse declaration order, so do not reorder them
// without checking which objects refer to each other.
std::unique_ptr<TargetMapper> targetMapper;
std::unique_ptr<Config> cfg;
std::unique_ptr<NetFilter> netFilter;
std::unique_ptr<NetFilter> tcpOnlyFilter;
std::unique_ptr<NetMessageFactory> netMessageFactory;
NicListCapabilities localNicCaps;
std::shared_ptr<Node> localNode;
// Time series database that the StatsCollector inserts the collected data into.
std::unique_ptr<TSDatabase> tsdb;
// Queue that the workers in workerList take their work from.
std::unique_ptr<MultiWorkQueue> workQueue;
std::unique_ptr<NodeStoreMgmtEx> mgmtNodes;
std::unique_ptr<NodeStoreMetaEx> metaNodes;
std::unique_ptr<NodeStoreStorageEx> storageNodes;
std::unique_ptr<MirrorBuddyGroupMapper> metaBuddyGroupMapper;
std::unique_ptr<MirrorBuddyGroupMapper> storageBuddyGroupMapper;
// Component threads.
std::unique_ptr<NodeListRequestor> nodeListRequestor;
std::unique_ptr<StatsCollector> statsCollector;
std::unique_ptr<CleanUp> cleanUp;
// Worker threads created by initWorkers().
std::list<std::unique_ptr<Worker>> workerList;
void printOrLogError(const std::string& text) const;
void runNormal();
void initDataObjects();
void initComponents();
void startComponents();
void joinComponents();
void initWorkers();
void startWorkers();
void stopWorkers();
void joinWorkers();
void initLocalNodeInfo();
void logInfos();
void daemonize();
public:
// Returns the node store for the given node type (meta, storage or mgmt) or nullptr for any
// other type. The returned pointer is non-owning.
NodeStoreServers* getServerStoreFromType(NodeType nodeType)
{
switch (nodeType)
{
case NODETYPE_Meta:
return metaNodes.get();
case NODETYPE_Storage:
return storageNodes.get();
case NODETYPE_Mgmt:
return mgmtNodes.get();
default:
return nullptr;
}
}
// All getters below return non-owning pointers to objects owned by this App (or, for
// getLocalNode(), a shared_ptr copy).
virtual ICommonConfig* getCommonConfig() const override
{
return cfg.get();
}
virtual NetFilter* getNetFilter() const override
{
return netFilter.get();
}
virtual NetFilter* getTcpOnlyFilter() const override
{
return tcpOnlyFilter.get();
}
virtual AbstractNetMessageFactory* getNetMessageFactory() const override
{
return netMessageFactory.get();
}
std::shared_ptr<Node> getLocalNode()
{
return localNode;
}
Config* getConfig()
{
return cfg.get();
}
MultiWorkQueue *getWorkQueue()
{
return workQueue.get();
}
NodeStoreMetaEx *getMetaNodes()
{
return metaNodes.get();
}
NodeStoreStorageEx *getStorageNodes()
{
return storageNodes.get();
}
NodeStoreMgmtEx *getMgmtNodes()
{
return mgmtNodes.get();
}
TSDatabase *getTSDB()
{
return tsdb.get();
}
TargetMapper* getTargetMapper()
{
return targetMapper.get();
}
MirrorBuddyGroupMapper* getMetaBuddyGroupMapper()
{
return metaBuddyGroupMapper.get();
}
MirrorBuddyGroupMapper* getStorageBuddyGroupMapper()
{
return storageBuddyGroupMapper.get();
}
// Result code of the application run (see AppCode).
int getAppResult()
{
return appResult;
}
};
#endif /*APP_H_*/

210
mon/source/app/Config.cpp Normal file
View File

@@ -0,0 +1,210 @@
#include <common/toolkit/StringTk.h>
#include "Config.h"
#include <sys/stat.h>
#define CONFIG_DEFAULT_CFGFILENAME "/etc/beegfs/beegfs-mon.conf"
/**
 * Initializes the configuration from the command line / config file and, if dbAuthFile is set,
 * reads the database credentials from that file.
 *
 * @throw InvalidConfigException if no management host is set, if the authentication file cannot
 *    be opened or if it contains an option other than username, password, organization or token.
 */
Config::Config(int argc, char** argv): AbstractConfig(argc, argv)
{
   initConfig(argc, argv, true);

   // check mandatory value
   if (getSysMgmtdHost().empty())
      throw InvalidConfigException("Management host undefined.");

   // without an authentication file there is nothing left to do
   if (dbAuthFile.empty())
      return;

   // make sure the file can be opened before it is parsed
   std::ifstream authFileStream(dbAuthFile);
   if (!authFileStream.good())
      throw InvalidConfigException("Could not open InfluxDB authentication file");

   StringMap authOptions;
   MapTk::loadStringMapFromFile(dbAuthFile.c_str(), &authOptions);

   for (const auto& option : authOptions)
   {
      const auto& optionName = option.first;
      const auto& optionValue = option.second;

      if (optionName == "username")
         dbAuthUsername = optionValue;
      else if (optionName == "password")
         dbAuthPassword = optionValue;
      else if (optionName == "organization")
         dbAuthOrg = optionValue;
      else if (optionName == "token")
         dbAuthToken = optionValue;
      else
         throw InvalidConfigException("The InfluxDB authentication file may only contain "
            "the options username and password for influxdb version 1.x "
            "organization and token for influxdb version 2.x" );
   }
}
/**
 * Loads the defaults of the base class and then (re)defines the default values of all options
 * handled by this class.
 *
 * @param addDashes not used by this implementation.
 */
void Config::loadDefaults(bool addDashes)
{
   AbstractConfig::loadDefaults();

   struct DefaultEntry
   {
      const char* key;
      const char* value;
   };

   // applied in exactly this order
   static const DefaultEntry defaults[] =
   {
      // re-definitions
      { "cfgFile",                     "" },
      { "connUseRDMA",                 "false" },

      // own definitions
      { "connInterfacesFile",          "" },
      { "tuneNumWorkers",              "4" },
      { "runDaemonized",               "false" },
      { "pidFile",                     "" },
      { "dbType",                      "influxdb" },
      { "dbHostName",                  "localhost" },
      { "dbHostPort",                  "8086" },
      { "dbDatabase",                  "beegfs_mon" },
      { "dbAuthFile",                  "" },

      // those are used by influxdb only but are kept like this for compatibility
      { "dbMaxPointsPerRequest",       "5000" },
      { "dbSetRetentionPolicy",        "true" },
      { "dbRetentionDuration",         "1d" },
      { "dbBucket",                    "" },

      { "cassandraMaxInsertsPerBatch", "25" },
      { "cassandraTTLSecs",            "86400" },

      { "collectClientOpsByNode",      "true" },
      { "collectClientOpsByUser",      "true" },

      { "httpTimeoutMSecs",            "1000" },
      { "statsRequestIntervalSecs",    "5" },
      { "nodelistRequestIntervalSecs", "30" },

      { "curlCheckSSLCertificates",    "true" },
   };

   for (const DefaultEntry& entry : defaults)
      configMapRedefine(entry.key, entry.value);
}
void Config::applyConfigMap(bool enableException, bool addDashes)
{
AbstractConfig::applyConfigMap(false);
for (StringMapIter iter = configMap.begin(); iter != configMap.end(); )
{
bool unknownElement = false;
if (iter->first == std::string("logType"))
{
if (iter->second == "syslog")
{
logType = LogType_SYSLOG;
}
else if (iter->second == "logfile")
{
logType = LogType_LOGFILE;
}
else
{
throw InvalidConfigException("The value of config argument logType is invalid:"
" Must be syslog or logfile.");
}
}
else if (iter->first == std::string("connInterfacesFile"))
connInterfacesFile = iter->second;
else
if (iter->first == std::string("tuneNumWorkers"))
tuneNumWorkers = StringTk::strToUInt(iter->second);
else
if (iter->first == std::string("runDaemonized"))
runDaemonized = StringTk::strToBool(iter->second);
else
if (iter->first == std::string("pidFile"))
pidFile = iter->second;
else
if (iter->first == std::string("dbType"))
{
if (iter->second == "influxdb")
dbType = DbTypes::INFLUXDB;
else if (iter->second == "influxdb2")
dbType = DbTypes::INFLUXDB2;
else if (iter->second == "cassandra")
dbType = DbTypes::CASSANDRA;
else
throw InvalidConfigException("The value of config argument dbType is invalid:"
" Must be influxdb or cassandra.");
}
else
if (iter->first == std::string("dbHostName"))
dbHostName = iter->second;
else
if (iter->first == std::string("dbHostPort"))
dbHostPort = StringTk::strToUInt(iter->second);
else
if (iter->first == std::string("dbDatabase"))
dbDatabase = iter->second;
else
if (iter->first == std::string("dbAuthFile"))
dbAuthFile = iter->second;
else
// those are used by influxdb only but are kept like this for compatibility
if (iter->first == std::string("dbMaxPointsPerRequest"))
influxdbMaxPointsPerRequest = StringTk::strToUInt(iter->second);
else
if (iter->first == std::string("dbSetRetentionPolicy"))
influxdbSetRetentionPolicy = StringTk::strToBool(iter->second);
else
if (iter->first == std::string("dbRetentionDuration"))
influxdbRetentionDuration = iter->second;
else
// those are used by influxdb2
if (iter->first == std::string("dbBucket"))
dbBucket = iter->second;
else
if (iter->first == std::string("cassandraMaxInsertsPerBatch"))
cassandraMaxInsertsPerBatch = StringTk::strToUInt(iter->second);
else
if (iter->first == std::string("cassandraTTLSecs"))
cassandraTTLSecs = StringTk::strToUInt(iter->second);
else
if (iter->first == std::string("collectClientOpsByNode"))
collectClientOpsByNode = StringTk::strToBool(iter->second);
else
if (iter->first == std::string("collectClientOpsByUser"))
collectClientOpsByUser = StringTk::strToBool(iter->second);
else
if (iter->first == std::string("httpTimeoutMSecs"))
httpTimeout = std::chrono::milliseconds(StringTk::strToUInt(iter->second));
else
if (iter->first == std::string("statsRequestIntervalSecs"))
statsRequestInterval = std::chrono::seconds(StringTk::strToUInt(iter->second));
else
if (iter->first == std::string("nodelistRequestIntervalSecs"))
nodelistRequestInterval = std::chrono::seconds(StringTk::strToUInt(iter->second));
else
if (iter->first == std::string("curlCheckSSLCertificates"))
curlCheckSSLCertificates = StringTk::strToBool(iter->second);
else
{
unknownElement = true;
if (enableException)
{
throw InvalidConfigException(std::string("The config argument '")
+ iter->first + std::string("' is invalid.") );
}
}
if (unknownElement)
{
iter++;
}
else
{
iter = eraseFromConfigMap(iter);
}
}
}
// Initializes values that are derived from other options: fills connAuthHash via
// AbstractConfig::initConnAuthHash() using the configured connAuthFile.
void Config::initImplicitVals()
{
AbstractConfig::initConnAuthHash(connAuthFile, &connAuthHash);
}

179
mon/source/app/Config.h Normal file
View File

@@ -0,0 +1,179 @@
#ifndef CONFIG_H_
#define CONFIG_H_
#include <common/app/config/AbstractConfig.h>
// Configuration of the mon service. Extends AbstractConfig with the mon-specific options; the
// values are filled in by applyConfigMap() from the option names listed in loadDefaults().
class Config : public AbstractConfig
{
public:
// @throw InvalidConfigException if the configuration is invalid (see Config.cpp).
Config(int argc, char** argv);
// Supported time series database backends (option "dbType").
enum DbTypes
{
INFLUXDB,
INFLUXDB2,
CASSANDRA
};
private:
// configurables
std::string connInterfacesFile;
unsigned tuneNumWorkers;
bool runDaemonized;
std::string pidFile;
// mon-specific configurables
DbTypes dbType;
std::string dbHostName;
unsigned dbHostPort;
std::string dbDatabase;
std::string dbBucket;
// path of the file the db* credentials below are read from (empty: no file is read)
std::string dbAuthFile;
// set from the options dbMaxPointsPerRequest, dbSetRetentionPolicy and dbRetentionDuration
unsigned influxdbMaxPointsPerRequest;
bool influxdbSetRetentionPolicy;
std::string influxdbRetentionDuration;
unsigned cassandraMaxInsertsPerBatch;
unsigned cassandraTTLSecs;
bool collectClientOpsByNode;
bool collectClientOpsByUser;
// set from the options httpTimeoutMSecs, statsRequestIntervalSecs and
// nodelistRequestIntervalSecs
std::chrono::milliseconds httpTimeout;
std::chrono::seconds statsRequestInterval;
std::chrono::seconds nodelistRequestInterval;
bool curlCheckSSLCertificates;
// credentials read from dbAuthFile by the constructor (keys username, password, organization
// and token)
std::string dbAuthUsername;
std::string dbAuthPassword;
std::string dbAuthOrg;
std::string dbAuthToken;
virtual void loadDefaults(bool addDashes) override;
virtual void applyConfigMap(bool enableException, bool addDashes) override;
virtual void initImplicitVals() override;
public:
// getters & setters
const std::string& getConnInterfacesFile() const
{
return connInterfacesFile;
}
unsigned getTuneNumWorkers() const
{
return tuneNumWorkers;
}
bool getRunDaemonized() const
{
return runDaemonized;
}
const std::string& getPIDFile() const
{
return pidFile;
}
DbTypes getDbType() const
{
return dbType;
}
const std::string& getDbHostName() const
{
return dbHostName;
}
unsigned getDbHostPort() const
{
return dbHostPort;
}
const std::string& getDbDatabase() const
{
return dbDatabase;
}
const std::string& getDbBucket() const
{
return dbBucket;
}
unsigned getInfluxdbMaxPointsPerRequest() const
{
return influxdbMaxPointsPerRequest;
}
bool getInfluxDbSetRetentionPolicy() const
{
return influxdbSetRetentionPolicy;
}
const std::string& getInfluxDbRetentionDuration() const
{
return influxdbRetentionDuration;
}
unsigned getCassandraMaxInsertsPerBatch() const
{
return cassandraMaxInsertsPerBatch;
}
unsigned getCassandraTTLSecs() const
{
return cassandraTTLSecs;
}
bool getCollectClientOpsByNode() const
{
return collectClientOpsByNode;
}
bool getCollectClientOpsByUser() const
{
return collectClientOpsByUser;
}
const std::chrono::milliseconds& getHttpTimeout() const
{
return httpTimeout;
}
const std::chrono::seconds& getStatsRequestInterval() const
{
return statsRequestInterval;
}
const std::chrono::seconds& getNodelistRequestInterval() const
{
return nodelistRequestInterval;
}
const std::string& getDbAuthUsername() const
{
return dbAuthUsername;
}
const std::string& getDbAuthPassword() const
{
return dbAuthPassword;
}
const std::string& getDbAuthOrg() const
{
return dbAuthOrg;
}
const std::string& getDbAuthToken() const
{
return dbAuthToken;
}
bool getCurlCheckSSLCertificates() const
{
return curlCheckSSLCertificates;
}
};
#endif /*CONFIG_H_*/

14
mon/source/app/Main.cpp Normal file
View File

@@ -0,0 +1,14 @@
#include <common/toolkit/BuildTypeTk.h>
#include <app/SignalHandler.h>
#include <app/App.h>
int main(int argc, char** argv)
{
BuildTypeTk::checkDebugBuildTypes();
AbstractApp::runTimeInitsAndChecks();
App app(argc, argv);
app.startInCurrentThread();
return app.getAppResult();
}

View File

@@ -0,0 +1,49 @@
#include "SignalHandler.h"
#include <common/app/log/Logger.h>
#include <app/App.h>
#include <csignal>
// App that handle() forwards signals to; stays nullptr until registerSignalHandler() is called.
App* SignalHandler::app = nullptr;
// Remembers the App to notify and installs handle() as handler for SIGINT and SIGTERM.
// @param app App whose stopComponents() is called when one of the signals arrives
void SignalHandler::registerSignalHandler(App* app)
{
// store the pointer first, so it is already set when the first signal can be delivered
SignalHandler::app = app;
signal(SIGINT, SignalHandler::handle);
signal(SIGTERM, SignalHandler::handle);
}
/**
 * Signal handler for SIGINT and SIGTERM: restores the default handling of the received signal,
 * logs which signal arrived (only if the Logger is initialized) and asks the registered App to
 * stop its components.
 *
 * NOTE(review): logging and stopComponents() are presumably not async-signal-safe; confirm that
 * calling them from a signal handler is acceptable here.
 *
 * @param sig number of the received signal
 */
void SignalHandler::handle(int sig)
{
   // reset signal handling to default
   signal(sig, SIG_DFL);

   if (Logger::isInitialized())
   {
      if (sig == SIGINT)
      {
         LOG(GENERAL, CRITICAL, "Received a SIGINT. Shutting down...");
      }
      else if (sig == SIGTERM)
      {
         LOG(GENERAL, CRITICAL, "Received a SIGTERM. Shutting down...");
      }
      else
      {
         // shouldn't happen
         LOG(GENERAL, CRITICAL, "Received an unknown signal. Shutting down...");
      }
   }

   if (SignalHandler::app == nullptr)
      return;

   SignalHandler::app->stopComponents();
}

View File

@@ -0,0 +1,16 @@
#ifndef SIGNAL_HANDLER_H_
#define SIGNAL_HANDLER_H_
class App;
// Forwards SIGINT and SIGTERM to a registered App by calling its stopComponents() method.
// All members are static; the class is never instantiated in the code shown here.
class SignalHandler
{
public:
// Stores the App to notify and installs handle() for SIGINT and SIGTERM.
static void registerSignalHandler(App* app);
// Signal handler: resets the signal to its default handling, logs it (if the Logger is
// initialized) and calls stopComponents() on the registered App, if there is one.
static void handle(int sig);
private:
// App notified by handle(); nullptr until registerSignalHandler() was called.
static App* app;
};
#endif

View File

@@ -0,0 +1,67 @@
#include "CleanUp.h"
#include <app/App.h>
// Creates the component as a PThread named "CleanUp" and stores the given App pointer, which is
// later used to reach the node stores and the local node.
CleanUp::CleanUp(App* app) :
PThread("CleanUp"), app(app)
{}
// Body of the component (presumably invoked by PThread as the thread function - confirm):
// registers the signal handler, runs loop() until termination is requested and reports any
// std::exception to the App instead of letting it escape.
void CleanUp::run()
{
try
{
LOG(GENERAL, DEBUG, "Component started.");
registerSignalHandler();
loop();
LOG(GENERAL, DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
// the App logs the error and stops all components
app->handleComponentException(e);
}
}
/**
 * Drops idle connections every 30 minutes until a self-terminate order is received. The first
 * run happens after the first full interval has passed.
 */
void CleanUp::loop()
{
   const std::chrono::minutes idleDisconnectInterval(30);
   const auto intervalMS = std::chrono::milliseconds(idleDisconnectInterval).count();

   for ( ; ; )
   {
      // returns true as soon as termination was requested
      if (waitForSelfTerminateOrder(intervalMS))
         break;

      dropIdleConns();
   }
}
void CleanUp::dropIdleConns()
{
unsigned numDroppedConns = 0;
numDroppedConns += dropIdleConnsByStore(app->getMgmtNodes());
numDroppedConns += dropIdleConnsByStore(app->getMetaNodes());
numDroppedConns += dropIdleConnsByStore(app->getStorageNodes());
if (numDroppedConns)
{
LOG(GENERAL, DEBUG, "Idle connections dropped", numDroppedConns);
}
}
/**
 * Disconnects and resets the idle streams in the connection pools of all nodes of one store,
 * except for the local node.
 *
 * @param nodes store whose nodes are processed
 * @return sum of the values returned by the connection pools (number of dropped connections)
 */
unsigned CleanUp::dropIdleConnsByStore(NodeStoreServers* nodes)
{
   unsigned droppedTotal = 0;

   const auto allNodes = nodes->referenceAllNodes();

   for (const auto& currentNode : allNodes)
   {
      // don't do any idle disconnect stuff with local node
      // (the LocalNodeConnPool doesn't support and doesn't need this kind of treatment)
      if (currentNode == app->getLocalNode())
         continue;

      droppedTotal += currentNode->getConnPool()->disconnectAndResetIdleStreams();
   }

   return droppedTotal;
}

View File

@@ -0,0 +1,24 @@
#ifndef CLEANUP_H_
#define CLEANUP_H_
#include <common/threading/PThread.h>
#include <common/nodes/NodeStoreServers.h>
class App;
// Component thread that periodically drops idle connections to the mgmt, meta and storage nodes
// known to the App.
class CleanUp : public PThread
{
public:
// @param app App that provides the node stores and the local node; stored as pointer
CleanUp(App* app);
private:
App* const app;
// runs loop() and forwards exceptions to the App
virtual void run() override;
// calls dropIdleConns() in a fixed interval until termination is requested
void loop();
// drops the idle connections of all node stores and logs the total
void dropIdleConns();
// drops the idle connections of all nodes in one store (local node excluded) and returns
// their number
unsigned dropIdleConnsByStore(NodeStoreServers* nodes);
};
#endif /* CLEANUP_H_ */

View File

@@ -0,0 +1,91 @@
#include "NodeListRequestor.h"
#include <common/toolkit/NodesTk.h>
#include <components/worker/GetNodesWork.h>
#include <app/App.h>
// Maximum number of attempts getMgmtNodeInfo() makes to download the management node info.
static const unsigned MGMT_NUM_TRIES = 3;
// Used both as timeout value for one download attempt and as wait time between two attempts.
static const std::chrono::milliseconds MGMT_TIMEOUT{1000};
// Creates the component as a PThread named "NodeListReq" and stores the given App pointer,
// which is later used to reach the config, the work queue and the node stores.
NodeListRequestor::NodeListRequestor(App* app) :
PThread("NodeListReq"), app(app)
{}
// Body of the component (presumably invoked by PThread as the thread function - confirm):
// registers the signal handler, runs requestLoop() until termination is requested and reports
// any std::exception to the App instead of letting it escape.
void NodeListRequestor::run()
{
try
{
LOG(GENERAL, DEBUG, "Component started.");
registerSignalHandler();
requestLoop();
LOG(GENERAL, DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
// the App logs the error and stops all components
app->handleComponentException(e);
}
}
/**
 * Periodically queues the work items that download the meta and storage node lists from the
 * management node. Runs once immediately and then once per configured nodelist request
 * interval, until a self-terminate order is received.
 */
void NodeListRequestor::requestLoop()
{
   bool terminateRequested = false;

   while (!terminateRequested)
   {
      // Get management node. Do this every time before updating node lists to check if
      // management is online to prevent log spam from NodesTk::downloadNodes when it is
      // not reachable
      if (!getMgmtNodeInfo())
      {
         LOG(GENERAL, NOTICE, "Did not receive a response from management node!");
      }
      else
      {
         // try to reference first mgmt node (which is at the moment the only one)
         std::shared_ptr<Node> mgmtNode = app->getMgmtNodes()->referenceFirstNode();

         if (!mgmtNode)
         {
            LOG(GENERAL, DEBUG, "Unable to reference management node for node list request.");
         }
         else
         {
            LOG(GENERAL, DEBUG, "Requesting node lists...");

            // one work item per node type; each gets its own reference to the mgmt node
            app->getWorkQueue()->addIndirectWork(new GetNodesWork(mgmtNode,
               app->getMetaNodes(), NODETYPE_Meta, app->getMetaBuddyGroupMapper(),
               app->getLocalNode()));

            app->getWorkQueue()->addIndirectWork(new GetNodesWork(mgmtNode,
               app->getStorageNodes(), NODETYPE_Storage, app->getStorageBuddyGroupMapper(),
               app->getLocalNode()));
         }
      }

      // sleep for one interval; returns true if termination was requested in the meantime
      terminateRequested = waitForSelfTerminateOrder(std::chrono::milliseconds(
         app->getConfig()->getNodelistRequestInterval()).count());
   }
}
bool NodeListRequestor::getMgmtNodeInfo()
{
for (unsigned i = 0; i < MGMT_NUM_TRIES; i++)
{
LOG(GENERAL, DEBUG, "Waiting for management node...");
// get mgmtd node using NodesTk
auto mgmtNode = NodesTk::downloadNodeInfo(app->getConfig()->getSysMgmtdHost(),
app->getConfig()->getConnMgmtdPort(), app->getConfig()->getConnAuthHash(),
app->getNetMessageFactory(),
NODETYPE_Mgmt, MGMT_TIMEOUT.count());
if(mgmtNode)
{
app->getMgmtNodes()->addOrUpdateNodeEx(std::move(mgmtNode), nullptr);
return true;
}
if (PThread::waitForSelfTerminateOrder(std::chrono::milliseconds(MGMT_TIMEOUT).count()))
break;
}
return false;
}

View File

@@ -0,0 +1,20 @@
#ifndef NODELISTREQUESTOR_H_
#define NODELISTREQUESTOR_H_
#include <common/threading/PThread.h>
class App;
// Component thread that periodically contacts the management node and queues work items which
// update the App's meta and storage node lists.
class NodeListRequestor : public PThread
{
public:
// @param app App that provides config, work queue and node stores; stored as pointer
NodeListRequestor(App* app);
private:
App* const app;
// runs requestLoop() and forwards exceptions to the App
virtual void run() override;
// queues the node list downloads once per request interval until termination is requested
void requestLoop();
// downloads the management node info and stores it; returns false if that did not succeed
bool getMgmtNodeInfo();
};
#endif /*NODELISTREQUESTOR_H_*/

View File

@@ -0,0 +1,206 @@
#include "StatsCollector.h"
#include <common/toolkit/SocketTk.h>
#include <common/nodes/OpCounterTypes.h>
#include <app/App.h>
/**
 * Creates the component as a PThread named "StatsCollector".
 *
 * @param app App that provides config, work queue, node stores and the time series database;
 *    stored as pointer.
 */
StatsCollector::StatsCollector(App* app) :
   PThread("StatsCollector"), app(app),
   workItemCounter(0) // never leave the counter of outstanding work items indeterminate
{}
// Body of the component (presumably invoked by PThread as the thread function - confirm):
// registers the signal handler, runs requestLoop() until termination is requested and reports
// any std::exception to the App instead of letting it escape.
void StatsCollector::run()
{
try
{
LOG(GENERAL, DEBUG, "Component started.");
registerSignalHandler();
requestLoop();
LOG(GENERAL, DEBUG, "Component stopped.");
}
catch (std::exception& e)
{
// the App logs the error and stops all components
app->handleComponentException(e);
}
}
void StatsCollector::requestLoop()
{
bool collectClientOpsByNode = app->getConfig()->getCollectClientOpsByNode();
bool collectClientOpsByUser = app->getConfig()->getCollectClientOpsByUser();
// intially wait one query interval before requesting stats to give NodeListRequestor the time
// to retrieve the node lists
while (!waitForSelfTerminateOrder(std::chrono::milliseconds(
app->getConfig()->getStatsRequestInterval()).count()))
{
{
LOG(GENERAL, DEBUG, "Requesting Stats...");
std::unique_lock<std::mutex> lock(mutex);
workItemCounter = 0;
metaResults.clear();
storageResults.clear();
// collect data
const auto& metaNodes = app->getMetaNodes()->referenceAllNodes();
for (auto node = metaNodes.begin(); node != metaNodes.end(); node++)
{
workItemCounter++;
app->getWorkQueue()->addIndirectWork(
new RequestMetaDataWork(std::static_pointer_cast<MetaNodeEx>(*node),
this, collectClientOpsByNode, collectClientOpsByUser));
}
const auto& storageNodes = app->getStorageNodes()->referenceAllNodes();
for (auto node = storageNodes.begin(); node != storageNodes.end(); node++)
{
workItemCounter++;
app->getWorkQueue()->addIndirectWork(
new RequestStorageDataWork(std::static_pointer_cast<StorageNodeEx>(*node),
this, collectClientOpsByNode, collectClientOpsByUser));
}
while (workItemCounter > 0)
condVar.wait(lock);
// write data
for (auto iter = metaResults.begin(); iter != metaResults.end(); iter++)
{
app->getTSDB()->insertMetaNodeData(iter->node, iter->data);
for (auto listIter = iter->highResStatsList.begin();
listIter != iter->highResStatsList.end(); listIter++)
{
app->getTSDB()->insertHighResMetaNodeData(iter->node, *listIter);
}
if (collectClientOpsByNode)
{
for (auto mapIter = iter->ipOpsUnorderedMap.begin();
mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
{
ipMetaClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
if (collectClientOpsByUser)
{
for (auto mapIter = iter->userOpsUnorderedMap.begin();
mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
{
userMetaClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
}
for (auto iter = storageResults.begin(); iter != storageResults.end(); iter++)
{
app->getTSDB()->insertStorageNodeData(iter->node, iter->data);
for (auto listIter = iter->highResStatsList.begin();
listIter != iter->highResStatsList.end(); listIter++)
{
app->getTSDB()->insertHighResStorageNodeData(iter->node, *listIter);
}
for (auto listIter = iter->storageTargetList.begin();
listIter != iter->storageTargetList.end();
listIter++)
{
app->getTSDB()->insertStorageTargetsData(iter->node, *listIter);
}
if (collectClientOpsByNode)
{
for (auto mapIter = iter->ipOpsUnorderedMap.begin();
mapIter != iter->ipOpsUnorderedMap.end(); mapIter++)
{
ipStorageClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
if (collectClientOpsByUser)
{
for (auto mapIter = iter->userOpsUnorderedMap.begin();
mapIter != iter->userOpsUnorderedMap.end(); mapIter++)
{
userStorageClientOps.addOpsList(mapIter->first, mapIter->second);
}
}
}
if (collectClientOpsByNode)
{
processClientOps(ipMetaClientOps, NODETYPE_Meta, false);
processClientOps(ipStorageClientOps, NODETYPE_Storage, false);
}
if (collectClientOpsByUser)
{
processClientOps(userMetaClientOps, NODETYPE_Meta, true);
processClientOps(userStorageClientOps, NODETYPE_Storage, true);
}
app->getTSDB()->write();
}
}
}
/**
 * Inserts the per-client operation counters of one ClientOps object into the time series
 * database and clears the ClientOps object afterwards.
 *
 * @param clientOps accumulated ops; cleared at the end of this method.
 * @param nodeType selects how op numbers are mapped to op names (meta or storage); for any
 *    other type the op name stays empty.
 * @param perUser true if the map keys are user IDs, false if they are IPv4 addresses.
 */
void StatsCollector::processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser)
{
   ClientOps::IdOpsMap opsPerId = clientOps.getDiffOpsMap();
   ClientOps::OpsList summedOps = clientOps.getDiffSumOpsList(); // result is not used below

   for (auto& idAndOps : opsPerId)
   {
      // textual form of the map key
      std::string id;

      if (!perUser)
      {
         struct in_addr inAddr = { static_cast<in_addr_t>(idAndOps.first) };
         id = Socket::ipaddrToStr(inAddr);
      }
      else if (idAndOps.first == ~0U)
         id = "undefined";
      else
         id = StringTk::uintToStr(idAndOps.first);

      // the position of a value in the list is its op number
      std::map<std::string, uint64_t> opValuesByName;
      unsigned opNum = 0;

      for (auto& opValue : idAndOps.second)
      {
         std::string opName;

         if (nodeType == NODETYPE_Meta)
            opName = OpToStringMapping::mapMetaOpNum(opNum);
         else if (nodeType == NODETYPE_Storage)
            opName = OpToStringMapping::mapStorageOpNum(opNum);

         opValuesByName[opName] = opValue;
         opNum++;
      }

      app->getTSDB()->insertClientNodeData(id, nodeType, opValuesByName, perUser);
   }

   clientOps.clear();
}

View File

@@ -0,0 +1,56 @@
#ifndef STATSCOLLECTOR_H_
#define STATSCOLLECTOR_H_
#include <common/threading/PThread.h>
#include <components/worker/RequestMetaDataWork.h>
#include <components/worker/RequestStorageDataWork.h>
#include <common/nodes/ClientOps.h>
#include <mutex>
#include <condition_variable>
class App;
// Component thread that periodically requests statistics from all meta and storage nodes via
// work items, collects their results and writes them to the App's time series database.
class StatsCollector : public PThread
{
// the work items deliver their results through the private insert*Data() methods
friend class RequestMetaDataWork;
friend class RequestStorageDataWork;
public:
// @param app App that provides config, work queue, node stores and database; stored as pointer
StatsCollector(App* app);
private:
App* const app;
// client ops accumulated over the results of one request round, by client IP / by user and
// by node type
ClientOps ipMetaClientOps;
ClientOps ipStorageClientOps;
ClientOps userMetaClientOps;
ClientOps userStorageClientOps;
// protects workItemCounter, metaResults and storageResults
mutable std::mutex mutex;
// number of queued work items that have not delivered their result yet
int workItemCounter;
std::list<RequestMetaDataWork::Result> metaResults;
std::list<RequestStorageDataWork::Result> storageResults;
// notified whenever a work item has delivered its result
std::condition_variable condVar;
virtual void run() override;
void requestLoop();
void processClientOps(ClientOps& clientOps, NodeType nodeType, bool perUser);
// Called by RequestMetaDataWork: stores the result, decrements the counter of outstanding
// work items and notifies the waiting collector (all while holding the mutex).
void insertMetaData(RequestMetaDataWork::Result result)
{
const std::unique_lock<std::mutex> lock(mutex);
metaResults.push_back(std::move(result));
workItemCounter--;
condVar.notify_one();
}
// Called by RequestStorageDataWork: same as insertMetaData(), but for storage node results.
void insertStorageData(RequestStorageDataWork::Result result)
{
const std::unique_lock<std::mutex> lock(mutex);
storageResults.push_back(std::move(result));
workItemCounter--;
condVar.notify_one();
}
};
#endif /*STATSCOLLECTOR_H_*/

View File

@@ -0,0 +1,40 @@
#include "GetNodesWork.h"
#include <common/toolkit/NodesTk.h>
void GetNodesWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
std::vector<std::shared_ptr<Node>> nodesList;
std::list<NumNodeID> addedNodes;
std::list<NumNodeID> removedNodes;
if (NodesTk::downloadNodes(*mgmtdNode, nodeType, nodesList, false))
{
// sync the downloaded list with the node store
nodes->syncNodes(nodesList, &addedNodes, &removedNodes, localNode.get());
if (!addedNodes.empty())
LOG(GENERAL, WARNING, "Nodes added.", ("addedNodes", addedNodes.size()), nodeType);
if (!removedNodes.empty())
LOG(GENERAL, WARNING, "Nodes removed.", ("removedNodes", removedNodes.size()), nodeType);
}
else
{
LOG(GENERAL, ERR, "Couldn't download server list from management daemon.", nodeType);
}
std::list<uint16_t> buddyGroupIDList;
std::list<uint16_t> primaryTargetIDList;
std::list<uint16_t> secondaryTargetIDList;
// update the storage buddy groups
if (NodesTk::downloadMirrorBuddyGroups(*mgmtdNode, nodeType, &buddyGroupIDList,
&primaryTargetIDList, &secondaryTargetIDList, false) )
{
buddyGroupMapper->syncGroupsFromLists(buddyGroupIDList, primaryTargetIDList,
secondaryTargetIDList, NumNodeID());
}
}

View File

@@ -0,0 +1,32 @@
#ifndef GETNODESWORK_H_
#define GETNODESWORK_H_
#include <common/components/worker/Work.h>
#include <common/nodes/MirrorBuddyGroupMapper.h>
#include <common/nodes/NodeType.h>
#include <common/nodes/NodeStoreServers.h>
// Work item that downloads the node list and the mirror buddy groups of one node type from the
// management node and syncs them into the given node store and buddy group mapper.
class GetNodesWork : public Work
{
public:
// @param mgmtdNode management node to download from
// @param nodes node store to sync the downloaded nodes into (raw pointer, stored as is)
// @param nodeType type of the nodes to download
// @param buddyGroupMapper mapper to sync the downloaded buddy groups into (raw pointer, stored
//    as is)
// @param localNode passed on to the node store when syncing
GetNodesWork(std::shared_ptr<Node> mgmtdNode, NodeStoreServers *nodes, NodeType nodeType,
MirrorBuddyGroupMapper* buddyGroupMapper, std::shared_ptr<Node> localNode)
: mgmtdNode(std::move(mgmtdNode)),
nodes(nodes),
nodeType(nodeType),
buddyGroupMapper(buddyGroupMapper),
localNode(localNode)
{}
virtual void process(char* bufIn, unsigned bufInLen,
char* bufOut, unsigned bufOutLen) override;
private:
std::shared_ptr<Node> mgmtdNode;
NodeStoreServers* nodes;
NodeType nodeType;
MirrorBuddyGroupMapper* buddyGroupMapper;
std::shared_ptr<Node> localNode;
};
#endif /*GETNODESWORK_H_*/

View File

@@ -0,0 +1,69 @@
#include "RequestMetaDataWork.h"
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
#include <common/net/message/mon/RequestMetaDataMsg.h>
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
#include <components/StatsCollector.h>
void RequestMetaDataWork::process(char* bufIn, unsigned bufInLen, char* bufOut, unsigned bufOutLen)
{
if (!node->getIsResponding())
{
HeartbeatRequestMsg heartbeatRequestMsg;
if(MessagingTk::requestResponse(*node, heartbeatRequestMsg,
NETMSGTYPE_Heartbeat))
{
LOG(GENERAL, DEBUG, "Node is responding again.",
("NodeID", node->getNodeIDWithTypeStr()));
node->setIsResponding(true);
}
}
Result result = {};
result.data.isResponding = false;
if (node->getIsResponding())
{
// generate the RequestDataMsg with the lastStatsTime
RequestMetaDataMsg requestDataMsg(node->getLastStatRequestTime().count());
auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
NETMSGTYPE_RequestMetaDataResp);
if (!respMsg)
{
LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
node->setIsResponding(false);
}
else
{
// get response and process it
auto metaRspMsg = static_cast<RequestMetaDataRespMsg*>(respMsg.get());
result.highResStatsList = std::move(metaRspMsg->getStatsList());
result.data.isResponding = true;
result.data.indirectWorkListSize = metaRspMsg->getIndirectWorkListSize();
result.data.directWorkListSize = metaRspMsg->getDirectWorkListSize();
result.data.sessionCount = metaRspMsg->getSessionCount();
result.data.hostnameid = metaRspMsg->gethostnameid();
if (!result.highResStatsList.empty())
{
auto lastStatsRequestTime = std::chrono::milliseconds(
result.highResStatsList.front().rawVals.statsTimeMS);
node->setLastStatRequestTime(lastStatsRequestTime);
}
if (collectClientOpsByNode)
result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);
if (collectClientOpsByUser)
result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
}
}
result.node = std::move(node);
statsCollector->insertMetaData(std::move(result));
}

View File

@@ -0,0 +1,42 @@
#ifndef REQUESTMETADATAWORK_H_
#define REQUESTMETADATAWORK_H_
#include <common/components/worker/Work.h>
#include <common/nodes/ClientOps.h>
#include <misc/TSDatabase.h>
#include <nodes/MetaNodeEx.h>
class StatsCollector;
// Work item that requests the statistics of one meta node and delivers them to the
// StatsCollector.
class RequestMetaDataWork : public Work
{
public:
// Everything collected for one node in one request round.
struct Result
{
// node the data belongs to
std::shared_ptr<MetaNodeEx> node;
MetaNodeDataContent data;
HighResStatsList highResStatsList;
// client ops; only filled if the corresponding collect* flag of the work item is set
ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;
ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap;
};
// @param node meta node to query
// @param statsCollector receiver of the result (raw pointer, stored as is)
// @param collectClientOpsByNode whether to request the client ops for ipOpsUnorderedMap
// @param collectClientOpsByUser whether to request the client ops for userOpsUnorderedMap
RequestMetaDataWork(std::shared_ptr<MetaNodeEx> node,
StatsCollector* statsCollector,
bool collectClientOpsByNode, bool collectClientOpsByUser) :
node(std::move(node)),
statsCollector(statsCollector),
collectClientOpsByNode(collectClientOpsByNode),
collectClientOpsByUser(collectClientOpsByUser)
{}
virtual void process(char* bufIn, unsigned bufInLen,
char* bufOut, unsigned bufOutLen) override;
private:
std::shared_ptr<MetaNodeEx> node;
StatsCollector* statsCollector;
bool collectClientOpsByNode;
bool collectClientOpsByUser;
};
#endif /*REQUESTMETADATAWORK_H_*/

View File

@@ -0,0 +1,74 @@
#include "RequestStorageDataWork.h"
#include <common/toolkit/MessagingTk.h>
#include <common/net/message/nodes/HeartbeatRequestMsg.h>
#include <common/net/message/mon/RequestStorageDataMsg.h>
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
#include <components/StatsCollector.h>
void RequestStorageDataWork::process(char* bufIn, unsigned bufInLen,
char* bufOut, unsigned bufOutLen)
{
if (!node->getIsResponding())
{
HeartbeatRequestMsg heartbeatRequestMsg;
if(MessagingTk::requestResponse(*node, heartbeatRequestMsg,
NETMSGTYPE_Heartbeat))
{
LOG(GENERAL, DEBUG, "Node is responding again.",
("NodeID", node->getNodeIDWithTypeStr()));
node->setIsResponding(true);
}
}
Result result = {};
result.data.isResponding = false;
if (node->getIsResponding())
{
// generate the RequestStorageDataMsg with the lastStatsTime
RequestStorageDataMsg requestDataMsg(node->getLastStatRequestTime().count());
auto respMsg = MessagingTk::requestResponse(*node, requestDataMsg,
NETMSGTYPE_RequestStorageDataResp);
if (!respMsg)
{
LOG(GENERAL, DEBUG, "Node is not responding.", ("NodeID", node->getNodeIDWithTypeStr()));
node->setIsResponding(false);
}
else
{
// get response and process it
auto storageRspMsg = static_cast<RequestStorageDataRespMsg*>(respMsg.get());
result.highResStatsList = std::move(storageRspMsg->getStatsList());
result.storageTargetList = std::move(storageRspMsg->getStorageTargets());
result.data.isResponding = true;
result.data.indirectWorkListSize = storageRspMsg->getIndirectWorkListSize();
result.data.directWorkListSize = storageRspMsg->getDirectWorkListSize();
result.data.diskSpaceTotal = storageRspMsg->getDiskSpaceTotalMiB();
result.data.diskSpaceFree = storageRspMsg->getDiskSpaceFreeMiB();
result.data.sessionCount = storageRspMsg->getSessionCount();
result.data.hostnameid = storageRspMsg->gethostnameid();
if (!result.highResStatsList.empty())
{
auto lastStatsRequestTime = std::chrono::milliseconds(
result.highResStatsList.front().rawVals.statsTimeMS);
node->setLastStatRequestTime(lastStatsRequestTime);
}
if (collectClientOpsByNode)
result.ipOpsUnorderedMap = ClientOpsRequestor::request(*node, false);
if (collectClientOpsByUser)
result.userOpsUnorderedMap = ClientOpsRequestor::request(*node, true);
}
}
result.node = std::move(node);
statsCollector->insertStorageData(std::move(result));
}

View File

@@ -0,0 +1,44 @@
#ifndef REQUESTSTORAGEDATAWORK_H_
#define REQUESTSTORAGEDATAWORK_H_
#include <common/components/worker/Work.h>
#include <common/nodes/ClientOps.h>
#include <common/storage/StorageTargetInfo.h>
#include <misc/TSDatabase.h>
#include <nodes/StorageNodeEx.h>
class StatsCollector;
class RequestStorageDataWork : public Work
{
public:
struct Result
{
std::shared_ptr<StorageNodeEx> node;
StorageNodeDataContent data;
HighResStatsList highResStatsList;
StorageTargetInfoList storageTargetList;
ClientOpsRequestor::IdOpsUnorderedMap ipOpsUnorderedMap;
ClientOpsRequestor::IdOpsUnorderedMap userOpsUnorderedMap;
};
RequestStorageDataWork(std::shared_ptr<StorageNodeEx> node,
StatsCollector* statsCollector, bool collectClientOpsByNode,
bool collectClientOpsByUser) :
node(std::move(node)),
statsCollector(statsCollector),
collectClientOpsByNode(collectClientOpsByNode),
collectClientOpsByUser(collectClientOpsByUser)
{}
void process(char* bufIn, unsigned bufInLen, char* bufOut,
unsigned bufOutLen);
private:
std::shared_ptr<StorageNodeEx> node;
StatsCollector* statsCollector;
bool collectClientOpsByNode;
bool collectClientOpsByUser;
};
#endif /*REQUESTSTORAGEDATAWORK_H_*/

View File

@@ -0,0 +1,8 @@
#ifndef CURLEXCEPTION_H_
#define CURLEXCEPTION_H_
#include <common/toolkit/NamedException.h>
// Exception type thrown by CurlWrapper when a libcurl call fails.
DECLARE_NAMEDEXCEPTION(CurlException, "CurlException")
#endif /*CURLEXCEPTION_H_*/

View File

@@ -0,0 +1,8 @@
#ifndef DATABASEEXCEPTION_H_
#define DATABASEEXCEPTION_H_
#include <common/toolkit/NamedException.h>
// Exception type thrown by the time series database backends (Cassandra, InfluxDB), e.g. when
// a connection cannot be established or an invalid node type is passed.
DECLARE_NAMEDEXCEPTION(DatabaseException, "DatabaseException")
#endif /*DATABASEEXCEPTION_H_*/

View File

@@ -0,0 +1,348 @@
#include "Cassandra.h"
#include <common/storage/StorageTargetInfo.h>
#include <common/toolkit/StringTk.h>
#include <exception/DatabaseException.h>
#include <chrono>
#include <thread>
// Version of the datastax client library that is named in the error messages emitted when the
// library or one of its symbols cannot be loaded.
static const std::string libVersion = "2.9";
/**
 * Looks up a symbol in a library opened with dlopen() and wraps it in a std::function.
 *
 * @tparam T function type of the symbol
 * @param libHandle handle returned by dlopen()
 * @param name name of the symbol to look up
 * @return callable wrapper around the resolved symbol
 * @throw std::runtime_error if dlerror() reports an error for the lookup
 */
template<typename T>
std::function<T> loadSymbol(void* libHandle, const char* name)
{
   // reset the error state, so that the dlerror() call below only reports this lookup
   dlerror();

   void* symbol = dlsym(libHandle, name);
   const char* lookupError = dlerror();

   if (lookupError == NULL)
      return reinterpret_cast<T(*)>(symbol);

   throw std::runtime_error("Couldn't load symbol: " + std::string(lookupError)
      + "\nThe cassandra plugin requires the datastax client library version " + libVersion
      + ".");
}
/**
 * Loads the datastax client library at runtime, connects to the configured cluster and makes
 * sure that the keyspace and all tables used by this class exist.
 *
 * @param config connection and batching settings; a copy is kept in the object
 * @throw std::runtime_error if libcassandra.so or one of the required symbols can't be loaded
 * @throw DatabaseException if the connection could not be established within
 *    connectionRetries attempts or if one of the setup statements fails
 */
Cassandra::Cassandra(Config config) :
   cluster(nullptr, [this](CassCluster* c){cluster_free(c);}),
   session(nullptr, [this](CassSession* s){session_free(s);}),
   batch(nullptr, [this](CassBatch* b){batch_free(b);}),
   config(std::move(config)),
   libHandle(nullptr, dlclose),
   numQueries(0)
{
   // Load datastax cassandra library
   dlerror();
   libHandle.reset(dlopen("libcassandra.so", RTLD_NOW));
   const char* error = dlerror();
   if (!libHandle || error != NULL)
   {
      // dlerror() may return NULL; never build a std::string from a null pointer
      throw std::runtime_error("Couldn't load cassandra client library (libcassandra.so): "
         + std::string(error ? error : "unknown error")
         + "\nThe cassandra plugin requires the datastax client library"
         + " version " + libVersion + ".");
   }

   // load used symbols
   cluster_new = loadSymbol<decltype(cass_cluster_new)>(
      libHandle.get(), "cass_cluster_new");
   cluster_free = loadSymbol<decltype(cass_cluster_free)>(
      libHandle.get(), "cass_cluster_free");
   session_new = loadSymbol<decltype(cass_session_new)>(
      libHandle.get(), "cass_session_new");
   session_free = loadSymbol<decltype(cass_session_free)>(
      libHandle.get(), "cass_session_free");
   batch_new = loadSymbol<decltype(cass_batch_new)>(
      libHandle.get(), "cass_batch_new");
   batch_free = loadSymbol<decltype(cass_batch_free)>(
      libHandle.get(), "cass_batch_free");
   batch_add_statement = loadSymbol<decltype(cass_batch_add_statement)>(
      libHandle.get(), "cass_batch_add_statement");
   cluster_set_contact_points = loadSymbol<decltype(cass_cluster_set_contact_points)>(
      libHandle.get(), "cass_cluster_set_contact_points");
   cluster_set_port = loadSymbol<decltype(cass_cluster_set_port)>(
      libHandle.get(), "cass_cluster_set_port");
   session_connect = loadSymbol<decltype(cass_session_connect)>(
      libHandle.get(), "cass_session_connect");
   session_execute = loadSymbol<decltype(cass_session_execute)>(
      libHandle.get(), "cass_session_execute");
   session_execute_batch = loadSymbol<decltype(cass_session_execute_batch)>(
      libHandle.get(), "cass_session_execute_batch");
   future_error_code = loadSymbol<decltype(cass_future_error_code)>(
      libHandle.get(), "cass_future_error_code");
   future_error_message = loadSymbol<decltype(cass_future_error_message)>(
      libHandle.get(), "cass_future_error_message");
   future_free = loadSymbol<decltype(cass_future_free)>(
      libHandle.get(), "cass_future_free");
   statement_new = loadSymbol<decltype(cass_statement_new)>(
      libHandle.get(), "cass_statement_new");
   statement_free = loadSymbol<decltype(cass_statement_free)>(
      libHandle.get(), "cass_statement_free");

   cluster.reset(cluster_new());
   session.reset(session_new());
   batch.reset(batch_new(CASS_BATCH_TYPE_LOGGED));

   cluster_set_contact_points(cluster.get(), this->config.host.c_str());
   cluster_set_port(cluster.get(), this->config.port);

   // try to connect until it works or the retry limit is reached
   unsigned tries = 0;
   while (true)
   {
      auto connectFuture = std::unique_ptr<CassFuture, decltype(future_free)>(
         session_connect(session.get(), cluster.get()), future_free);

      CassError err = future_error_code(connectFuture.get());
      if (err == CASS_OK)
         break;

      // the driver reports the message together with its length, so the length is used
      // instead of relying on a terminating NUL byte
      const char* message = "";
      size_t length = 0;
      future_error_message(connectFuture.get(), &message, &length);
      LOG(DATABASE, ERR, "Couldn't connect to cassandra database: "
         + std::string(message, length));

      tries++;
      if (tries >= connectionRetries)
         throw DatabaseException("Connection to cassandra database failed.");
      else
         LOG(DATABASE, WARNING, "Retrying in 10 seconds.");

      std::this_thread::sleep_for(std::chrono::seconds(10));
   }

   // Create and switch to keyspace
   query("CREATE KEYSPACE IF NOT EXISTS " + this->config.database + " WITH "
      + "replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};");
   query("USE " + this->config.database + ";");

   // Create tables
   query("CREATE TABLE IF NOT EXISTS meta ("
      "time timestamp, nodeNumID int, nodeID varchar, isResponding boolean, "
      "indirectWorkListSize int, directWorkListSize int, PRIMARY KEY(time, nodeNumID));");
   query("CREATE TABLE IF NOT EXISTS highResMeta ("
      "time timestamp, nodeNumID int, nodeID varchar, workRequests int, "
      "queuedRequests int, netSendBytes int, netRecvBytes int, PRIMARY KEY(time, nodeNumID));");
   query("CREATE TABLE IF NOT EXISTS storage ("
      "time timestamp, nodeNumID int, nodeID varchar, isResponding boolean, "
      "indirectWorkListSize int, directWorkListSize int, "
      "diskSpaceTotal bigint, diskSpaceFree bigint, PRIMARY KEY(time, nodeNumID));");
   query("CREATE TABLE IF NOT EXISTS highResStorage ("
      "time timestamp, nodeNumID int, nodeID varchar, workRequests int, "
      "queuedRequests int, diskWriteBytes int, diskReadBytes int, "
      "netSendBytes int, netRecvBytes int, PRIMARY KEY(time, nodeNumID));");
   query("CREATE TABLE IF NOT EXISTS storageTargetData ("
      "time timestamp, nodeNumID int, nodeID varchar, storageTargetID int, "
      "diskSpaceTotal bigint, diskSpaceFree bigint, inodesTotal int, inodesFree int, "
      "PRIMARY KEY(time, nodeNumID));");
   query("CREATE TABLE IF NOT EXISTS metaClientOpsByNode ("
      "time timestamp, node varchar, ops map<varchar,int> ,"
      "PRIMARY KEY(time, node));");
   query("CREATE TABLE IF NOT EXISTS storageClientOpsByNode ("
      "time timestamp, node varchar, ops map<varchar,int> ,"
      "PRIMARY KEY(time, node));");
   query("CREATE TABLE IF NOT EXISTS metaClientOpsByUser ("
      "time timestamp, user varchar, ops map<varchar,int> ,"
      "PRIMARY KEY(time, user));");
   query("CREATE TABLE IF NOT EXISTS storageClientOpsByUser ("
      "time timestamp, user varchar, ops map<varchar,int> ,"
      "PRIMARY KEY(time, user));");
}
/**
 * Executes a single CQL statement outside of the insert batch.
 *
 * @param query the CQL statement
 * @param waitForResult if true, the result of the statement is awaited and checked; if false,
 *    the statement is only submitted
 * @throw DatabaseException if waitForResult is set and the statement failed
 */
void Cassandra::query(const std::string& query, bool waitForResult)
{
   CassStatement* statement = statement_new(query.c_str(), 0);
   auto queryFuture = std::unique_ptr<CassFuture, decltype(future_free)>(
      session_execute(session.get(), statement), future_free);
   statement_free(statement);

   if (!waitForResult)
      return;

   CassError result = future_error_code(queryFuture.get());
   if (result != CASS_OK)
   {
      // the driver reports the message together with its length, so the length is used
      // instead of relying on a terminating NUL byte
      const char* message = "";
      size_t length = 0;
      future_error_message(queryFuture.get(), &message, &length);
      throw DatabaseException("Query '" + query + "' failed: " + std::string(message, length));
   }
}
/**
 * Queues an INSERT into the "meta" table for the given node.
 *
 * The work list columns are only part of the statement if the node is responding.
 *
 * @param node the node the data belongs to
 * @param data the values to store
 */
void Cassandra::insertMetaNodeData(std::shared_ptr<Node> node, const MetaNodeDataContent& data)
{
   // column names and values are assembled side by side and joined at the end
   std::ostringstream columns;
   std::ostringstream values;

   columns << "(time, nodeNumID, nodeID, isResponding";
   values << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', "
      << std::boolalpha << data.isResponding;

   if (data.isResponding)
   {
      columns << ", indirectWorkListSize, directWorkListSize";
      values << ", " << data.indirectWorkListSize << ", " << data.directWorkListSize;
   }

   std::ostringstream cql;
   cql << "INSERT INTO meta " << columns.str() << ") " << "VALUES (" << values.str() << ") "
      << "USING TTL " << config.TTLSecs << ";";

   appendQuery(cql.str());
}
/**
 * Queues an INSERT into the "storage" table for the given node.
 *
 * The work list and disk space columns are only part of the statement if the node is
 * responding.
 *
 * @param node the node the data belongs to
 * @param data the values to store
 */
void Cassandra::insertStorageNodeData(std::shared_ptr<Node> node,
   const StorageNodeDataContent& data)
{
   // column names and values are assembled side by side and joined at the end
   std::ostringstream columns;
   std::ostringstream values;

   columns << "(time, nodeNumID, nodeID, isResponding";
   values << "TOTIMESTAMP(NOW()), " << node->getNumID() << ", '" << node->getAlias() << "', "
      << std::boolalpha << data.isResponding;

   if (data.isResponding)
   {
      columns << ", indirectWorkListSize, directWorkListSize, diskSpaceTotal, diskSpaceFree";
      values << ", " << data.indirectWorkListSize << ", " << data.directWorkListSize << ", "
         << data.diskSpaceTotal << ", " << data.diskSpaceFree;
   }

   std::ostringstream cql;
   cql << "INSERT INTO storage " << columns.str() << ") " << "VALUES (" << values.str() << ") "
      << "USING TTL " << config.TTLSecs << ";";

   appendQuery(cql.str());
}
/**
 * Queues an INSERT into the "highResMeta" table. The time column is taken from the stats
 * entry itself (rawVals.statsTimeMS).
 *
 * @param node the node the data belongs to
 * @param data one high resolution stats entry
 */
void Cassandra::insertHighResMetaNodeData(std::shared_ptr<Node> node,
   const HighResolutionStats& data)
{
   std::ostringstream cql;

   cql << "INSERT INTO highResMeta "
      << "(time, nodeNumID, nodeID, workRequests, "
      << "queuedRequests, netSendBytes, netRecvBytes) VALUES ("
      << data.rawVals.statsTimeMS << ", "
      << node->getNumID() << ", '"
      << node->getAlias() << "', "
      << data.incVals.workRequests << ", "
      << data.rawVals.queuedRequests << ", "
      << data.incVals.netSendBytes << ", "
      << data.incVals.netRecvBytes << ") "
      << "USING TTL " << config.TTLSecs << ";";

   appendQuery(cql.str());
}
/**
 * Queues an INSERT into the "highResStorage" table. The time column is taken from the stats
 * entry itself (rawVals.statsTimeMS).
 *
 * @param node the node the data belongs to
 * @param data one high resolution stats entry
 */
void Cassandra::insertHighResStorageNodeData(std::shared_ptr<Node> node,
   const HighResolutionStats& data)
{
   std::ostringstream cql;

   cql << "INSERT INTO highResStorage "
      << "(time, nodeNumID, nodeID, workRequests, "
      << "queuedRequests, diskWriteBytes, diskReadBytes, netSendBytes, netRecvBytes) VALUES ("
      << data.rawVals.statsTimeMS << ", "
      << node->getNumID() << ", '"
      << node->getAlias() << "', "
      << data.incVals.workRequests << ", "
      << data.rawVals.queuedRequests << ", "
      << data.incVals.diskWriteBytes << ", "
      << data.incVals.diskReadBytes << ", "
      << data.incVals.netSendBytes << ", "
      << data.incVals.netRecvBytes << ") "
      << "USING TTL " << config.TTLSecs << ";";

   appendQuery(cql.str());
}
/**
 * Queues an INSERT into the "storageTargetData" table for one storage target.
 *
 * @param node the node the target belongs to
 * @param data space and inode information of the target
 */
void Cassandra::insertStorageTargetsData(std::shared_ptr<Node> node,
   const StorageTargetInfo& data)
{
   std::ostringstream cql;

   cql << "INSERT INTO storageTargetData "
      << "(time, nodeNumID, nodeID, storageTargetID, "
      << "diskSpaceTotal, diskSpaceFree, inodesTotal, inodesFree) VALUES ("
      << "TOTIMESTAMP(NOW()), "
      << node->getNumID() << ", '"
      << node->getAlias() << "', "
      << data.getTargetID() << ", "
      << data.getDiskSpaceTotal() << ", "
      << data.getDiskSpaceFree() << ", "
      << data.getInodesTotal() << ", "
      << data.getInodesFree() << ") "
      << "USING TTL " << config.TTLSecs << ";";

   appendQuery(cql.str());
}
/**
 * Queues an INSERT with the client operation counters of one client or user.
 *
 * Counters with the value 0 are left out; if no counter remains, nothing is queued.
 *
 * @param id identifier stored in the "user" or "node" column
 * @param nodeType NODETYPE_Meta or NODETYPE_Storage; selects the table
 * @param opMap operation name to counter value
 * @param perUser true to use the per-user table, false for the per-node table
 * @throw DatabaseException if nodeType is neither NODETYPE_Meta nor NODETYPE_Storage
 */
void Cassandra::insertClientNodeData(const std::string& id, const NodeType nodeType,
   const std::map<std::string, uint64_t>& opMap, bool perUser)
{
   // pick the target table first; an unsupported node type ends the call right here
   const char* table;

   if (nodeType == NODETYPE_Meta)
      table = perUser ? "metaClientOpsByUser" : "metaClientOpsByNode";
   else if (nodeType == NODETYPE_Storage)
      table = perUser ? "storageClientOpsByUser" : "storageClientOpsByNode";
   else
      throw DatabaseException("Invalid Nodetype given.");

   std::ostringstream cql;
   cql << "INSERT INTO " << table
      << (perUser ? " (time, user, ops) VALUES (" : " (time, node, ops) VALUES (")
      << "TOTIMESTAMP(NOW()), '" << id << "', {";

   bool haveEntries = false;

   for (const auto& op : opMap)
   {
      if (op.second == 0)
         continue;

      if (haveEntries)
         cql << ",";

      cql << "'" << op.first << "':" << op.second;
      haveEntries = true;
   }

   cql << "}) USING TTL " << config.TTLSecs << ";";

   // if no fields are != 0, dont write anything
   if (haveEntries)
      appendQuery(cql.str());
}
void Cassandra::appendQuery(const std::string& query)
{
const std::lock_guard<Mutex> lock(queryMutex);
CassStatement* statement = statement_new(query.c_str(), 0);
batch_add_statement(batch.get(), statement);
statement_free(statement);
numQueries++;
if (numQueries >= config.maxInsertsPerBatch)
{
writeUnlocked();
}
}
/**
 * Sends the statements that were queued so far, if there are any. Takes queryMutex.
 */
void Cassandra::write()
{
   const std::lock_guard<Mutex> lock(queryMutex);

   if (numQueries == 0)
      return;

   writeUnlocked();
}
/**
 * Submits the current batch and starts a new, empty one.
 *
 * Does not take queryMutex itself; appendQuery() and write() call it while holding the lock.
 *
 * NOTE(review): the future of the batch execution is freed without reading its error code, so
 * a failed batch is not reported here - confirm that this is intended.
 */
void Cassandra::writeUnlocked()
{
CassFuture* batchFuture = session_execute_batch(session.get(), batch.get());
// the submitted batch is released by reset(); a fresh one collects the next statements
batch.reset(batch_new(CASS_BATCH_TYPE_LOGGED));
future_free(batchFuture);
LOG(DATABASE, DEBUG, "Sent queries to Cassandra.", numQueries);
numQueries = 0;
}

View File

@@ -0,0 +1,80 @@
#ifndef CASSANDRA_H_
#define CASSANDRA_H_
#include <common/nodes/NodeType.h>
#include <common/threading/Mutex.h>
#include <nodes/MetaNodeEx.h>
#include <nodes/StorageNodeEx.h>
#include <misc/TSDatabase.h>
#include <cassandra.h>
#include <dlfcn.h>
/**
 * Time series database backend that writes the collected statistics to a Cassandra cluster.
 *
 * The datastax client library is loaded at runtime (dlopen), so all driver functions are
 * called through the std::function members below. Inserts are collected in a batch that is
 * sent by write() or when config.maxInsertsPerBatch statements are queued.
 */
class Cassandra : public TSDatabase
{
   public:

      struct Config
      {
         std::string host;
         int port;
         std::string database;
         unsigned maxInsertsPerBatch;
         unsigned TTLSecs;
      };

      Cassandra(Config config);

      /**
       * Frees the driver objects explicitly. libHandle is declared after cluster, session and
       * batch and thus would be destroyed (dlclose) before them by the implicit member
       * destruction, while their deleters still need to call into the library.
       *
       * NOTE(review): if the constructor throws after the driver objects were created, this
       * destructor does not run and the members are still destroyed in declaration order -
       * that path is not covered here.
       */
      virtual ~Cassandra()
      {
         batch.reset();
         session.reset();
         cluster.reset();
      }

      virtual void insertMetaNodeData(
            std::shared_ptr<Node> node, const MetaNodeDataContent& data) override;
      virtual void insertStorageNodeData(
            std::shared_ptr<Node> node, const StorageNodeDataContent& data) override;
      virtual void insertHighResMetaNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) override;
      virtual void insertHighResStorageNodeData(
            std::shared_ptr<Node> node, const HighResolutionStats& data) override;
      virtual void insertStorageTargetsData(
            std::shared_ptr<Node> node, const StorageTargetInfo& data) override;
      virtual void insertClientNodeData(
            const std::string& id, const NodeType nodeType,
            const std::map<std::string, uint64_t>& opMap, bool perUser) override;
      virtual void write() override;

   private:
      // driver functions, resolved from the runtime-loaded library in the constructor
      std::function<decltype(cass_cluster_new)> cluster_new;
      std::function<decltype(cass_cluster_free)> cluster_free;
      std::function<decltype(cass_session_new)> session_new;
      std::function<decltype(cass_session_free)> session_free;
      std::function<decltype(cass_batch_new)> batch_new;
      std::function<decltype(cass_batch_free)> batch_free;
      std::function<decltype(cass_batch_add_statement)> batch_add_statement;
      std::function<decltype(cass_cluster_set_contact_points)> cluster_set_contact_points;
      std::function<decltype(cass_cluster_set_port)> cluster_set_port;
      std::function<decltype(cass_session_connect)> session_connect;
      std::function<decltype(cass_session_execute)> session_execute;
      std::function<decltype(cass_session_execute_batch)> session_execute_batch;
      std::function<decltype(cass_future_error_code)> future_error_code;
      std::function<decltype(cass_future_error_message)> future_error_message;
      std::function<decltype(cass_future_free)> future_free;
      std::function<decltype(cass_statement_new)> statement_new;
      std::function<decltype(cass_statement_free)> statement_free;

      // driver objects; their deleters call the *_free functions above
      std::unique_ptr<CassCluster, decltype(cluster_free)> cluster;
      std::unique_ptr<CassSession, decltype(session_free)> session;
      std::unique_ptr<CassBatch, decltype(batch_free)> batch;

      const Config config;
      std::unique_ptr<void, int(*)(void*)> libHandle; // dlopen handle, closed with dlclose

      std::string queryBuffer;
      unsigned numQueries; // number of statements in the current batch

      mutable Mutex queryMutex; // taken by appendQuery() and write()

      void appendQuery(const std::string& query);
      void query(const std::string& query, bool waitForResult = true);
      void writeUnlocked();
};
#endif

View File

@@ -0,0 +1,153 @@
#include "CurlWrapper.h"
#include <exception/CurlException.h>
/**
 * Creates the curl easy handle and applies the options that are shared by all requests.
 *
 * @param timeout used as total request timeout and as connect timeout
 * @param checkSSLCertificates if false, peer and host verification are switched off
 * @throw CurlException if the handle can't be created or an option can't be set
 */
CurlWrapper::CurlWrapper(std::chrono::milliseconds timeout, bool checkSSLCertificates) :
   curlHandle(curl_easy_init(), &curl_easy_cleanup)
{
   // make sure the buffer holds a valid (empty) string in case an option fails before curl
   // has written anything to it
   errorBuffer[0] = '\0';

   if (curlHandle.get() == NULL)
      throw CurlException("Curl init failed.");

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_ERRORBUFFER, errorBuffer) != CURLE_OK)
      throw CurlException("Setting Curl error buffer failed.");

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_NOSIGNAL, 1L) != CURLE_OK)
      throw CurlException(errorBuffer);

   // curl_easy_setopt is variadic and expects a long for numeric options, so the values are
   // converted explicitly instead of passing the chrono rep type or a plain int
   const long timeoutMS = static_cast<long>(timeout.count());

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_TIMEOUT_MS, timeoutMS) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEFUNCTION, writeCallback) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEDATA, static_cast<void*>(this)) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (curl_easy_setopt(curlHandle.get(), CURLOPT_CONNECTTIMEOUT_MS, timeoutMS) != CURLE_OK)
      throw CurlException(errorBuffer);

   if (!checkSSLCertificates)
   {
      if (curl_easy_setopt(curlHandle.get(), CURLOPT_SSL_VERIFYPEER, 0L) != CURLE_OK)
         throw CurlException(errorBuffer);

      if (curl_easy_setopt(curlHandle.get(), CURLOPT_SSL_VERIFYHOST, 0L) != CURLE_OK)
         throw CurlException(errorBuffer);
   }
}
void CurlWrapper::enableHttpAuth(const std::string& user, const std::string& password)
{
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPAUTH, CURLAUTH_ANY))
throw CurlException(errorBuffer);
if (curl_easy_setopt(curlHandle.get(), CURLOPT_USERNAME, user.c_str()))
throw CurlException(errorBuffer);
if (curl_easy_setopt(curlHandle.get(), CURLOPT_PASSWORD, password.c_str()))
throw CurlException(errorBuffer);
}
unsigned short CurlWrapper::sendGetRequest(const std::string& url, const ParameterMap& parameters)
{
std::string parameterStr = makeParameterStr(parameters);
if (curl_easy_setopt(curlHandle.get(), CURLOPT_URL, (url + parameterStr).c_str()) != CURLE_OK)
throw CurlException(errorBuffer);
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPGET, 1L) != CURLE_OK)
throw CurlException(errorBuffer);
// replace with curl_multi_perform?
if (curl_easy_perform(curlHandle.get()) != CURLE_OK)
throw CurlException(errorBuffer);
long responseCode;
if (curl_easy_getinfo(curlHandle.get(), CURLINFO_RESPONSE_CODE, &responseCode) != CURLE_OK)
throw CurlException(errorBuffer);
return responseCode;
}
unsigned short CurlWrapper::sendPostRequest(const std::string& url, const char* data,
const ParameterMap& parameters, const std::vector<std::string>& headers)
{
std::string parameterStr = makeParameterStr(parameters);
if (curl_easy_setopt(curlHandle.get(), CURLOPT_URL, (url + parameterStr).c_str()) != CURLE_OK)
throw CurlException(errorBuffer);
if (curl_easy_setopt(curlHandle.get(), CURLOPT_POSTFIELDS, data) != CURLE_OK)
throw CurlException(errorBuffer);
struct curl_slist* headerList = nullptr;
for (const auto& header : headers) {
headerList = curl_slist_append(headerList, header.c_str());
}
if (curl_easy_setopt(curlHandle.get(), CURLOPT_HTTPHEADER, headerList) != CURLE_OK)
throw CurlException(errorBuffer);
// replace with curl_multi_perform?
if (curl_easy_perform(curlHandle.get()) != CURLE_OK)
throw CurlException(errorBuffer);
long responseCode;
if (curl_easy_getinfo(curlHandle.get(), CURLINFO_RESPONSE_CODE, &responseCode) != CURLE_OK)
throw CurlException(errorBuffer);
return responseCode;
}
std::string CurlWrapper::makeParameterStr(const ParameterMap& parameters) const
{
if (!parameters.empty())
{
std::string parameterStr = "?";
for (auto iter = parameters.begin(); iter != parameters.end(); iter++)
{
{
auto escaped = std::unique_ptr<char, void(*)(void*)> (
curl_easy_escape(curlHandle.get(), (iter->first).c_str(),0),
&curl_free);
if (!escaped)
throw CurlException(errorBuffer);
parameterStr += escaped.get();
}
{
auto escaped = std::unique_ptr<char, void(*)(void*)> (
curl_easy_escape(curlHandle.get(), (iter->second).c_str(),0),
&curl_free);
if (!escaped)
throw CurlException(errorBuffer);
parameterStr += "=";
parameterStr += escaped.get();
parameterStr += "&";
}
}
parameterStr.resize(parameterStr.size() - 1);
return parameterStr;
}
return {};
}
size_t CurlWrapper::writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata)
{
auto instance = static_cast<CurlWrapper*>(userdata);
instance->setResponse(std::string(ptr, size*nmemb));
// Always signal success
return size*nmemb;
}

View File

@@ -0,0 +1,57 @@
#ifndef CURL_WRAPPER_H_
#define CURL_WRAPPER_H_
#include <common/threading/Mutex.h>
#include <curl/curl.h>
#include <chrono>
#include <mutex>
#include <unordered_map>
/**
 * Thin wrapper around a curl easy handle for simple HTTP GET and POST requests.
 *
 * The object is neither copyable nor movable, because its address is registered with the
 * curl handle as write callback data.
 */
class CurlWrapper
{
   public:
      CurlWrapper(std::chrono::milliseconds timeout, bool checkSSLCertificates);

      CurlWrapper(const CurlWrapper&) = delete;
      CurlWrapper& operator=(const CurlWrapper&) = delete;
      CurlWrapper(CurlWrapper&&) = delete;
      CurlWrapper& operator=(CurlWrapper&&) = delete;

      ~CurlWrapper() = default;

      void enableHttpAuth(const std::string& user, const std::string& password);

      typedef std::unordered_map<std::string, std::string> ParameterMap;

      unsigned short sendGetRequest(const std::string& url,
         const ParameterMap& parameters);
      unsigned short sendPostRequest(const std::string& url, const char* data,
         const ParameterMap& parameters, const std::vector<std::string>& headers);

      static size_t writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata);

   protected:
      std::unique_ptr<CURL, void(*)(void*)> curlHandle;
      std::string response; // set by writeCallback

      // zero-initialized, so that it is a valid empty string if it is read before curl has
      // written an error message to it
      char errorBuffer[CURL_ERROR_SIZE] = {};

      std::string makeParameterStr(const ParameterMap& parameters) const;

      void setResponse(const std::string& response)
      {
         this->response = response;
      }

   public:
      /**
       * @return the data stored by the most recent writeCallback invocation
       */
      const std::string& getResponse() const
      {
         return response;
      }
};
#endif

View File

@@ -0,0 +1,344 @@
#include "InfluxDB.h"
#include <common/storage/StorageTargetInfo.h>
#include <common/toolkit/StringTk.h>
#include <exception/DatabaseException.h>
#include <exception/CurlException.h>
#include <thread>
#include <chrono>
#include <boost/algorithm/string/replace.hpp>
// Name of the retention policy that setupDatabase() creates if config.setRetentionPolicy is
// set.
static const std::string retentionPolicyName = "auto";
/**
 * Creates the curl wrapper used for all requests. For config.dbVersion == INFLUXDB, HTTP
 * authentication is enabled if a username is configured and the database is set up.
 *
 * @param cfg connection settings; stored in the object
 */
InfluxDB::InfluxDB(Config cfg) :
   config(std::move(cfg))
{
   curlWrapper = boost::make_unique<CurlWrapper>(config.httpTimeout,
      config.curlCheckSSLCertificates);

   // the remaining steps only apply to the INFLUXDB version
   if (config.dbVersion != INFLUXDB)
      return;

   const bool haveCredentials = !config.username.empty();
   if (haveCredentials)
      curlWrapper->enableHttpAuth(config.username, config.password);

   setupDatabase();
}
/**
 * Waits until the InfluxDB service answers a ping, then creates the database and (if
 * configured) the retention policy.
 *
 * @throw DatabaseException if the service didn't answer after connectionRetries attempts
 */
void InfluxDB::setupDatabase() const
{
   // Wait for InfluxDB service being available
   for (unsigned failedPings = 1; !sendPing(); failedPings++)
   {
      LOG(DATABASE, ERR, "Coudn't reach InfluxDB service.");

      if (failedPings >= connectionRetries)
         throw DatabaseException("Connection to InfluxDB failed.");

      LOG(DATABASE, WARNING, "Retrying in 10 seconds.");
      std::this_thread::sleep_for(std::chrono::seconds(10));
   }

   // these are called every time the service starts but is being ignored by influxdb if
   // the db and rp already exist
   sendQuery("create database " + config.database);

   if (!config.setRetentionPolicy)
      return;

   sendQuery("create retention policy " + retentionPolicyName + " on " + config.database
      + " duration " + config.retentionDuration
      + " replication 1 default");
}
/**
 * Queues a point of the "meta" measurement for the given node. The work list sizes and the
 * hostnameid are only written if the node is responding.
 *
 * @param node the node the data belongs to
 * @param data the values to store
 */
void InfluxDB::insertMetaNodeData(std::shared_ptr<Node> node, const MetaNodeDataContent& data)
{
   const bool responding = data.isResponding;

   std::ostringstream line;

   // measurement and tags, followed by the first field
   line << "meta"
      << ",nodeID=" << escapeStringForWrite(node->getAlias())
      << ",nodeNumID=" << node->getNumID()
      << " isResponding=" << std::boolalpha << responding;

   if (responding)
   {
      line << ",indirectWorkListSize=" << data.indirectWorkListSize
         << ",directWorkListSize=" << data.directWorkListSize
         << ",hostnameid=\"" << data.hostnameid << "\"";
   }

   appendPoint(line.str());
}
/**
 * Queues a point of the "storage" measurement for the given node. Work list sizes, disk space
 * and hostnameid are only written if the node is responding.
 *
 * @param node the node the data belongs to
 * @param data the values to store
 */
void InfluxDB::insertStorageNodeData(std::shared_ptr<Node> node,
   const StorageNodeDataContent& data)
{
   const bool responding = data.isResponding;

   std::ostringstream line;

   // measurement and tags, followed by the first field
   line << "storage"
      << ",nodeID=" << escapeStringForWrite(node->getAlias())
      << ",nodeNumID=" << node->getNumID()
      << " isResponding=" << std::boolalpha << responding;

   if (responding)
   {
      line << ",indirectWorkListSize=" << data.indirectWorkListSize
         << ",directWorkListSize=" << data.directWorkListSize
         << ",diskSpaceTotal=" << data.diskSpaceTotal
         << ",diskSpaceFree=" << data.diskSpaceFree
         << ",hostnameid=\"" << data.hostnameid << "\"";
   }

   appendPoint(line.str());
}
/**
 * Queues a point of the "highResMeta" measurement. The point carries the time stamp of the
 * stats entry (rawVals.statsTimeMS), converted to nanoseconds.
 *
 * @param node the node the data belongs to
 * @param data one high resolution stats entry
 */
void InfluxDB::insertHighResMetaNodeData(std::shared_ptr<Node> node,
   const HighResolutionStats& data)
{
   // timestamp in ns
   const auto timestampNS = std::chrono::duration_cast<std::chrono::nanoseconds>(
      std::chrono::milliseconds(data.rawVals.statsTimeMS));

   std::ostringstream line;

   line << "highResMeta"
      << ",nodeID=" << escapeStringForWrite(node->getAlias())
      << ",nodeNumID=" << node->getNumID()
      << " workRequests=" << data.incVals.workRequests
      << ",queuedRequests=" << data.rawVals.queuedRequests
      << ",netSendBytes=" << data.incVals.netSendBytes
      << ",netRecvBytes=" << data.incVals.netRecvBytes
      << " " << timestampNS.count();

   appendPoint(line.str());
}
/**
 * Queues a point of the "highResStorage" measurement. The point carries the time stamp of the
 * stats entry (rawVals.statsTimeMS), converted to nanoseconds.
 *
 * @param node the node the data belongs to
 * @param data one high resolution stats entry
 */
void InfluxDB::insertHighResStorageNodeData(std::shared_ptr<Node> node,
   const HighResolutionStats& data)
{
   // timestamp in ns
   const auto timestampNS = std::chrono::duration_cast<std::chrono::nanoseconds>(
      std::chrono::milliseconds(data.rawVals.statsTimeMS));

   std::ostringstream line;

   line << "highResStorage"
      << ",nodeID=" << escapeStringForWrite(node->getAlias())
      << ",nodeNumID=" << node->getNumID()
      << " workRequests=" << data.incVals.workRequests
      << ",queuedRequests=" << data.rawVals.queuedRequests
      << ",diskWriteBytes=" << data.incVals.diskWriteBytes
      << ",diskReadBytes=" << data.incVals.diskReadBytes
      << ",netSendBytes=" << data.incVals.netSendBytes
      << ",netRecvBytes=" << data.incVals.netRecvBytes
      << " " << timestampNS.count();

   appendPoint(line.str());
}
/**
 * Queues a point of the "storageTargets" measurement for one storage target.
 *
 * The consistency state is written as "GOOD", "NEEDS_RESYNC" or, for every other state, "BAD".
 *
 * @param node the node the target belongs to
 * @param data space, inode and state information of the target
 */
void InfluxDB::insertStorageTargetsData(std::shared_ptr<Node> node,
   const StorageTargetInfo& data)
{
   std::ostringstream line;

   line << "storageTargets"
      << ",nodeID=" << escapeStringForWrite(node->getAlias())
      << ",nodeNumID=" << node->getNumID()
      << ",storageTargetID=" << data.getTargetID()
      << " diskSpaceTotal=" << data.getDiskSpaceTotal()
      << ",diskSpaceFree=" << data.getDiskSpaceFree()
      << ",inodesTotal=" << data.getInodesTotal()
      << ",inodesFree=" << data.getInodesFree();

   // everything that is neither good nor in need of a resync is reported as bad
   std::string stateName = "BAD";
   const auto state = data.getState();

   if (state == TargetConsistencyState::TargetConsistencyState_GOOD)
      stateName = "GOOD";
   else if (state == TargetConsistencyState::TargetConsistencyState_NEEDS_RESYNC)
      stateName = "NEEDS_RESYNC";

   line << ",targetConsistencyState=\"" << stateName << "\"";

   appendPoint(line.str());
}
void InfluxDB::insertClientNodeData(const std::string& id, const NodeType nodeType,
const std::map<std::string, uint64_t>& opMap, bool perUser)
{
std::ostringstream point;
if (perUser)
{
if (nodeType == NODETYPE_Meta)
point << "metaClientOpsByUser";
else if (nodeType == NODETYPE_Storage)
point << "storageClientOpsByUser";
else
throw DatabaseException("Invalid Nodetype given.");
}
else
{
if (nodeType == NODETYPE_Meta)
point << "metaClientOpsByNode";
else if (nodeType == NODETYPE_Storage)
point << "storageClientOpsByNode";
else
throw DatabaseException("Invalid Nodetype given.");
}
point << (perUser ? ",user=" : ",node=") << id;
bool first = true;
for (auto iter = opMap.begin(); iter != opMap.end(); iter++)
{
if (iter->second == 0)
continue;
point << (first ? " " : ",") << iter->first << "=" << iter->second;
first = false;
}
// if no fields are != 0, dont write anything
if (!first)
appendPoint(point.str());
}
/**
 * Adds one line to the buffered points and sends the buffer as soon as it holds
 * config.maxPointsPerRequest points. Takes pointsMutex.
 *
 * @param point the line to add (without trailing newline)
 */
void InfluxDB::appendPoint(const std::string& point)
{
   const std::lock_guard<Mutex> mutexLock(pointsMutex);

   points += point;
   points += "\n";
   ++numPoints;

   // test also for size? make it an option?
   if (numPoints < config.maxPointsPerRequest)
      return;

   writePointsUnlocked();
}
void InfluxDB::write()
{
const std::lock_guard<Mutex> mutexLock(pointsMutex);
writePointsUnlocked();
}
/**
 * Sends the buffered points and empties the buffer afterwards.
 *
 * Does not take pointsMutex itself; appendPoint() and write() call it while holding the lock.
 */
void InfluxDB::writePointsUnlocked()
{
   sendWrite(points);
   LOG(DATABASE, DEBUG, "Sent data to InfluxDB.", numPoints);

   // start over with an empty buffer
   numPoints = 0;
   points.clear();
}
/**
 * Posts the given lines to the write endpoint of the database.
 *
 * For config.dbVersion == INFLUXDB the "/write" endpoint with the "db" parameter is used,
 * otherwise "/api/v2/write" with "org" and "bucket" parameters and a token header. Failures
 * are logged and not reported to the caller. Takes curlMutex.
 *
 * @param data the request body
 */
void InfluxDB::sendWrite(const std::string& data) const
{
   const bool isVersion1 = (config.dbVersion == INFLUXDB);
   const std::string endpoint = config.host + ":" + StringTk::intToStr(config.port);

   CurlWrapper::ParameterMap params;
   std::vector<std::string> headers;

   if (isVersion1)
   {
      params["db"] = config.database;
   }
   else
   {
      params["org"] = config.organization;
      params["bucket"] = config.bucket;
      headers.push_back("Authorization: Token " + config.token);
   }

   const std::string url = endpoint + (isVersion1 ? "/write" : "/api/v2/write");

   unsigned short responseCode = 0;

   const std::lock_guard<Mutex> mutexLock(curlMutex);

   try
   {
      responseCode = curlWrapper->sendPostRequest(url, data.c_str(), params, headers);
   }
   catch (const CurlException& e)
   {
      LOG(DATABASE, ERR, "Writing to InfluxDB failed due to Curl error.", ("Error", e.what()));
      return;
   }

   const bool succeeded = (responseCode >= 200) && (responseCode < 300);
   if (!succeeded)
   {
      LOG(DATABASE, ERR, "Writing to InfluxDB failed.", responseCode,
         ("responseMessage", curlWrapper->getResponse()));
   }
}
/**
 * Posts a query to the "/query" endpoint of the database, with config.database as "db"
 * parameter. Failures are logged and not reported to the caller. Takes curlMutex.
 *
 * @param data the query text, sent as "q" parameter
 */
void InfluxDB::sendQuery(const std::string& data) const
{
   CurlWrapper::ParameterMap params;
   params["db"] = config.database;
   params["q"] = data;

   const std::string url = config.host + ":" + StringTk::intToStr(config.port) + "/query";

   unsigned short responseCode = 0;

   const std::lock_guard<Mutex> mutexLock(curlMutex);

   try
   {
      responseCode = curlWrapper->sendPostRequest(url, "", params, {});
   }
   catch (const CurlException& e)
   {
      LOG(DATABASE, ERR, "Querying InfluxDB failed due to Curl error.", ("Error", e.what()));
      return;
   }

   const bool succeeded = (responseCode >= 200) && (responseCode < 300);
   if (!succeeded)
   {
      LOG(DATABASE, ERR, "Querying InfluxDB failed.", responseCode,
         ("responseMessage", curlWrapper->getResponse()));
   }
}
/**
 * Sends a GET request to the "/ping" endpoint of the database. Takes curlMutex.
 *
 * @return true if a response code in the range 200-299 was received, false otherwise
 *    (including curl errors, which are logged)
 */
bool InfluxDB::sendPing() const
{
   const std::string url = config.host + ":" + StringTk::intToStr(config.port) + "/ping";

   unsigned short responseCode = 0;

   const std::lock_guard<Mutex> mutexLock(curlMutex);

   try
   {
      responseCode = curlWrapper->sendGetRequest(url, CurlWrapper::ParameterMap());
   }
   catch (const CurlException& e)
   {
      LOG(DATABASE, ERR, "Pinging InfluxDB failed due to Curl error.", ("Error", e.what()));
      return false;
   }

   if (responseCode >= 200 && responseCode < 300)
      return true;

   LOG(DATABASE, ERR, "Pinging InfluxDB failed.", responseCode,
      ("responseMessage", curlWrapper->getResponse()));

   return false;
}
/**
 * Escapes a string for use in a write request: according to the InfluxDB documentation,
 * spaces, "=" and "," need to be escaped, which is done by prefixing each of them with a
 * backslash.
 *
 * @param str the unescaped input
 * @return copy of str with every ' ', '=' and ',' preceded by a backslash
 */
std::string InfluxDB::escapeStringForWrite(const std::string& str)
{
   std::string result;
   result.reserve(str.size());

   // single pass; the inserted backslash is never one of the escaped characters itself, so
   // this yields the same output as replacing the three characters one after another
   for (const char current : str)
   {
      if (current == ' ' || current == '=' || current == ',')
         result += '\\';

      result += current;
   }

   return result;
}

View File

@@ -0,0 +1,84 @@
#ifndef INFLUXDB_H_
#define INFLUXDB_H_
#include <common/nodes/NodeType.h>
#include <common/threading/Mutex.h>
#include <nodes/MetaNodeEx.h>
#include <nodes/StorageNodeEx.h>
#include <misc/CurlWrapper.h>
#include <misc/TSDatabase.h>
#include <app/Config.h>
// Selects which HTTP API flavour InfluxDB::sendWrite() uses for write requests.
enum InfluxDBVersion
{
// writes are posted to "/write" with a "db" parameter
INFLUXDB,
// writes are posted to "/api/v2/write" with "org"/"bucket" parameters and a token header
INFLUXDB2,
};
class App;
/**
 * TSDatabase implementation that sends data to an InfluxDB server over HTTP through a
 * CurlWrapper. Points are collected in a string buffer and sent by write(), or earlier once
 * config.maxPointsPerRequest points have been buffered.
 */
class InfluxDB : public TSDatabase
{
public:
// Connection and behaviour settings; stored as an immutable copy in the `config` member.
struct Config
{
// host and port are joined as host + ":" + port to form the base URL of every request
std::string host;
int port;
// sent as the "db" parameter (queries, and writes when dbVersion == INFLUXDB)
std::string database;
// NOTE(review): httpTimeout, setRetentionPolicy, retentionDuration,
// curlCheckSSLCertificates, username and password are not referenced by the request helpers
// (sendWrite/sendQuery/sendPing); presumably consumed by the constructor or
// setupDatabase() - confirm there.
std::chrono::milliseconds httpTimeout;
// the point buffer is flushed as soon as numPoints reaches this value
unsigned maxPointsPerRequest;
bool setRetentionPolicy;
std::string retentionDuration;
bool curlCheckSSLCertificates;
std::string username;
std::string password;
// bucket, organization and token are only used for writes when dbVersion != INFLUXDB
std::string bucket;
std::string organization;
std::string token;
InfluxDBVersion dbVersion;
};
InfluxDB(Config cfg);
virtual ~InfluxDB() {};
virtual void insertMetaNodeData(
std::shared_ptr<Node> node, const MetaNodeDataContent& data) override;
virtual void insertStorageNodeData(
std::shared_ptr<Node> node, const StorageNodeDataContent& data) override;
virtual void insertHighResMetaNodeData(
std::shared_ptr<Node> node, const HighResolutionStats& data) override;
virtual void insertHighResStorageNodeData(
std::shared_ptr<Node> node, const HighResolutionStats& data) override;
virtual void insertStorageTargetsData(
std::shared_ptr<Node> node, const StorageTargetInfo& data) override;
virtual void insertClientNodeData(
const std::string& id, const NodeType nodeType,
const std::map<std::string, uint64_t>& opMap, bool perUser) override;
// locks pointsMutex and sends all buffered points
virtual void write() override;
// prefixes every ' ', '=' and ',' in str with a backslash
static std::string escapeStringForWrite(const std::string& str);
private:
const Config config;
// performs all HTTP requests; access is serialized with curlMutex
std::unique_ptr<CurlWrapper> curlWrapper;
// buffered data that is passed to sendWrite() on flush, and the number of buffered points
std::string points;
unsigned numPoints = 0;
// pointsMutex is held by write() while flushing; curlMutex is held around curlWrapper requests
mutable Mutex pointsMutex;
mutable Mutex curlMutex;
void setupDatabase() const;
void appendPoint(const std::string& point);
// sends and clears the buffer; does not lock pointsMutex itself
void writePointsUnlocked();
// request helpers; sendWrite/sendQuery only log failures, sendPing returns success as bool
void sendWrite(const std::string& data) const;
void sendQuery(const std::string& data) const;
bool sendPing() const;
};
#endif

View File

@@ -0,0 +1,34 @@
#ifndef TS_DATABASE_H_
#define TS_DATABASE_H_
#include <common/nodes/NodeType.h>
#include <nodes/MetaNodeEx.h>
#include <nodes/StorageNodeEx.h>
#include <app/Config.h>
/**
 * Abstract interface of a time series database backend. All insert* methods and write() are
 * pure virtual and have to be provided by the concrete backend (e.g. InfluxDB).
 */
class TSDatabase
{
public:
// NOTE(review): not used in this header; presumably the number of connection attempts made by
// backends - confirm at the use sites.
static const unsigned connectionRetries = 3;
TSDatabase() {};
virtual ~TSDatabase() {};
// one insert method per kind of collected data
virtual void insertMetaNodeData(
std::shared_ptr<Node> node, const MetaNodeDataContent& data) = 0;
virtual void insertStorageNodeData(
std::shared_ptr<Node> node, const StorageNodeDataContent& data) = 0;
virtual void insertHighResMetaNodeData(
std::shared_ptr<Node> node, const HighResolutionStats& data) = 0;
virtual void insertHighResStorageNodeData(
std::shared_ptr<Node> node, const HighResolutionStats& data) = 0;
virtual void insertStorageTargetsData(
std::shared_ptr<Node> node, const StorageTargetInfo& data) = 0;
virtual void insertClientNodeData(
const std::string& id, const NodeType nodeType,
const std::map<std::string, uint64_t>& opMap, bool perUser) = 0;
// NOTE(review): in the InfluxDB backend this flushes the buffered points; whether every
// backend buffers is not visible here.
virtual void write() = 0;
};
#endif

View File

@@ -0,0 +1,49 @@
#include <common/net/message/SimpleMsg.h>
#include <common/net/message/NetMessageTypes.h>
#include <common/net/message/mon/RequestMetaDataRespMsg.h>
#include <common/net/message/mon/RequestStorageDataRespMsg.h>
#include <common/net/message/control/DummyMsg.h>
#include <common/net/message/control/GenericResponseMsg.h>
#include <common/net/message/nodes/GetClientStatsRespMsg.h>
#include <common/net/message/nodes/GetMirrorBuddyGroupsRespMsg.h>
#include <common/net/message/nodes/GetNodesRespMsg.h>
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
#include <common/net/message/storage/lookup/FindOwnerRespMsg.h>
#include <net/message/nodes/HeartbeatMsgEx.h>
#include "NetMessageFactory.h"
/**
* @return NetMessage that must be deleted by the caller
* (msg->msgType is NETMSGTYPE_Invalid on error)
*/
std::unique_ptr<NetMessage> NetMessageFactory::createFromMsgType(unsigned short msgType) const
{
NetMessage* msg;
switch(msgType)
{
// The following lines shoudle be grouped by "type of the message" and ordered alphabetically
// inside the groups. There should always be one message per line to keep a clear layout
// (although this might lead to lines that are longer than usual)
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
case NETMSGTYPE_GenericResponse: { msg = new GenericResponseMsg(); } break;
case NETMSGTYPE_GetClientStatsResp: { msg = new GetClientStatsRespMsg(); } break;
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetNodesResp: { msg = new GetNodesRespMsg(); } break;
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
case NETMSGTYPE_Heartbeat: { msg = new HeartbeatMsgEx(); } break;
case NETMSGTYPE_RequestMetaDataResp: { msg = new RequestMetaDataRespMsg(); } break;
case NETMSGTYPE_RequestStorageDataResp: { msg = new RequestStorageDataRespMsg(); } break;
default:
{
msg = new SimpleMsg(NETMSGTYPE_Invalid);
} break;
}
return std::unique_ptr<NetMessage>(msg);
}

View File

@@ -0,0 +1,13 @@
#ifndef NETMESSAGEFACTORY_H_
#define NETMESSAGEFACTORY_H_
#include <common/Common.h>
#include <common/net/message/AbstractNetMessageFactory.h>
class NetMessageFactory : public AbstractNetMessageFactory
{
protected:
virtual std::unique_ptr<NetMessage> createFromMsgType(unsigned short msgType) const override;
} ;
#endif /*NETMESSAGEFACTORY_H_*/

View File

@@ -0,0 +1,11 @@
#ifndef HEARTBEATMSGEX_H_
#define HEARTBEATMSGEX_H_
#include <common/net/message/nodes/HeartbeatMsg.h>
/**
 * Dummy heartbeat message without any behaviour of its own; it only exists so that the mgmt
 * download doesn't fail.
 */
class HeartbeatMsgEx : public HeartbeatMsg
{
};
#endif /*HEARTBEATMSGEX_H_*/

View File

@@ -0,0 +1,17 @@
#include "MetaNodeEx.h"
/**
 * Creates a meta node from the alias, numeric ID, ports and NIC list of a received node.
 * A node created this way starts out marked as responding.
 */
MetaNodeEx::MetaNodeEx(std::shared_ptr<Node> receivedNode) :
   Node(
      NODETYPE_Meta,
      receivedNode->getAlias(),
      receivedNode->getNumID(),
      receivedNode->getPortUDP(),
      receivedNode->getPortTCP(),
      receivedNode->getConnPool()->getNicList()),
   isResponding(true)
{
}
/**
 * Creates a meta node from the connection details of a received node while taking over the
 * last stat request time and the responding flag of an already known node.
 *
 * @param receivedNode source of alias, numeric ID, ports and NIC list
 * @param oldNode previously stored node whose state is carried over; must not be null
 */
MetaNodeEx::MetaNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<MetaNodeEx> oldNode) :
   Node(
      NODETYPE_Meta,
      receivedNode->getAlias(),
      receivedNode->getNumID(),
      receivedNode->getPortUDP(),
      receivedNode->getPortTCP(),
      receivedNode->getConnPool()->getNicList())
{
   const std::chrono::milliseconds previousRequestTime = oldNode->getLastStatRequestTime();
   setLastStatRequestTime(previousRequestTime);

   const bool previouslyResponding = oldNode->getIsResponding();
   setIsResponding(previouslyResponding);
}

View File

@@ -0,0 +1,55 @@
#ifndef METANODEEX_H_
#define METANODEEX_H_
#include <common/nodes/Node.h>
#include <common/Common.h>
#include <common/threading/RWLockGuard.h>
// Plain data record describing one meta node; this is the type that
// TSDatabase::insertMetaNodeData() takes. The struct does not initialize its members.
// NOTE(review): field meanings below are inferred from the names - confirm with the code that
// fills this struct.
struct MetaNodeDataContent
{
bool isResponding;
// presumably the current lengths of the node's indirect / direct work queues
unsigned indirectWorkListSize;
unsigned directWorkListSize;
unsigned sessionCount;
std::string hostnameid;
};
/**
 * Node subclass for meta nodes that additionally tracks when stats were last requested and
 * whether the node is responding. Both values are protected by an internal RWLock.
 */
class MetaNodeEx: public Node
{
   public:
      MetaNodeEx(std::shared_ptr<Node> receivedNode);
      MetaNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<MetaNodeEx> oldNode);

      // readers take the lock in read mode

      std::chrono::milliseconds getLastStatRequestTime() const
      {
         RWLockGuard readGuard(lock, SafeRWLock_READ);
         return lastStatRequestTime;
      }

      bool getIsResponding() const
      {
         RWLockGuard readGuard(lock, SafeRWLock_READ);
         return isResponding;
      }

      // writers take the lock in write mode

      void setLastStatRequestTime(const std::chrono::milliseconds& time)
      {
         RWLockGuard writeGuard(lock, SafeRWLock_WRITE);
         lastStatRequestTime = time;
      }

      void setIsResponding(bool isResponding)
      {
         RWLockGuard writeGuard(lock, SafeRWLock_WRITE);
         this->isResponding = isResponding;
      }

   private:
      mutable RWLock lock; // guards the two members below
      bool isResponding;
      std::chrono::milliseconds lastStatRequestTime{0};
};
#endif /*METANODEEX_H_*/

View File

@@ -0,0 +1,6 @@
#include "MgmtNodeEx.h"
/**
 * Creates a management node; all arguments are passed straight on to the Node constructor
 * together with the node type NODETYPE_Mgmt.
 */
MgmtNodeEx::MgmtNodeEx(
   std::string nodeID,
   NumNodeID nodeNumID,
   unsigned short portUDP,
   unsigned short portTCP,
   NicAddressList& nicList) :
   Node(NODETYPE_Mgmt, nodeID, nodeNumID, portUDP, portTCP, nicList)
{
}

View File

@@ -0,0 +1,37 @@
#ifndef MGMTNODEEX_H_
#define MGMTNODEEX_H_
#include <common/nodes/Node.h>
#include <common/Common.h>
#include <mutex>
// Plain data record held by MgmtNodeEx and exchanged through its getContent()/setContent().
struct MgmtdNodeDataContent
{
// not initialized by the struct itself
bool isResponding;
};
/**
 * Node subclass for the management node that carries a MgmtdNodeDataContent record.
 *
 * Access to the record is serialized with `mutex`, which is not declared in this class
 * (presumably a member inherited from Node - confirm).
 */
class MgmtNodeEx : public Node
{
   public:
      MgmtNodeEx(std::string nodeID, NumNodeID nodeNumID, unsigned short portUDP,
         unsigned short portTCP, NicAddressList& nicList);

   private:
      // Value-initialized so that isResponding is a well-defined `false` until the first
      // setContent(); without this, getContent() on a fresh node would read an indeterminate
      // bool.
      MgmtdNodeDataContent data{};

   public:
      /**
       * @return copy of the stored record, taken while holding the mutex
       */
      MgmtdNodeDataContent getContent()
      {
         const std::lock_guard<Mutex> lock(mutex);
         return this->data;
      }

      /**
       * Replaces the stored record while holding the mutex.
       */
      void setContent(MgmtdNodeDataContent content)
      {
         const std::lock_guard<Mutex> lock(mutex);
         this->data = content;
      }
};
#endif /*MGMTNODEEX_H_*/

View File

@@ -0,0 +1,38 @@
#include "NodeStoreMetaEx.h"
#include <common/app/log/Logger.h>
#include <nodes/MetaNodeEx.h>
/**
 * Sets up the base node store for nodes of type NODETYPE_Meta.
 */
NodeStoreMetaEx::NodeStoreMetaEx()
   : NodeStoreServers(NODETYPE_Meta, false)
{
}
/**
 * Adds a received meta node to the store or updates the stored entry for it.
 *
 * The received node is always wrapped in a freshly created MetaNodeEx. If a node with the same
 * numeric ID is already stored, its last stat request time and responding flag are carried
 * over into the new object.
 *
 * @param receivedNode node info as received; its numeric ID must not be 0
 * @param outNodeNumID forwarded to addOrUpdateNodeUnlocked(), as the storage and mgmt node
 *    stores do (it was previously dropped here by always passing nullptr)
 * @return NodeStoreResult::Error if the numeric ID is 0, otherwise the result of
 *    addOrUpdateNodeUnlocked()
 */
NodeStoreResult NodeStoreMetaEx::addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
   NumNodeID* outNodeNumID)
{
   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!receivedNode->getNumID())
      return NodeStoreResult::Error;

   std::shared_ptr<MetaNodeEx> newNode;

   auto storedNode =
      std::static_pointer_cast<MetaNodeEx>(referenceNode(receivedNode->getNumID()));

   if (!storedNode)
   {
      // new node, create MetaNodeEx object with the parameters of the received node info
      newNode = std::make_shared<MetaNodeEx>(receivedNode);
      LOG(GENERAL, DEBUG, "Received new meta node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }
   else
   {
      // already stored node, create MetaNodeEx object with the parameters of the
      // received node info and keep the internal data
      newNode = std::make_shared<MetaNodeEx>(receivedNode, storedNode);
      LOG(GENERAL, DEBUG, "Received update for meta node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }

   const std::lock_guard<Mutex> lock(mutex);
   return addOrUpdateNodeUnlocked(std::move(newNode), outNodeNumID);
}

View File

@@ -0,0 +1,16 @@
#ifndef NODESTOREMETAEX_H_
#define NODESTOREMETAEX_H_
#include <common/nodes/NodeStore.h>
/**
 * Node store for meta nodes; overrides addOrUpdateNodeEx() so that stored nodes are
 * MetaNodeEx objects.
 */
class NodeStoreMetaEx : public NodeStoreServers
{
   public:
      NodeStoreMetaEx();

      NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
         NumNodeID* outNodeNumID) override;
};
#endif /*NODESTOREMETAEX_H_*/

View File

@@ -0,0 +1,29 @@
#include "NodeStoreMgmtEx.h"
/**
 * Sets up the base node store for nodes of type NODETYPE_Mgmt.
 */
NodeStoreMgmtEx::NodeStoreMgmtEx()
   : NodeStoreServers(NODETYPE_Mgmt, false)
{
}
/**
 * Adds a management node to the store or updates the stored entry for it.
 *
 * If no node with the same numeric ID is active yet, the given node is replaced by a new
 * MgmtNodeEx built from its alias, numeric ID, ports and NIC list before it is stored;
 * otherwise the given node is passed on unchanged.
 *
 * @param node node info; its numeric ID must not be 0
 * @param outNodeNumID forwarded to addOrUpdateNodeUnlocked()
 * @return NodeStoreResult::Error if the numeric ID is 0, otherwise the result of
 *    addOrUpdateNodeUnlocked()
 */
NodeStoreResult NodeStoreMgmtEx::addOrUpdateNodeEx(std::shared_ptr<Node> node, NumNodeID* outNodeNumID)
{
   std::string nodeID(node->getAlias());
   NumNodeID nodeNumID = node->getNumID();

   // sanity check: don't allow nodeNumID==0 (only mgmtd allows this)
   if (!node->getNumID())
      return NodeStoreResult::Error;

   const std::lock_guard<Mutex> storeLock(mutex);

   const bool isNewNode = (activeNodes.find(nodeNumID) == activeNodes.end());

   if (isNewNode)
   {
      // read everything that is needed from the received node before it gets replaced
      NicAddressList nicList = node->getNicList();
      const auto portUDP = node->getPortUDP();
      const auto portTCP = node->getPortTCP();

      node = boost::make_unique<MgmtNodeEx>(nodeID, nodeNumID, portUDP, portTCP, nicList);
   }

   return addOrUpdateNodeUnlocked(std::move(node), outNodeNumID);
}

View File

@@ -0,0 +1,15 @@
#ifndef NODESTOREMGMTDEX_H_
#define NODESTOREMGMTDEX_H_
#include <common/nodes/NodeStore.h>
#include <nodes/MgmtNodeEx.h>
/**
 * Node store for management nodes; overrides addOrUpdateNodeEx() so that newly added nodes
 * are stored as MgmtNodeEx objects.
 */
class NodeStoreMgmtEx : public NodeStoreServers
{
   public:
      NodeStoreMgmtEx();

      NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> node,
         NumNodeID* outNodeNumID) override;
};
#endif /*NODESTOREMGMTDEX_H_*/

View File

@@ -0,0 +1,38 @@
#include "NodeStoreStorageEx.h"
#include <common/app/log/Logger.h>
#include <nodes/StorageNodeEx.h>
/**
 * Sets up the base node store for nodes of type NODETYPE_Storage.
 */
NodeStoreStorageEx::NodeStoreStorageEx()
   : NodeStoreServers(NODETYPE_Storage, false)
{
}
/**
 * Adds a received storage node to the store or updates the stored entry for it.
 *
 * The received node is always wrapped in a freshly created StorageNodeEx. If a node with the
 * same numeric ID is already stored, its last stat request time and responding flag are
 * carried over into the new object.
 *
 * @param receivedNode node info as received; its numeric ID must not be 0
 * @param outNodeNumID forwarded to addOrUpdateNodeUnlocked()
 * @return NodeStoreResult::Error if the numeric ID is 0, otherwise the result of
 *    addOrUpdateNodeUnlocked()
 */
NodeStoreResult NodeStoreStorageEx::addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
   NumNodeID* outNodeNumID)
{
   // sanity check: a numeric node ID of 0 is rejected here (only mgmtd allows this)
   if (!receivedNode->getNumID())
      return NodeStoreResult::Error;

   const auto knownNode =
      std::static_pointer_cast<StorageNodeEx>(referenceNode(receivedNode->getNumID()));

   std::shared_ptr<StorageNodeEx> replacement;

   if (knownNode)
   {
      // node is already stored: use the received node info, but keep the internal data of the
      // stored node
      replacement = std::make_shared<StorageNodeEx>(receivedNode, knownNode);
      LOG(GENERAL, DEBUG, "Received update for storage node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }
   else
   {
      // node is not stored yet: build it from the received node info only
      replacement = std::make_shared<StorageNodeEx>(receivedNode);
      LOG(GENERAL, DEBUG, "Received new storage node.",
         ("nodeNumID", receivedNode->getNumID().val()));
   }

   const std::lock_guard<Mutex> storeLock(mutex);
   return addOrUpdateNodeUnlocked(std::move(replacement), outNodeNumID);
}

View File

@@ -0,0 +1,15 @@
#ifndef NODESTORESTORAGEEX_H_
#define NODESTORESTORAGEEX_H_
#include <common/nodes/NodeStore.h>
/**
 * Node store for storage nodes; overrides addOrUpdateNodeEx() so that stored nodes are
 * StorageNodeEx objects.
 */
class NodeStoreStorageEx : public NodeStoreServers
{
   public:
      NodeStoreStorageEx();

      NodeStoreResult addOrUpdateNodeEx(std::shared_ptr<Node> receivedNode,
         NumNodeID* outNodeNumID) override;
};
#endif /*NODESTORESTORAGEEX_H_*/

View File

@@ -0,0 +1,18 @@
#include "StorageNodeEx.h"
/**
 * Creates a storage node from the alias, numeric ID, ports and NIC list of a received node.
 * A node created this way starts out marked as responding.
 */
StorageNodeEx::StorageNodeEx(std::shared_ptr<Node> receivedNode) :
   Node(
      NODETYPE_Storage,
      receivedNode->getAlias(),
      receivedNode->getNumID(),
      receivedNode->getPortUDP(),
      receivedNode->getPortTCP(),
      receivedNode->getConnPool()->getNicList()),
   isResponding(true)
{
}
/**
 * Creates a storage node from the connection details of a received node while taking over the
 * last stat request time and the responding flag of an already known node.
 *
 * @param receivedNode source of alias, numeric ID, ports and NIC list
 * @param oldNode previously stored node whose state is carried over; must not be null
 */
StorageNodeEx::StorageNodeEx(std::shared_ptr<Node> receivedNode,
   std::shared_ptr<StorageNodeEx> oldNode) :
   Node(
      NODETYPE_Storage,
      receivedNode->getAlias(),
      receivedNode->getNumID(),
      receivedNode->getPortUDP(),
      receivedNode->getPortTCP(),
      receivedNode->getConnPool()->getNicList())
{
   const std::chrono::milliseconds previousRequestTime = oldNode->getLastStatRequestTime();
   setLastStatRequestTime(previousRequestTime);

   const bool previouslyResponding = oldNode->getIsResponding();
   setIsResponding(previouslyResponding);
}

View File

@@ -0,0 +1,61 @@
#ifndef STORAGENODEEX_H_
#define STORAGENODEEX_H_
#include <common/nodes/Node.h>
#include <common/Common.h>
#include <common/threading/RWLockGuard.h>
// Plain data record describing one storage node; this is the type that
// TSDatabase::insertStorageNodeData() takes. The struct does not initialize its members.
// NOTE(review): field meanings and units below are inferred from the names - confirm with the
// code that fills this struct.
struct StorageNodeDataContent
{
bool isResponding;
// presumably the current lengths of the node's indirect / direct work queues
unsigned indirectWorkListSize;
unsigned directWorkListSize;
// presumably disk capacity / free space and read / write volume; units not visible here
int64_t diskSpaceTotal;
int64_t diskSpaceFree;
int64_t diskRead;
int64_t diskWrite;
unsigned sessionCount;
std::string hostnameid;
};
/**
 * Node subclass for storage nodes that additionally tracks when stats were last requested and
 * whether the node is responding. Both values are protected by an internal RWLock: getters
 * take it in read mode, setters in write mode.
 */
class StorageNodeEx : public Node
{
   public:
      StorageNodeEx(std::shared_ptr<Node> receivedNode);
      StorageNodeEx(std::shared_ptr<Node> receivedNode, std::shared_ptr<StorageNodeEx> oldNode);

   private:
      mutable RWLock lock; // guards the two members below
      bool isResponding;
      std::chrono::milliseconds lastStatRequestTime{0};

   public:
      /**
       * @return the time stored by the last setLastStatRequestTime() call (0ms initially)
       */
      std::chrono::milliseconds getLastStatRequestTime() const
      {
         RWLockGuard safeLock(lock, SafeRWLock_READ);
         return lastStatRequestTime;
      }

      void setLastStatRequestTime(const std::chrono::milliseconds& time)
      {
         // modifying the member requires the lock in write mode; a read lock would allow
         // concurrent readers and other writers to race with this assignment
         RWLockGuard safeLock(lock, SafeRWLock_WRITE);
         lastStatRequestTime = time;
      }

      /**
       * @return the current value of the responding flag
       */
      bool getIsResponding() const
      {
         RWLockGuard safeLock(lock, SafeRWLock_READ);
         return isResponding;
      }

      void setIsResponding(bool isResponding)
      {
         // write mode for the same reason as in setLastStatRequestTime()
         RWLockGuard safeLock(lock, SafeRWLock_WRITE);
         this->isResponding = isResponding;
      }
};
#endif /*STORAGENODEEX_H_*/