beegfs/storage/build/dist/etc/beegfs-storage.conf
2025-08-10 01:34:16 +02:00

544 lines
20 KiB
Plaintext

# This is a config file for BeeGFS storage nodes.
# http://www.beegfs.com
# --- [Table of Contents] ---
#
# 1) Settings
# 2) Command Line Arguments
# 3) Basic Settings Documentation
# 4) Advanced Settings Documentation
#
# --- Section 1.1: [Basic Settings] ---
#
sysMgmtdHost =
storeStorageDirectory =
storeAllowFirstRunInit = true
storeFsUUID =
#
# --- Section 1.2: [Advanced Settings] ---
#
connAuthFile = /etc/beegfs/conn.auth
connDisableAuthentication = false
connBacklogTCP = 128
connInterfacesFile =
connMaxInternodeNum = 12
connMgmtdPort = 8008
connStoragePort = 8003
connPortShift = 0
connNetFilterFile =
connUseRDMA = true
connRDMATypeOfService = 0
connTcpOnlyFilterFile =
logType = syslog
logLevel = 3
logNoDate = false
logNumLines = 50000
logNumRotatedFiles = 5
logStdFile = /var/log/beegfs-storage.log
runDaemonized = true
sysResyncSafetyThresholdMins = 10
sysTargetOfflineTimeoutSecs = 180
tuneBindToNumaZone =
tuneFileReadAheadSize = 0m
tuneFileReadAheadTriggerSize = 4m
tuneFileReadSize = 128k
tuneFileWriteSize = 128k
tuneFileWriteSyncSize = 0m
tuneNumResyncGatherSlaves = 6
tuneNumResyncSlaves = 12
tuneNumStreamListeners = 1
tuneNumWorkers = 12
tuneUseAggressiveStreamPoll = false
tuneUsePerTargetWorkers = true
tuneUsePerUserMsgQueues = false
tuneWorkerBufSize = 4m
#
# --- Section 2: [Command Line Arguments] ---
#
# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
# specify a different config file for beegfs-storage.
#
# All other options in this file can also be used as command line
# arguments, overriding the corresponding config file values.
#
# --- Section 3: [Basic Settings Documentation] ---
#
# [sysMgmtdHost]
# Hostname (or IP) of the host running the management service.
# (See also "connMgmtdPort")
# Default: <none>
# [storeStorageDirectory]
# The absolute path to a storage target. A storage target is a directory where
# the file system can store raw user file contents. Multiple targets can be
# specified as a comma-separated list.
# Example: /mnt/beegfs_storage1,/mnt/beegfs_storage2
# Default: <none>
# [storeAllowFirstRunInit]
# Enables or disables daemon startup with an uninitialized storage directory.
# This can be used to make sure that the daemon does not run when the storage
# partition is not mounted (e.g. because it needs repair after a power outage).
# Note: This setting must be enabled during first startup of the daemon, but
# may be disabled afterwards.
# Default: true
# [storeFsUUID]
# Requires the underlying file systems of the storage targets to have the same
# UUID as set here. This prevents the storage node from accidentally starting targets
# from a wrong device, e.g. when it is not properly mounted. To find the UUID to
# put here, you can, for example, use blkid:
#
# blkid -s UUID
#
# This will output all devices on the host with their file system's UUID (if there
# is one). Choose the correct ones and list them here. This command needs to be run
# as root.
#
# The UUIDs need to be listed the same way and order as the storage targets paths
# provided with the storeStorageDirectory setting above as they are checked against
# the paths in that order.
#
# If left empty, the check is skipped. It is highly recommended to enable this check
# after installation to prevent data corruption.
# Default: <none>
#
#
# --- Section 4: [Advanced Settings Documentation] ---
#
#
# --- Section 4.1: [Connections & Communication] ---
#
# [connAuthFile]
# The path to a file that contains a shared secret for connection based
# authentication. Only peers that use the same shared secret will be able to
# connect.
# Default: <none>
# [connDisableAuthentication]
# If set to true, explicitly disables connection authentication and allows the
# service to run without a connAuthFile. Running BeeGFS without connection
# authentication is considered insecure and is not recommended.
# Default: false
# [connBacklogTCP]
# The TCP listen backlog.
# Default: 128
# [connInterfacesFile]
# The path to a text file that specifies the names of the interfaces which
# may be used for communication. One interface per line. The line number also
# defines the priority of the interface.
# Example: "ib0" in the first line, "eth0" in the second line.
# Values: This setting is optional. If unspecified, all available interfaces
# will be used and priorities will be assigned automatically.
# Note: This information is sent to other hosts to inform them about possible
# communication paths. See connRestrictOutboundInterfaces for this
# configuration's potential effect on outbound connections.
# Default: <none>
# [connInterfacesList]
# Comma-separated list of interface names. Performs the same function as
# connInterfacesFile.
# Default: <none>
# [connRestrictOutboundInterfaces]
# The default behavior of BeeGFS is to use any available network interface
# to establish an outbound connection to a node, according to the TCP/IP
# configuration of the operating system. When connRestrictOutboundInterfaces
# is set to true, the network interfaces used for outbound connections are
# limited to the values specified by connInterfacesFile or connInterfacesList.
# The operating system routing tables are consulted to determine which
# interface to use for a particular node's IP address. If there is no
# route from the configured interfaces that is suitable for a node's IP
# addresses then the connection will fail to be established.
# Default: false
# [connNoDefaultRoute]
# When connRestrictOutboundInterfaces is true, the routing logic would use
# the default route for a Node's IP address when no specific route for that
# address is found in the routing tables. This can be problematic during a
# failure situation, as the default route is not appropriate to use for a
# subnet that is accessible from an interface that has failed.
# connNoDefaultRoute is a comma-separated list of CIDRs that should never
# be accessed via the default route.
# Default: 0.0.0.0/0. This prevents the default route from ever being used.
# [connMaxInternodeNum]
# The maximum number of simultaneous connections to the same node.
# Default: 12
# [connMgmtdPort]
# The UDP and TCP port of the management node.
# Default: 8008
# [connStoragePort]
# The UDP and TCP port of the storage node.
# Default: 8003
# [connPortShift]
# Shifts all following UDP and TCP ports according to the specified value.
# Intended to make port configuration easier in case you do not want to
# configure each port individually.
# Default: 0
# [connNetFilterFile]
# The path to a text file that specifies allowed IP subnets, which may be used
# for outgoing communication. One subnet per line in classless notation (IP
# address and number of significant bits).
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional. If unspecified, all addresses are allowed
# for outgoing communication.
# Default: <none>
# [connTCPRcvBufSize], [connUDPRcvBufSize]
# Sets the size for TCP and UDP socket receive buffers (SO_RCVBUF). The maximum
# allowed value is determined by sysctl net.core.rmem_max. This value is
# ignored if it is less than the default value determined by
# net.core.rmem_default.
# For legacy reasons, the default value 0 indicates that the buffer size is set
# to connRDMABufNum * connRDMABufSize.
# -1 indicates that the buffer size should be left at the system default.
# Default: 0
# [connUseRDMA]
# Enables the use of Remote Direct Memory Access (RDMA) for Infiniband.
# This setting only has effect if libbeegfs-ib is installed.
# Default: true
# [connRDMABufNum], [connRDMABufSize]
# Infiniband RDMA buffer settings.
# connRDMABufSize is the maximum size of a buffer (in bytes) that will be sent
# over the network; connRDMABufNum is the number of available buffers that can
# be in flight for a single connection. These client settings are also applied
# on the server side for each connection.
# Note: RAM usage per connection is connRDMABufSize x connRDMABufNum x 2. Keep
# resulting RAM usage (x connMaxInternodeNum x number_of_clients) on the
# server in mind when increasing these values.
# Note: The client needs to allocate physically contiguous pages for
# connRDMABufSize, so this setting shouldn't be higher than a few kbytes.
# Default: connRDMABufSize=8192, connRDMABufNum=70
# [connRDMATypeOfService]
# Infiniband provides the option to set a type of service for an application.
# This type of service can be used by your subnet manager to provide Quality of
# Service functionality (e.g. setting different service levels).
# In openSM the service type will be mapped to the parameter qos-class, which
# can be handled in your QoS configuration.
# See
# www.openfabrics.org/downloads/OFED/ofed-1.4/OFED-1.4-docs/
# QoS_management_in_OpenSM.txt
# for more information on how to configure openSM for QoS.
# This parameter sets the type of service for all outgoing connections of this
# daemon.
# Default: 0 (Max: 255)
# [connTcpOnlyFilterFile]
# The path to a text file that specifies IP address ranges to which no RDMA
# connection should be established. This is useful e.g. for environments where
# all hosts support RDMA, but some hosts cannot connect via RDMA to some other
# hosts.
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional.
# Default: <none>
# [connMessagingTimeouts]
# These constants are used to set some of the connection timeouts for sending
# and receiving data between services in the cluster. They used to be hard-coded
# (CONN_LONG_TIMEOUT, CONN_MEDIUM_TIMEOUT and CONN_SHORT_TIMEOUT) but are now
# made configurable for experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order long, medium, short.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
# changed in production environments unless properly tested and validated.
# Some configurations can lead to service lockups and other subtle issues.
# Please make sure that you know exactly what you are doing and properly
# test any changes you make.
# Default: 600000,90000,30000
# [connRDMATimeouts]
# These constants are used to set some of the timeouts for sending and receiving
# data between services in the cluster via RDMA. They used to be
# hard-coded IBVSOCKET_CONN_TIMEOUT_MS, IBVSOCKET_FLOWCONTROL_ONSEND_TIMEOUT_MS
# and a 10000 literal for poll timeout but are now made configurable for
# experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order connectMS, flowSendMS and pollMS.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
# changed in production environments unless properly tested and validated.
# Some configurations can lead to service lockups and other subtle issues.
# Please make sure that you know exactly what you are doing and properly
# test any changes you make.
# Default: 3000,180000,7500
# [connFallbackExpirationSecs]
# The time in seconds after which a connection to a fallback interface expires.
# When a fallback connection expires, the system will try to establish a new
# connection to the other host's primary interface (falling back to another
# interface again if necessary).
# Note: The priority of node interfaces can be configured using the
# "connInterfacesFile" parameter.
# Default: 900
#
# --- Section 4.2: [Logging] ---
#
# [logType]
# Defines the logger type. This can either be "syslog" to send log messages to
# the general system logger or "logfile". If set to logfile logs will be written
# to logStdFile.
# Default: logfile
# [logLevel]
# Defines the amount of output messages. The higher this level, the more
# detailed the log messages will be.
# Note: Levels above 3 might decrease performance.
# Default: 3 (Max: 5)
# [logNoDate]
# Defines whether "date & time" (=false) or the current "time only" (=true)
# should be logged.
# Default: false
# [logNumLines]
# The maximum number of lines per log file.
# Default: 50000
# [logNumRotatedFiles]
# The number of old files to keep when "logNumLines" is reached and the log file
# is rewritten (log rotation).
# Default: 5
# [logStdFile]
# The path and filename of the log file for standard log messages.
# The parameter will be considered only if logType value is not equal to syslog.
# If no name is specified, the messages will be written to the console.
# Default: /var/log/beegfs-storage.log
#
# --- Section 4.4: [Startup] ---
#
# [runDaemonized]
# Detach the process from its parent (and from stdin/-out/-err).
# Default: true
#
# --- Section 4.5: [System Settings] ---
#
# [sysResyncSafetyThresholdMins]
# Automatic mirror resyncs use the last successful communication time between
# two mirror buddies to skip verification of files that were not recently
# modified before a server went offline. As BeeGFS uses server-side write
# caching (where the cache is flushed to disk every minute by the Linux kernel),
# it is possible that a server loses its write cache in case of a crash, which
# contained data before the last successful communication. This value adds an
# extra amount of time to the last successful communication timestamp to include
# the time window of a potential cache loss.
# Setting the value to 0 does not mean "no threshold"; instead, it completely
# disables the use of the last successful communication timestamp, i.e. a full
# resync will be done.
# Values: time in minutes
# Default: 10
# [sysTargetOfflineTimeoutSecs]
# Timeout until targets on a storage server are considered offline by the
# management node when no target state updates can be fetched from that server.
# Note: This must be the same value as in the /etc/beegfs/beegfs-mgmtd.conf on
# the management node.
# Values: time in seconds
# Default: 180
#
# --- Section 4.6: [Tuning] ---
#
# [tuneBindToNumaZone]
# Defines the zero-based NUMA zone number to which all threads of this process
# should be bound. If unset, all available CPU cores may be used.
# Zone binding is especially useful if the corresponding devices (e.g. storage
# controller and network card) are also attached to the same zone.
# Note: The Linux kernel shows NUMA zones at /sys/devices/system/node/nodeXY
# Default: <unset>
# [tuneFileReadAheadSize], [tuneFileReadAheadTriggerSize]
# tuneFileReadAheadSize is the byte range submitted to the kernel for read-ahead
# after at least tuneFileReadAheadTriggerSize file bytes were read sequentially
# from a target.
# Values: A typical setting is tuneFileReadAheadSize=2m. The optimal setting
# depends on your storage system configuration (e.g. your RAID layout).
# Default: tuneFileReadAheadSize=0, tuneFileReadAheadTriggerSize=4m
# [tuneFileReadSize], [tuneFileWriteSize]
# The maximum amount of data that the server should write to (or read from)
# the underlying local file system in a single operation.
# Note: Setting these values higher than the file chunk size or
# tuneWorkerBufSize has no effect.
# Default: tuneFileReadSize=128k, tuneFileWriteSize=128k
# [tuneFileWriteSyncSize]
# The number of sequentially written bytes (per file) after which the kernel
# will be advised to commit the written data to the underlying storage device.
# This is intended to avoid delays until the kernel notices that it is time to
# commit written data, which would reduce streaming write throughput.
# Note: When this setting is enabled, it is important to use the deadline
# scheduler (/sys/block/<...>/scheduler) to avoid reader starvation. It is
# also important to use a large request queue (/sys/block/<...>/nr_requests),
# as writes can only be asynchronous while there are free slots in the queue.
# Values: "0" disables this mechanism. Use "32m" (or a close even multiple of
# your RAID stripe set size) to test the effects of this.
# Default: 0
# [tuneNumResyncGatherSlaves]
# The number of threads (per target) used to gather file system information for
# a buddy mirror resync.
# Default: 6
# [tuneNumResyncSlaves]
# The number of threads (per target) used to perform the actual file and
# directory synchronizations for a buddy mirror resync.
# Default: 12
# [tuneNumStreamListeners]
# The number of threads waiting for incoming data events. Connections with
# incoming data will be handed over to the worker threads for actual message
# processing.
# Default: 1
# [tuneNumWorkers]
# The number of worker threads. Higher number of workers allows the server to
# handle more client requests in parallel, which also results in more
# concurrent access to the underlying storage device.
# Note: See also tuneUsePerTargetWorkers.
# Default: 12
# [tuneUseAggressiveStreamPoll]
# If set to true, the StreamListener component, which waits for incoming
# requests, will keep actively polling for events instead of sleeping until
# an event occurs. Active polling will reduce latency for processing of
# incoming requests at the cost of higher CPU usage.
# Default: false
# [tuneUsePerTargetWorkers]
# If set to true, a separate set of worker threads is created and exclusively
# assigned to each attached storage target. If set to false, a global set of
# worker threads is used and each worker thread can handle requests for all
# targets.
# Separate worker threads are intended to improve balance of I/O workload
# across targets under high load (i.e. when the number of concurrently incoming
# requests is higher than the number of worker threads).
# Note: If set to true, the actual number of created worker threads is
# tuneNumWorkers x number_of_attached_targets.
# Default: true
# [tuneUsePerUserMsgQueues]
# If set to true, per-user queues will be used to decide which of the pending
# requests is handled by the next available worker thread. If set to false, a
# single queue will be used and incoming requests will be processed in
# first-come, first-served order.
# Per-user queues are intended to improve fairness in multi-user environments.
# Default: false
# [tuneWorkerBufSize]
# The buffer size, which is allocated twice by each worker thread for IO and
# network data buffering.
# Note: For optimal performance, this value must be at least 1MB higher than
# tuneFileReadSize and tuneFileWriteSize.
# Default: 4m
#
# --- Section 4.7: [Quota settings] ---
#
# [quotaEnableEnforcement]
# Enables enforcement of user and group quota limits by periodically checking
# if the limits are exceeded.
# Note: This uses quota information provided by the underlying local file
# systems of the storage targets.
# Note: Set quota limits with "beegfs-ctl --setquota".
# Note: If this option is true, performance might be slightly decreased due to
# extra information tracking.
# Note: Must be set to the same value in meta servers and mgmtd to be
# effective.
# Default: false
#
# --- Section 5: [Expert options] ---
#
# [tuneProcessFDLimit]
# Sets the maximum number of files the server can open. If the process rlimit
# is already larger than this number the limit will not be decreased.
# Default: 50000
# [tuneWorkerNumaAffinity]
# Distributes worker threads equally among NUMA nodes on the system when set.
# Default: false
# [tuneListenerNumaAffinity]
# Distributes listener threads equally among NUMA nodes on the system when set.
# Default: false
# [tuneListenerPrioShift]
# Applies a niceness offset to listener threads. Negative values will decrease
# niceness (increase priority), positive values will increase niceness (decrease
# priority).
# Default: -1
# [tuneDirCacheLimit]
# Number of recently used chunk directory structures to keep in memory.
# Increasing this value may reduce memory allocations.
# Default: 1024
# [tuneEarlyStat]
# Compute file size and storage block usage of a chunk before close() is set.
# Some filesystems may report block usage greater than required to hold all
# file data if stat() is called before close().
# Default: false
# [quotaDisableZfsSupport]
# Disable quota support for ZFS if quota is enabled.
# ZFS quota requires the libzfs library. Errors while loading the library are
# reported if quota is enabled and the library is not installed (or the wrong
# version is installed)
# Default: false
# [pidFile]
# Creates a PID file for the daemon when set. Set by init scripts.
# Default: <none>