beegfs/meta/build/dist/etc/beegfs-meta.conf
2025-08-10 01:34:16 +02:00

615 lines
23 KiB
Plaintext

# This is a config file for BeeGFS metadata nodes.
# http://www.beegfs.com
# --- [Table of Contents] ---
#
# 1) Settings
# 2) Command Line Arguments
# 3) Basic Settings Documentation
# 4) Advanced Settings Documentation
# 5) Expert Options
#
# --- Section 1.1: [Basic Settings] ---
#
# Hostname (or IP) of the host running the management service
# (see also connMgmtdPort).
sysMgmtdHost =
# Absolute path of the directory where the file system stores its metadata.
storeMetaDirectory =
# Must be enabled for the first startup of the daemon; may be disabled
# afterwards so the daemon does not start with an uninitialized storage
# directory (e.g. when the storage partition is not mounted).
storeAllowFirstRunInit = true
# UUID that the underlying file system of the metadata directory must have.
# If left empty, the check is skipped.
storeFsUUID =
#
# --- Section 1.2: [Advanced Settings] ---
#
# -- Connections & communication (documented in Section 4.1) --
# File containing the shared secret for connection based authentication.
connAuthFile = /etc/beegfs/conn.auth
# true allows the service to run without a connAuthFile; this is considered
# insecure and is not recommended.
connDisableAuthentication = false
# TCP listen backlog.
connBacklogTCP = 128
# Seconds after which a connection to a fallback interface expires.
connFallbackExpirationSecs = 900
# Optional file naming the interfaces other nodes may use, one per line;
# the line number defines the priority.
connInterfacesFile =
# Maximum number of simultaneous connections to the same node.
connMaxInternodeNum = 32
# UDP and TCP port of the metadata node.
connMetaPort = 8005
# UDP and TCP port of the management node.
connMgmtdPort = 8008
# Shifts the UDP and TCP ports by the specified value.
connPortShift = 0
# Optional file listing the IP subnets allowed for outgoing communication.
connNetFilterFile =
# Enables RDMA for Infiniband; only has effect if libbeegfs-ib is installed.
connUseRDMA = true
# Infiniband type of service for all outgoing connections (max: 255).
connRDMATypeOfService = 0
# Optional file listing IP address ranges to which no RDMA connection should
# be established.
connTcpOnlyFilterFile =
#
# -- Logging (documented in Section 4.2) --
# Either "syslog" or "logfile"; with logfile, logs are written to logStdFile.
logType = syslog
# Higher levels give more detailed messages (max: 5); levels above 3 might
# decrease performance.
logLevel = 3
# false logs date & time, true logs the time only.
logNoDate = false
# Maximum number of lines per log file.
logNumLines = 50000
# Number of old files to keep when the log file is rotated.
logNumRotatedFiles = 5
# Log file path; only considered if logType is not equal to syslog.
logStdFile = /var/log/beegfs-meta.log
#
# -- Startup (documented in Section 4.3) --
# Detach the process from its parent (and from stdin/-out/-err).
runDaemonized = true
#
# -- Storage (documented in Section 4.4) --
# Client-side extended attributes; has to be enabled on the clients as well.
storeClientXAttrs = false
# Client-side ACLs; can only be enabled if storeClientXAttrs is set to true.
storeClientACLs = false
# Store BeeGFS metadata as extended attributes (true) or as normal file
# contents (false). Cannot be changed after first startup.
storeUseExtendedAttribs = true
#
# -- System settings (documented in Section 4.5) --
# Optional <targetID>=<domainID> file that groups targets into domains for
# the randominternode target chooser.
sysTargetAttachmentFile =
# Seconds until metadata nodes and storage targets are considered offline;
# must be the same value as in beegfs-mgmtd.conf on the management node.
sysTargetOfflineTimeoutSecs = 180
# true allows non-privileged users to modify stripe pattern settings for
# directories they own.
sysAllowUserSetPattern = false
#
# -- Tuning (documented in Section 4.6) --
# Zero-based NUMA zone to which all threads of this process are bound; if
# unset, all available CPU cores may be used.
tuneBindToNumaZone =
# Number of threads waiting for incoming data events.
tuneNumStreamListeners = 1
# Number of worker threads; 0 means twice the number of CPU cores (but at
# least 4).
tuneNumWorkers = 0
# One of: randomized, roundrobin, randomrobin, randominternode.
tuneTargetChooser = randomized
# true keeps the StreamListener actively polling instead of sleeping: lower
# latency at the cost of higher CPU usage.
tuneUseAggressiveStreamPoll = false
# true uses per-user queues; false uses a single first-come, first-served
# queue.
tuneUsePerUserMsgQueues = false
#
# --- Section 2: [Command Line Arguments] ---
#
# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
# specify a different config file for beegfs-meta.
# All other options in this file can also be used as command line
# arguments, overriding the corresponding config file values.
#
# --- Section 3: [Basic Settings Documentation] ---
#
# [sysMgmtdHost]
# Hostname (or IP) of the host running the management service.
# (See also "connMgmtdPort")
# Default: <none>
# [storeMetaDirectory]
# The absolute path and name of a directory where the file system can store its
# metadata.
# Default: <none>
# [storeFsUUID]
# Requires the underlying file system of the metadata directory to have the same
# UUID as set here. This prevents the meta node from accidentally starting from the
# wrong device, e.g. when it is not properly mounted. To find the UUID to
# put here, you can, for example, use blkid:
#
# blkid -s UUID
#
# This will output all devices on the host with their file system's UUID (if there
# is one). Choose the correct one and copy it here. This command needs to be run
# as root.
#
# If left empty, the check is skipped. It is highly recommended to enable this check
# after installation to prevent data corruption.
# Default: <none>
# [storeAllowFirstRunInit]
# Enables or disables daemon startup with an uninitialized storage directory.
# This can be used to make sure that the daemon does not run when the storage
# partition is not mounted (e.g. because it needs repair after a power outage).
# Note: This setting must be enabled during first startup of the daemon, but
# may be disabled afterwards.
# Default: true
#
# --- Section 4: [Advanced Settings Documentation] ---
#
#
# --- Section 4.1: [Connections & Communication] ---
#
# [connAuthFile]
# The path to a file that contains a shared secret for connection based
# authentication. Only peers that use the same shared secret will be able to
# connect.
# Default: <none>
# [connDisableAuthentication]
# If set to true, explicitly disables connection authentication and allows the
# service to run without a connAuthFile. Running BeeGFS without connection
# authentication is considered insecure and is not recommended.
# Default: false
# [connBacklogTCP]
# The TCP listen backlog.
# Default: 128
# [connFallbackExpirationSecs]
# The time in seconds after which a connection to a fallback interface expires.
# When a fallback connection expires, the system will try to establish a new
# connection to the other host's primary interface (falling back to another
# interface again if necessary).
# Note: The priority of node interfaces can be configured using the
# "connInterfacesFile" parameter.
# Default: 900
# [connInterfacesFile]
# The path to a text file that specifies the names of the interfaces which
# may be used for communication by other nodes. One interface per line. The
# line number also defines the priority of an interface.
# Example: "ib0" in the first line, "eth0" in the second line.
# Values: This setting is optional. If unspecified, all available interfaces
# will be published and priorities will be assigned automatically.
# Note: This information is sent to other hosts to inform them about possible
# communication paths. See connRestrictOutboundInterfaces for this
# configuration's potential effect on outbound connections.
# Default: <none>
# [connInterfacesList]
# Comma-separated list of interface names. Performs the same function as
# connInterfacesFile.
# Default: <none>
# [connRestrictOutboundInterfaces]
# The default behavior of BeeGFS is to use any available network interface
# to establish an outbound connection to a node, according to the TCP/IP
# configuration of the operating system. When connRestrictOutboundInterfaces
# is set to true, the network interfaces used for outbound connections are
# limited to the values specified by connInterfacesFile or connInterfacesList.
# The operating system routing tables are consulted to determine which
# interface to use for a particular node's IP address. If there is no
# route from the configured interfaces that is suitable for a node's IP
# addresses then the connection will fail to be established.
# Default: false
# [connNoDefaultRoute]
# When connRestrictOutboundInterfaces is true, the routing logic would use
# the default route for a Node's IP address when no specific route for that
# address is found in the routing tables. This can be problematic during a
# failure situation, as the default route is not appropriate to use for a
# subnet that is accessible from an interface that has failed.
# connNoDefaultRoute is a comma-separated list of CIDRs that should never
# be accessed via the default route.
# Default: 0.0.0.0/0. This prevents the default route from ever being used.
# [connMaxInternodeNum]
# The maximum number of simultaneous connections to the same node.
# Default: 32
# [connMetaPort]
# The UDP and TCP port of the metadata node.
# Default: 8005
# [connMgmtdPort]
# The UDP and TCP port of the management node.
# Default: 8008
# [connPortShift]
# Shifts all following UDP and TCP ports according to the specified value.
# Intended to make port configuration easier in case you do not want to
# configure each port individually.
# Default: 0
# [connNetFilterFile]
# The path to a text file that specifies allowed IP subnets, which may be used
# for outgoing communication. One subnet per line in classless notation (IP
# address and number of significant bits).
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional. If unspecified, all addresses are allowed
# for outgoing communication.
# Default: <none>
# [connTCPRcvBufSize], [connUDPRcvBufSize]
# Sets the size for TCP and UDP socket receive buffers (SO_RCVBUF). The maximum
# allowed value is determined by sysctl net.core.rmem_max. This value is
# ignored if it is less than the default value determined by
# net.core.rmem_default.
# For legacy reasons, the default value 0 indicates that the buffer size is set
# to connRDMABufNum * connRDMABufSize.
# -1 indicates that the buffer size should be left at the system default.
# Default: 0
# [connUseRDMA]
# Enables the use of Remote Direct Memory Access (RDMA) for Infiniband.
# This setting only has effect if libbeegfs-ib is installed.
# Default: true
# [connRDMABufNum], [connRDMABufSize]
# Infiniband RDMA buffer settings.
# connRDMABufSize is the maximum size of a buffer (in bytes) that will be sent
# over the network; connRDMABufNum is the number of available buffers that can
# be in flight for a single connection. These client settings are also applied
# on the server side for each connection.
# Note: RAM usage per connection is connRDMABufSize x connRDMABufNum x 2. Keep
# resulting RAM usage (x connMaxInternodeNum x number_of_clients) on the
# server in mind when increasing these values.
# Note: The client needs to allocate physically contiguous pages for
# connRDMABufSize, so this setting shouldn't be higher than a few kbytes.
# Default: connRDMABufNum = 70, connRDMABufSize = 8192
# [connRDMATypeOfService]
# Infiniband provides the option to set a type of service for an application.
# This type of service can be used by your subnet manager to provide Quality of
# Service functionality (e.g. setting different service levels).
# In openSM the service type will be mapped to the parameter qos-class, which
# can be handled in your QoS configuration.
# See
# www.openfabrics.org/downloads/OFED/ofed-1.4/OFED-1.4-docs/
# QoS_management_in_OpenSM.txt
# for more information on how to configure openSM for QoS.
# This parameter sets the type of service for all outgoing connections of this
# daemon.
# Default: 0 (Max: 255)
# [connTcpOnlyFilterFile]
# The path to a text file that specifies IP address ranges to which no RDMA
# connection should be established. This is useful e.g. for environments where
# all hosts support RDMA, but some hosts cannot connect via RDMA to some other
# hosts.
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
# line.
# Values: This setting is optional.
# Default: <none>
# [connMessagingTimeouts]
# These constants are used to set some of the connection timeouts for sending
# and receiving data between services in the cluster. They used to be hard-coded
# (CONN_LONG_TIMEOUT, CONN_MEDIUM_TIMEOUT and CONN_SHORT_TIMEOUT) but are now
# made configurable for experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order long, medium, short.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
# changed in production environments unless properly tested and validated.
# Some configurations can lead to service lockups and other subtle issues.
# Please make sure that you know exactly what you are doing and properly
# test any changes you make.
# Default: 600000,90000,30000
# [connRDMATimeouts]
# These constants are used to set some of the timeouts for sending and receiving
# data between services in the cluster via RDMA. They used to be
# hard-coded IBVSOCKET_CONN_TIMEOUT_MS, IBVSOCKET_FLOWCONTROL_ONSEND_TIMEOUT_MS
# and a 10000 literal for poll timeout but are now made configurable for
# experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order connectMS, flowSendMS and pollMS.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
# changed in production environments unless properly tested and validated.
# Some configurations can lead to service lockups and other subtle issues.
# Please make sure that you know exactly what you are doing and properly
# test any changes you make.
# Default: 3000,180000,7500
#
# --- Section 4.2: [Logging] ---
#
# [logType]
# Defines the logger type. This can either be "syslog" to send log messages to
# the general system logger or "logfile". If set to logfile logs will be written
# to logStdFile.
# Default: logfile
# [logLevel]
# Defines the amount of output messages. The higher this level, the more
# detailed the log messages will be.
# Note: Levels above 3 might decrease performance.
# Default: 3 (Max: 5)
# [logNoDate]
# Defines whether "date & time" (=false) or the current "time only" (=true)
# should be logged.
# Default: false
# [logNumLines]
# The maximum number of lines per log file.
# Default: 50000
# [logNumRotatedFiles]
# The number of old files to keep when "logNumLines" is reached and the log file
# is rewritten (log rotation).
# Default: 5
# [logStdFile]
# The path and filename of the log file for standard log messages. The parameter
# will be considered only if logType value is not equal to syslog. If no name
# is specified, the messages will be written to the console.
# Default: /var/log/beegfs-meta.log
#
# --- Section 4.3: [Startup] ---
#
# [runDaemonized]
# Detach the process from its parent (and from stdin/-out/-err).
# Default: true
#
# --- Section 4.4: [Storage] ---
#
# [storeClientXAttrs]
# Enables client-side extended attributes.
# Note: Can only be enabled if the underlying file system supports extended
# attributes.
# Note: This setting has to be explicitly enabled on the clients as well.
# Default: false
# [storeClientACLs]
# Enables the handling and storage of client-side access control lists.
# As ACLs are stored as extended attributes, this setting mainly concerns the
# enforcement and server-side propagation of directory default ACLs.
# Note: This setting can only be enabled if storeClientXAttrs is set to true.
# Note: This setting has to be explicitly enabled on all clients as well.
# Note: Enabling this setting can affect metadata performance.
# Default: false
# [storeUseExtendedAttribs]
# Controls whether BeeGFS metadata is stored as normal file contents (=false)
# or as extended attributes (=true) on the underlying file system. Depending on
# the type and version of your underlying local file system, extended attributes
# typically are significantly faster.
# Note: This setting can only be configured at first startup and cannot be
# changed afterwards.
# Default: true
#
# --- Section 4.5: [System Settings] ---
#
# [sysTargetAttachmentFile]
# This file provides a specification of which targets should be grouped within
# the same domain for randominternode target chooser. This is useful
# e.g. if randominternode is used with multiple storage daemon
# instances running on the same physical hosts when files should be striped
# across different physical hosts.
# Format: Line-separated <targetID>=<domainID> definition.
# Example: "101=1" in first line, "102=1" in second line, "201=2" in third
# line to define that targets "101" and "102" are part of the same
# domain "1", while target "201" is part of a different domain "2". The
# domain IDs in this file are arbitrary values in range 1..65535, the
# targetIDs are actual targetIDs as in "beegfs-ctl --listtargets".
# Default: <none>
# [sysTargetOfflineTimeoutSecs]
# Timeout until the metadata nodes and storage targets are considered offline
# when no target state updates can be fetched from that node.
# Note: This must be the same value as in the /etc/beegfs/beegfs-mgmtd.conf on
# the management node.
# Values: time in seconds
# Default: 180
# [sysAllowUserSetPattern]
# If set to true, non-privileged users are allowed to modify stripe pattern
# settings for directories they own.
# Default: false
# [sysFileEventLogTarget]
# If set, the metadata server will log modification events (which it receives
# from clients) to a Unix Socket specified here. External tools may listen on
# this socket and process the information.
# Note: Each event will be logged in the following format:
# droppedSeqNo (64bit) - missedSeqNo (64bit) - eventType (32bit) - ModifiedPath
# Increased dropped sequence numbers indicate communication errors. Increased
# missed sequence numbers indicate that an event could not be properly reported.
# The following event types are possible:
# 0(file contents flushed), 1(truncate), 2(set attribute), 3(file closed),
# 4(create), 5(mkdir), 6(mknode), 7(create symlink), 8(rmdir), 9(unlink),
# 10(create hardlink), 11(rename), 12(read)
# Default: <unset>
# Example: sysFileEventLogTarget = unix:/run/beegfs/eventlog
# [sysFileEventPersistDirectory]
# If set, the metadata server will persist modification events to this
# directory. If unset, will persist to "eventq" subdirectory under
# metadata-root.
# When the metadata server starts up, it will try to create that directory
# (non-recursive mkdir()) and initialize a new event persist store in this
# directory.
# If creating the directory fails with EEXIST (directory exists) it will assume
# an existing persist store and try to load it.
# Default: <unset> (storeMetaDirectory + "/eventq")
# [sysFileEventPersistSize]
# If event logging is enabled (see sysFileEventLogTarget), this control
# explicitly sets the size (in bytes) for creating the chunk-store file in the
# eventq directory (sysFileEventPersistDirectory).
# The chunk-store is a file containing a ringbuffer where events get
# persisted. The events will eventually be delivered to downstream services
# (for example bee-watch, hive-index).
# Note that this value has no effect when loading an existing queue directory.
# Default: 0 (use internal defaults / guesswork)
# Example: sysFileEventPersistSize = 2g # 2 Gigabytes
# Note: the value will be rounded up to the next power of 2.
#
# --- Section 4.6: [Tuning] ---
#
# [tuneBindToNumaZone]
# Defines the zero-based NUMA zone number to which all threads of this process
# should be bound. If unset, all available CPU cores may be used.
# Zone binding is especially useful if the corresponding devices (e.g. storage
# controller and network card) are also attached to the same zone.
# Note: The Linux kernel shows NUMA zones at /sys/devices/system/node/nodeXY
# Default: <unset>
# [tuneNumStreamListeners]
# The number of threads waiting for incoming data events. Connections with
# incoming data will be handed over to the worker threads for actual message
# processing.
# Default: 1
# [tuneNumWorkers]
# The number of worker threads. Higher number of workers allows the server to
# handle more client requests in parallel. On dedicated metadata servers, this
# is typically set to a value between four and eight times the number of CPU
# cores.
# Note: 0 means use twice the number of CPU cores (but at least 4).
# Default: 0
# [tuneTargetChooser]
# The algorithm to choose storage targets for file creation.
# Values:
# * randomized: choose targets in a random fashion.
# * roundrobin: choose targets in a deterministic round-robin fashion.
# (Use this only for benchmarking of large-file streaming throughput.)
# * randomrobin: randomized round-robin; choose targets in a deterministic
# round-robin fashion, but random shuffle the result targets list.
# * randominternode: choose random targets that are assigned to different
# storage nodeIDs. (See sysTargetAttachmentFile if multiple storage
# daemon instances are running on the same physical host.)
# Note: Only the randomized chooser honors client's preferred nodes/targets
# settings.
# Default: randomized
# [tuneUseAggressiveStreamPoll]
# If set to true, the StreamListener component, which waits for incoming
# requests, will keep actively polling for events instead of sleeping until
# an event occurs. Active polling will reduce latency for processing of
# incoming requests at the cost of higher CPU usage.
# Default: false
# [tuneUsePerUserMsgQueues]
# If set to true, per-user queues will be used to decide which of the pending
# requests is handled by the next available worker thread. If set to false,
# a single queue will be used and incoming requests will be processed in
# first-come, first-served order.
# Per-user queues are intended to improve fairness in multi-user environments.
# Default: false
# [tuneWorkerBufSize]
# The buffer size, which is allocated twice by each worker thread for IO and
# network data buffering.
# Note: For optimal performance, this value must be at least 128k.
# Default: 1m
# [tuneNumResyncSlaves]
# The number of threads used to perform the bulk synchronizations for a buddy
# mirror resync.
# Default: 12
#
# --- Section 4.7: [Quota settings] ---
#
# [quotaEnableEnforcement]
# Enables enforcement of user and group quota limits by periodically checking
# if the limits are exceeded.
# Note: This uses quota information provided by the underlying local file
# systems of the storage targets.
# Note: Set quota limits with "beegfs-ctl --setquota".
# Note: If this option is true, performance might be slightly decreased due to
# extra information tracking.
# Note: Must be set to the same value in storage servers and mgmtd to be
# effective.
# Default: false
#
# --- Section 5: [Expert options] ---
#
# [storeSelfHealEmptyFiles]
# Delete metadata entries with no content and handle them as though they had
# not existed. Metadata entries with no content can be created by backup
# software that has incorrectly saved or restored metadata.
# Default: true
# [tuneNumCommSlaves]
# Number of threads dedicated to parallel communication with other nodes.
# Default: 2 * tuneNumWorkers
# [tuneCommSlaveBufSize]
# Buffer size used by communication threads, analogous to tuneWorkerBufSize.
# Default: 1m
# [tuneDefaultChunkSize], [tuneDefaultNumStripeTargets]
# Chunk size and number of targets to use when creating the root directory.
# Files and directories inherit these settings from their parents.
# Default: 512k, 4
# [tuneProcessFDLimit]
# Sets the maximum number of files the server can open. If the process rlimit
# is already larger than this number the limit will not be decreased.
# Default: 50000
# [tuneWorkerNumaAffinity]
# Distributes worker threads equally among NUMA nodes on the system when set.
# Default: false
# [tuneListenerNumaAffinity]
# Distributes listener threads equally among NUMA nodes on the system when set.
# Default: false
# [tuneListenerPrioShift]
# Applies a niceness offset to listener threads. Negative values will decrease
# niceness (increase priority), positive values will increase niceness (decrease
# priority).
# Default: -1
# [tuneDirMetadataCacheLimit]
# Number of recently used directory structures to keep in memory.
# Increasing this value may reduce memory allocations and disk I/O.
# Default: 1024
# [tuneLockGrantWaitMS], [tuneLockGrantNumRetries]
# Acknowledgement wait parameters for lock grant messages.
# Locks that are granted asynchronously (i.e. a client is waiting on the lock)
# notify waiting clients with UDP packets. For each waiter a notification
# packet is sent and the server waits for tuneLockGrantWaitMS to receive an
# acknowledgement from the client. This process is repeated up to
# tuneLockGrantNumRetries times.
# Default: 333, 15
# [tuneRotateMirrorTargets]
# Choose mirror targets for RAID10 patterns by rotating the selected targets.
# Default: false
# [tuneEarlyUnlinkResponse]
# Respond to unlink messages before chunk files have been unlinked.
# Default: true
# [tuneMirrorTimestamps]
# When buddy mirroring, mirror timestamps as exactly as possible. When this is
# set to `false` timestamps of mirrored files may be incorrect after a failover
# has occurred. Disabling timestamp mirroring gives a slight performance boost.
# Default: true
# [tuneDisposalGCPeriod]
# If > 0, disposal files will not be removed instantly. Instead, a garbage collector
# will run on each meta node. This sets the wait time in seconds between runs.
# Default: 0
# [quotaEarlyChownResponse]
# Respond to client chown() requests before chunk files have been changed.
# Quota relies on chunk files having the owner and group information stored in
# metadata. Therefore, setting this to true creates a short time window after
# a chown where the application and the servers have a different view on quota.
# Default: true
# [pidFile]
# Creates a PID file for the daemon when set. Set by init scripts.
# Default: <none>