544 lines
20 KiB
Plaintext
544 lines
20 KiB
Plaintext
# This is a config file for BeeGFS storage nodes.
|
|
# http://www.beegfs.com
|
|
|
|
|
|
# --- [Table of Contents] ---
|
|
#
|
|
# 1) Settings
|
|
# 2) Command Line Arguments
|
|
# 3) Basic Settings Documentation
|
|
# 4) Advanced Settings Documentation
|
|
|
|
|
|
#
|
|
# --- Section 1.1: [Basic Settings] ---
|
|
#
|
|
|
|
sysMgmtdHost =
|
|
|
|
storeStorageDirectory =
|
|
storeAllowFirstRunInit = true
|
|
storeFsUUID =
|
|
|
|
|
|
#
|
|
# --- Section 1.2: [Advanced Settings] ---
|
|
#
|
|
|
|
connAuthFile = /etc/beegfs/conn.auth
|
|
connDisableAuthentication = false
|
|
connBacklogTCP = 128
|
|
connInterfacesFile =
|
|
connMaxInternodeNum = 12
|
|
|
|
connMgmtdPort = 8008
|
|
connStoragePort = 8003
|
|
connPortShift = 0
|
|
|
|
connNetFilterFile =
|
|
|
|
connUseRDMA = true
|
|
connRDMATypeOfService = 0
|
|
connTcpOnlyFilterFile =
|
|
|
|
logType = syslog
|
|
logLevel = 3
|
|
logNoDate = false
|
|
logNumLines = 50000
|
|
logNumRotatedFiles = 5
|
|
logStdFile = /var/log/beegfs-storage.log
|
|
|
|
runDaemonized = true
|
|
|
|
sysResyncSafetyThresholdMins = 10
|
|
sysTargetOfflineTimeoutSecs = 180
|
|
|
|
tuneBindToNumaZone =
|
|
tuneFileReadAheadSize = 0m
|
|
tuneFileReadAheadTriggerSize = 4m
|
|
tuneFileReadSize = 128k
|
|
tuneFileWriteSize = 128k
|
|
tuneFileWriteSyncSize = 0m
|
|
|
|
tuneNumResyncGatherSlaves = 6
|
|
tuneNumResyncSlaves = 12
|
|
tuneNumStreamListeners = 1
|
|
tuneNumWorkers = 12
|
|
tuneUseAggressiveStreamPoll = false
|
|
tuneUsePerTargetWorkers = true
|
|
tuneUsePerUserMsgQueues = false
|
|
tuneWorkerBufSize = 4m
|
|
|
|
|
|
#
|
|
# --- Section 2: [Command Line Arguments] ---
|
|
#
|
|
|
|
# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
|
|
# specify a different config file for beegfs-storage.
|
|
#
|
|
# All other options in this file can also be used as command line
|
|
# arguments, overriding the corresponding config file values.
|
|
|
|
|
|
#
|
|
# --- Section 3: [Basic Settings Documentation] ---
|
|
#
|
|
|
|
# [sysMgmtdHost]
|
|
# Hostname (or IP) of the host running the management service.
|
|
# (See also "connMgmtdPort")
|
|
# Default: <none>
|
|
|
|
# [storeStorageDirectory]
|
|
# The absolute path to a storage target. A storage target is a directory where
|
|
# the file system can store raw user file contents. Multiple targets can be
|
|
# specified as a comma-separated list.
|
|
# Example: /mnt/beegfs_storage1,/mnt/beegfs_storage2
|
|
# Default: <none>
|
|
|
|
# [storeAllowFirstRunInit]
|
|
# Enables or disables daemon startup with an uninitialized storage directory.
|
|
# This can be used to make sure that the daemon does not run when the storage
|
|
# partition is not mounted (e.g. because it needs repair after a power outage).
|
|
# Note: This setting must be enabled during first startup of the daemon, but
|
|
# may be disabled afterwards.
|
|
# Default: true
|
|
|
|
# [storeFsUUID]
|
|
# Requires the underlying file systems of the storage targets to have the same
|
|
# UUID as set here. This prevents the storage node from accidentally starting targets
|
|
# from a wrong device, e.g. when it is not properly mounted. To find the UUID to
|
|
# put here, you can, for example, use blkid:
|
|
#
|
|
# blkid -s UUID
|
|
#
|
|
# This will output all devices on the host with their file system UUID (if there
|
|
# is one). Choose the correct ones and list them here. This command needs to be run
|
|
# as root.
|
|
#
|
|
# The UUIDs need to be listed the same way and order as the storage targets paths
|
|
# provided with the storeStorageDirectory setting above as they are checked against
|
|
# the paths in that order.
|
|
#
|
|
# If left empty, the check is skipped. It is highly recommended to enable this check
|
|
# after installation to prevent data corruption.
|
|
# Default: <none>
|
|
#
|
|
|
|
#
|
|
# --- Section 4: [Advanced Settings Documentation] ---
|
|
#
|
|
|
|
#
|
|
# --- Section 4.1: [Connections & Communication] ---
|
|
#
|
|
|
|
# [connAuthFile]
|
|
# The path to a file that contains a shared secret for connection based
|
|
# authentication. Only peers that use the same shared secret will be able to
|
|
# connect.
|
|
# Default: <none>
|
|
|
|
# [connDisableAuthentication]
|
|
# If set to true, explicitly disables connection authentication and allows the
|
|
# service to run without a connAuthFile. Running BeeGFS without connection
|
|
# authentication is considered insecure and is not recommended.
|
|
# Default: false
|
|
|
|
# [connBacklogTCP]
|
|
# The TCP listen backlog.
|
|
# Default: 128
|
|
|
|
# [connInterfacesFile]
|
|
# The path to a text file that specifies the names of the interfaces which
|
|
# may be used for communication. One interface per line. The line number also
|
|
# defines the priority of the interface.
|
|
# Example: "ib0" in the first line, "eth0" in the second line.
|
|
# Values: This setting is optional. If unspecified, all available interfaces
|
|
# will be used and priorities will be assigned automatically.
|
|
# Note: This information is sent to other hosts to inform them about possible
|
|
# communication paths. See connRestrictOutboundInterfaces for this
|
|
# configuration's potential effect on outbound connections.
|
|
# Default: <none>
|
|
|
|
# [connInterfacesList]
|
|
# Comma-separated list of interface names. Performs the same function as
|
|
# connInterfacesFile.
|
|
# Default: <none>
|
|
|
|
# [connRestrictOutboundInterfaces]
|
|
# The default behavior of BeeGFS is to use any available network interface
|
|
# to establish an outbound connection to a node, according to the TCP/IP
|
|
# configuration of the operating system. When connRestrictOutboundInterfaces
|
|
# is set to true, the network interfaces used for outbound connections are
|
|
# limited to the values specified by connInterfacesFile or connInterfacesList.
|
|
# The operating system routing tables are consulted to determine which
|
|
# interface to use for a particular node's IP address. If there is no
|
|
# route from the configured interfaces that is suitable for a node's IP
|
|
# addresses then the connection will fail to be established.
|
|
# Default: false
|
|
|
|
# [connNoDefaultRoute]
|
|
# When connRestrictOutboundInterfaces is true, the routing logic would use
|
|
# the default route for a Node's IP address when no specific route for that
|
|
# address is found in the routing tables. This can be problematic during a
|
|
# failure situation, as the default route is not appropriate to use for a
|
|
# subnet that is accessible from an interface that has failed.
|
|
# connNoDefaultRoute is a comma-separated list of CIDRs that should never
|
|
# be accessed via the default route.
|
|
# Default: 0.0.0.0/0. This prevents the default route from ever being used.
|
|
|
|
# [connMaxInternodeNum]
|
|
# The maximum number of simultaneous connections to the same node.
|
|
# Default: 12
|
|
|
|
# [connMgmtdPort]
|
|
# The UDP and TCP port of the management node.
|
|
# Default: 8008
|
|
|
|
# [connStoragePort]
|
|
# The UDP and TCP port of the storage node.
|
|
# Default: 8003
|
|
|
|
# [connPortShift]
|
|
# Shifts all following UDP and TCP ports according to the specified value.
|
|
# Intended to make port configuration easier in case you do not want to
|
|
# configure each port individually.
|
|
# Default: 0
|
|
|
|
# [connNetFilterFile]
|
|
# The path to a text file that specifies allowed IP subnets, which may be used
|
|
# for outgoing communication. One subnet per line in classless notation (IP
|
|
# address and number of significant bits).
|
|
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
|
|
# line.
|
|
# Values: This setting is optional. If unspecified, all addresses are allowed
|
|
# for outgoing communication.
|
|
# Default: <none>
|
|
|
|
# [connTCPRcvBufSize], [connUDPRcvBufSize]
|
|
# Sets the size for TCP and UDP socket receive buffers (SO_RCVBUF). The maximum
|
|
# allowed value is determined by sysctl net.core.rmem_max. This value is
|
|
# ignored if it is less than the default value determined by
|
|
# net.core.rmem_default.
|
|
# For legacy reasons, the default value 0 indicates that the buffer size is set
|
|
# to connRDMABufNum * connRDMABufSize.
|
|
# -1 indicates that the buffer size should be left at the system default.
|
|
# Default: 0
|
|
|
|
# [connUseRDMA]
|
|
# Enables the use of Remote Direct Memory Access (RDMA) for Infiniband.
|
|
# This setting only has effect if libbeegfs-ib is installed.
|
|
# Default: true
|
|
|
|
# [connRDMABufNum], [connRDMABufSize]
|
|
# Infiniband RDMA buffer settings.
|
|
# connRDMABufSize is the maximum size of a buffer (in bytes) that will be sent
|
|
# over the network; connRDMABufNum is the number of available buffers that can
|
|
# be in flight for a single connection. These client settings are also applied
|
|
# on the server side for each connection.
|
|
# Note: RAM usage per connection is connRDMABufSize x connRDMABufNum x 2. Keep
|
|
# resulting RAM usage (x connMaxInternodeNum x number_of_clients) on the
|
|
# server in mind when increasing these values.
|
|
# Note: The client needs to allocate physically contiguous pages for
|
|
# connRDMABufSize, so this setting shouldn't be higher than a few kbytes.
|
|
# Default: connRDMABufNum=70, connRDMABufSize=8192
|
|
|
|
# [connRDMATypeOfService]
|
|
# Infiniband provides the option to set a type of service for an application.
|
|
# This type of service can be used by your subnet manager to provide Quality of
|
|
# Service functionality (e.g. setting different service levels).
|
|
# In openSM the service type will be mapped to the parameter qos-class, which
|
|
# can be handled in your QoS configuration.
|
|
# See
|
|
# www.openfabrics.org/downloads/OFED/ofed-1.4/OFED-1.4-docs/
|
|
# QoS_management_in_OpenSM.txt
|
|
# for more information on how to configure openSM for QoS.
|
|
# This parameter sets the type of service for all outgoing connections of this
|
|
# daemon.
|
|
# Default: 0 (Max: 255)
|
|
|
|
# [connTcpOnlyFilterFile]
|
|
# The path to a text file that specifies IP address ranges to which no RDMA
|
|
# connection should be established. This is useful e.g. for environments where
|
|
# all hosts support RDMA, but some hosts cannot connect via RDMA to some other
|
|
# hosts.
|
|
# Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second
|
|
# line.
|
|
# Values: This setting is optional.
|
|
# Default: <none>
|
|
|
|
# [connMessagingTimeouts]
|
|
# These constants are used to set some of the connection timeouts for sending
|
|
# and receiving data between services in the cluster. They used to be hard-coded
|
|
# (CONN_LONG_TIMEOUT, CONN_MEDIUM_TIMEOUT and CONN_SHORT_TIMEOUT) but are now
|
|
# made configurable for experimentation purposes.
|
|
# This option takes three integer values of milliseconds, separated by a comma
|
|
# in the order long, medium, short.
|
|
# WARNING: This is an EXPERIMENTAL configuration option that should not be
|
|
# changed in production environments unless properly tested and validated.
|
|
# Some configurations can lead to service lockups and other subtle issues.
|
|
# Please make sure that you know exactly what you are doing and properly
|
|
# test any changes you make.
|
|
# Default: 600000,90000,30000
|
|
|
|
# [connRDMATimeouts]
|
|
# These constants are used to set some of the timeouts for sending and receiving
|
|
# data between services in the cluster via RDMA. They used to be
|
|
# hard-coded IBVSOCKET_CONN_TIMEOUT_MS, IBVSOCKET_FLOWCONTROL_ONSEND_TIMEOUT_MS
|
|
# and a 10000 literal for poll timeout but are now made configurable for
|
|
# experimentation purposes.
|
|
# This option takes three integer values of milliseconds, separated by a comma
|
|
# in the order connectMS, flowSendMS and pollMS.
|
|
# WARNING: This is an EXPERIMENTAL configuration option that should not be
|
|
# changed in production environments unless properly tested and validated.
|
|
# Some configurations can lead to service lockups and other subtle issues.
|
|
# Please make sure that you know exactly what you are doing and properly
|
|
# test any changes you make.
|
|
# Default: 3000,180000,7500
|
|
|
|
# [connFallbackExpirationSecs]
|
|
# The time in seconds after which a connection to a fallback interface expires.
|
|
# When a fallback connection expires, the system will try to establish a new
|
|
# connection to the other host's primary interface (falling back to another
|
|
# interface again if necessary).
|
|
# Note: The priority of node interfaces can be configured using the
|
|
# "connInterfacesFile" parameter.
|
|
# Default: 900
|
|
|
|
|
|
#
|
|
# --- Section 4.2: [Logging] ---
|
|
#
|
|
|
|
# [logType]
|
|
# Defines the logger type. This can either be "syslog" to send log messages to
|
|
# the general system logger or "logfile". If set to logfile logs will be written
|
|
# to logStdFile.
|
|
# Default: logfile
|
|
|
|
# [logLevel]
|
|
# Defines the amount of output messages. The higher this level, the more
|
|
# detailed the log messages will be.
|
|
# Note: Levels above 3 might decrease performance.
|
|
# Default: 3 (Max: 5)
|
|
|
|
# [logNoDate]
|
|
# Defines whether "date & time" (=false) or the current "time only" (=true)
|
|
# should be logged.
|
|
# Default: false
|
|
|
|
# [logNumLines]
|
|
# The maximum number of lines per log file.
|
|
# Default: 50000
|
|
|
|
# [logNumRotatedFiles]
|
|
# The number of old files to keep when "logNumLines" is reached and the log file
|
|
# is rewritten (log rotation).
|
|
# Default: 5
|
|
|
|
# [logStdFile]
|
|
# The path and filename of the log file for standard log messages.
|
|
# The parameter will be considered only if logType value is not equal to syslog.
|
|
# If no name is specified, the messages will be written to the console.
|
|
# Default: /var/log/beegfs-storage.log
|
|
|
|
|
|
#
|
|
# --- Section 4.4: [Startup] ---
|
|
#
|
|
|
|
# [runDaemonized]
|
|
# Detach the process from its parent (and from stdin/-out/-err).
|
|
# Default: true
|
|
|
|
|
|
#
|
|
# --- Section 4.5: [System Settings] ---
|
|
#
|
|
|
|
# [sysResyncSafetyThresholdMins]
|
|
# Automatic mirror resyncs use the last successful communication time between
|
|
# two mirror buddies to skip verification of files that were not recently
|
|
# modified before a server went offline. As BeeGFS uses server-side write
|
|
# caching (where the cache is flushed to disk every minute by the Linux kernel),
|
|
# it is possible that a server loses its write cache in case of a crash, which
|
|
# contained data before the last successful communication. This value adds an
|
|
# extra amount of time to the last successful communication timestamp to include
|
|
# the time window of a potential cache loss.
|
|
# The value may be 0 (which doesn't mean there is no threshold) to completely
|
|
# disable the use of the last successful communication timestamp,
|
|
# i.e. that a full resync will be done.
|
|
# Values: time in minutes
|
|
# Default: 10
|
|
|
|
# [sysTargetOfflineTimeoutSecs]
|
|
# Timeout until targets on a storage server are considered offline by the
|
|
# management node when no target state updates can be fetched from that server.
|
|
# Note: This must be the same value as in the /etc/beegfs/beegfs-mgmtd.conf on
|
|
# the management node.
|
|
# Values: time in seconds
|
|
# Default: 180
|
|
|
|
|
|
#
|
|
# --- Section 4.6: [Tuning] ---
|
|
#
|
|
|
|
# [tuneBindToNumaZone]
|
|
# Defines the zero-based NUMA zone number to which all threads of this process
|
|
# should be bound. If unset, all available CPU cores may be used.
|
|
# Zone binding is especially useful if the corresponding devices (e.g. storage
|
|
# controller and network card) are also attached to the same zone.
|
|
# Note: The Linux kernel shows NUMA zones at /sys/devices/system/node/nodeXY
|
|
# Default: <unset>
|
|
|
|
# [tuneFileReadAheadSize], [tuneFileReadAheadTriggerSize]
|
|
# tuneFileReadAheadSize is the byte range submitted to the kernel for read-ahead
|
|
# after at least tuneFileReadAheadTriggerSize file bytes were read sequentially
|
|
# from a target.
|
|
# Values: A typical setting is tuneFileReadAheadSize=2m. The optimal setting
|
|
# depends on your storage system configuration (e.g. your RAID layout).
|
|
# Default: tuneFileReadAheadSize=0, tuneFileReadAheadTriggerSize=4m
|
|
|
|
# [tuneFileReadSize], [tuneFileWriteSize]
|
|
# The maximum amount of data that the server should write to (or read from)
|
|
# the underlying local file system in a single operation.
|
|
# Note: Setting these values higher than the file chunk size or
|
|
# tuneWorkerBufSize has no effect.
|
|
# Default: tuneFileReadSize=128k, tuneFileWriteSize=128k
|
|
|
|
# [tuneFileWriteSyncSize]
|
|
# The number of sequentially written bytes (per file) after which the kernel
|
|
# will be advised to commit the written data to the underlying storage device.
|
|
# This is intended to avoid delays until the kernel notices that it is time to
|
|
# commit written data, which would reduce streaming write throughput.
|
|
# Note: When this setting is enabled, it is important to use the deadline
|
|
# scheduler (/sys/block/<...>/scheduler) to avoid reader starvation. It is
|
|
# also important to use a large request queue (/sys/block/<...>/nr_requests),
|
|
# as writes can only be asynchronous while there are free slots in the queue.
|
|
# Values: "0" disables this mechanism. Use "32m" (or a close even multiple of
|
|
# your RAID stripe set size) to test the effects of this.
|
|
# Default: 0
|
|
|
|
# [tuneNumResyncGatherSlaves]
|
|
# The number of threads (per target) used to gather file system information for
|
|
# a buddy mirror resync.
|
|
# Default: 6
|
|
|
|
# [tuneNumResyncSlaves]
|
|
# The number of threads (per target) used to perform the actual file and
|
|
# directory synchronizations for a buddy mirror resync.
|
|
# Default: 12
|
|
|
|
# [tuneNumStreamListeners]
|
|
# The number of threads waiting for incoming data events. Connections with
|
|
# incoming data will be handed over to the worker threads for actual message
|
|
# processing.
|
|
# Default: 1
|
|
|
|
# [tuneNumWorkers]
|
|
# The number of worker threads. A higher number of workers allows the server to
|
|
# handle more client requests in parallel, which also results in more
|
|
# concurrent access to the underlying storage device.
|
|
# Note: See also tuneUsePerTargetWorkers.
|
|
# Default: 12
|
|
|
|
# [tuneUseAggressiveStreamPoll]
|
|
# If set to true, the StreamListener component, which waits for incoming
|
|
# requests, will keep actively polling for events instead of sleeping until
|
|
# an event occurs. Active polling will reduce latency for processing of
|
|
# incoming requests at the cost of higher CPU usage.
|
|
# Default: false
|
|
|
|
# [tuneUsePerTargetWorkers]
|
|
# If set to true, a separate set of worker threads is created and exclusively
|
|
# assigned to each attached storage target. If set to false, a global set of
|
|
# worker threads is used and each worker thread can handle requests for all
|
|
# targets.
|
|
# Separate worker threads are intended to improve balance of I/O workload
|
|
# across targets under high load (i.e. when the number of concurrently incoming
|
|
# requests is higher than the number of worker threads).
|
|
# Note: If set to true, the actual number of created worker threads is
|
|
# tuneNumWorkers x number_of_attached_targets.
|
|
# Default: true
|
|
|
|
# [tuneUsePerUserMsgQueues]
|
|
# If set to true, per-user queues will be used to decide which of the pending
|
|
# requests is handled by the next available worker thread. If set to false, a
|
|
# single queue will be used and incoming requests will be processed in
|
|
# first-come, first-served order.
|
|
# Per-user queues are intended to improve fairness in multi-user environments.
|
|
# Default: false
|
|
|
|
# [tuneWorkerBufSize]
|
|
# The buffer size, which is allocated twice by each worker thread for IO and
|
|
# network data buffering.
|
|
# Note: For optimal performance, this value must be at least 1MB higher than
|
|
# tuneFileReadSize and tuneFileWriteSize.
|
|
# Default: 4m
|
|
|
|
|
|
#
|
|
# --- Section 4.7: [Quota settings] ---
|
|
#
|
|
|
|
# [quotaEnableEnforcement]
|
|
# Enables enforcement of user and group quota limits by periodically checking
|
|
# if the limits are exceeded.
|
|
# Note: This uses quota information provided by the underlying local file
|
|
# systems of the storage targets.
|
|
# Note: Set quota limits with "beegfs-ctl --setquota".
|
|
# Note: If this option is true, performance might be slightly decreased due to
|
|
# extra information tracking.
|
|
# Note: Must be set to the same value in meta servers and mgmtd to be
|
|
# effective.
|
|
# Default: false
|
|
|
|
|
|
#
|
|
# --- Section 5: [Expert options] ---
|
|
#
|
|
|
|
# [tuneProcessFDLimit]
|
|
# Sets the maximum number of files the server can open. If the process rlimit
|
|
# is already larger than this number the limit will not be decreased.
|
|
# Default: 50000
|
|
|
|
# [tuneWorkerNumaAffinity]
|
|
# Distributes worker threads equally among NUMA nodes on the system when set.
|
|
# Default: false
|
|
|
|
# [tuneListenerNumaAffinity]
|
|
# Distributes listener threads equally among NUMA nodes on the system when set.
|
|
# Default: false
|
|
|
|
# [tuneListenerPrioShift]
|
|
# Applies a niceness offset to listener threads. Negative values will decrease
|
|
# niceness (increase priority), positive values will increase niceness (decrease
|
|
# priority).
|
|
# Default: -1
|
|
|
|
# [tuneDirCacheLimit]
|
|
# Number of recently used chunk directory structures to keep in memory.
|
|
# Increasing this value may reduce memory allocations.
|
|
# Default: 1024
|
|
|
|
# [tuneEarlyStat]
|
|
# Compute file size and storage block usage of a chunk before close() is called.
|
|
# Some filesystems may report block usage greater than required to hold all
|
|
# file data if stat() is called before close().
|
|
# Default: false
|
|
|
|
# [quotaDisableZfsSupport]
|
|
# Disable quota support for ZFS if quota is enabled.
|
|
# ZFS quota requires the libzfs library. Errors while loading the library are
|
|
# reported if quota is enabled and the library is not installed (or the wrong
|
|
# version is installed).
|
|
# Default: false
|
|
|
|
# [pidFile]
|
|
# Creates a PID file for the daemon when set. Set by init scripts.
|
|
# Default: <none>
|