# This is a config file for BeeGFS storage nodes.
# http://www.beegfs.com


# --- [Table of Contents] ---
#
# 1) Settings
# 2) Command Line Arguments
# 3) Basic Settings Documentation
# 4) Advanced Settings Documentation
# 5) Expert Options


#
# --- Section 1.1: [Basic Settings] ---
#

sysMgmtdHost =

storeStorageDirectory =
storeAllowFirstRunInit = true
storeFsUUID =


#
# --- Section 1.2: [Advanced Settings] ---
#

connAuthFile = /etc/beegfs/conn.auth
connDisableAuthentication = false
connBacklogTCP = 128
connInterfacesFile =
connMaxInternodeNum = 12
connMgmtdPort = 8008
connStoragePort = 8003
connPortShift = 0
connNetFilterFile =
connUseRDMA = true
connRDMATypeOfService = 0
connTcpOnlyFilterFile =

logType = syslog
logLevel = 3
logNoDate = false
logNumLines = 50000
logNumRotatedFiles = 5
logStdFile = /var/log/beegfs-storage.log

runDaemonized = true

sysResyncSafetyThresholdMins = 10
sysTargetOfflineTimeoutSecs = 180

tuneBindToNumaZone =
tuneFileReadAheadSize = 0m
tuneFileReadAheadTriggerSize = 4m
tuneFileReadSize = 128k
tuneFileWriteSize = 128k
tuneFileWriteSyncSize = 0m
tuneNumResyncGatherSlaves = 6
tuneNumResyncSlaves = 12
tuneNumStreamListeners = 1
tuneNumWorkers = 12
tuneUseAggressiveStreamPoll = false
tuneUsePerTargetWorkers = true
tuneUsePerUserMsgQueues = false
tuneWorkerBufSize = 4m


#
# --- Section 2: [Command Line Arguments] ---
#

# Use the command line argument "cfgFile=/etc/anotherconfig.conf" to
# specify a different config file for beegfs_storage.
#
# All other options in this file can also be used as command line
# arguments, overriding the corresponding config file values.


#
# --- Section 3: [Basic Settings Documentation] ---
#

# [sysMgmtdHost]
# Hostname (or IP) of the host running the management service.
# (See also "connMgmtdPort")
# Default:

# [storeStorageDirectory]
# The absolute path to a storage target. A storage target is a directory where
# the file system can store raw user file contents. Multiple targets can be
# specified as a comma-separated list.
# Example: /mnt/beegfs_storage1,/mnt/beegfs_storage2
# Default:

# [storeAllowFirstRunInit]
# Enables or disables daemon startup with an uninitialized storage directory.
# This can be used to make sure that the daemon does not run when the storage
# partition is not mounted (e.g. because it needs repair after a power outage).
# Note: This setting must be enabled during first startup of the daemon, but
#    may be disabled afterwards.
# Default: true

# [storeFsUUID]
# Requires the underlying file systems of the storage targets to have the same
# UUID as set here. This prevents the storage node from accidentally starting targets
# from a wrong device, e.g. when it is not properly mounted. To find the UUID to
# put here, you can, for example, use blkid:
#
#    blkid -s UUID
#
# This will output all devices on the host with their file system's UUID (if there
# is one). Choose the correct ones and list them here. This command needs to be run
# as root.
#
# The UUIDs need to be listed the same way and order as the storage target paths
# provided with the storeStorageDirectory setting above as they are checked against
# the paths in that order.
#
# If left empty, the check is skipped. It is highly recommended to enable this check
# after installation to prevent data corruption.
# Default:


#
# --- Section 4: [Advanced Settings Documentation] ---
#

#
# --- Section 4.1: [Connections & Communication] ---
#

# [connAuthFile]
# The path to a file that contains a shared secret for connection based
# authentication. Only peers that use the same shared secret will be able to
# connect.
# Default:

# [connDisableAuthentication]
# If set to true, explicitly disables connection authentication and allows the
# service to run without a connAuthFile. Running BeeGFS without connection
# authentication is considered insecure and is not recommended.
# Default: false

# [connBacklogTCP]
# The TCP listen backlog.
# Default: 128 # [connInterfacesFile] # The path to a text file that specifies the names of the interfaces which # may be used for communication. One interface per line. The line number also # defines the priority of the interface. # Example: "ib0" in the first line, "eth0" in the second line. # Values: This setting is optional. If unspecified, all available interfaces # will be used and priorities will be assigned automatically. # Note: This information is sent to other hosts to inform them about possible # communication paths. See connRestrictOutboundInterfaces for this # configuration's potential effect on outbound connections. # Default: # [connInterfacesList] # Comma-separated list of interface names. Performs the same function as # connInterfacesFile. # Default: # [connRestrictOutboundInterfaces] # The default behavior of BeeGFS is to use any available network interface # to establish an outbound connection to a node, according to the TCP/IP # configuration of the operating system. When connRestrictOutboundInterfaces # is set to true, the network interfaces used for outbound connections are # limited to the values specified by connInterfacesFile or connInterfacesList. # The operating system routing tables are consulted to determine which # interface to use for a particular node's IP address. If there is no # route from the configured interfaces that is suitable for a node's IP # addresses then the connection will fail to be established. # Default: false # [connNoDefaultRoute] # When connRestrictOutboundInterfaces is true, the routing logic would use # the default route for a Node's IP address when no specific route for that # address is found in the routing tables. This can be problematic during a # failure situation, as the default route is not appropriate to use for a # subnet that is accessible from an interface that has failed. # connNoDefaultRoute is a comma-separated list of CIDRs that should never # be accessed via the default route. 
# Default: 0.0.0.0/0. This prevents the default route from ever being used. # [connMaxInternodeNum] # The maximum number of simultaneous connections to the same node. # Default: 12 # [connMgmtdPort] # The UDP and TCP port of the management node. # Default: 8008 # [connStoragePort] # The UDP and TCP port of the storage node. # Default: 8003 # [connPortShift] # Shifts all following UDP and TCP ports according to the specified value. # Intended to make port configuration easier in case you do not want to # configure each port individually. # Default: 0 # [connNetFilterFile] # The path to a text file that specifies allowed IP subnets, which may be used # for outgoing communication. One subnet per line in classless notation (IP # address and number of significant bits). # Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second # line. # Values: This setting is optional. If unspecified, all addresses are allowed # for outgoing communication. # Default: # [connTCPRcvBufSize], [connUDPRcvBufSize] # Sets the size for TCP and UDP socket receive buffers (SO_RCVBUF). The maximum # allowed value is determined by sysctl net.core.rmem_max. This value is # ignored if it is less than the default value determined by # net.core.rmem_default. # For legacy reasons, the default value 0 indicates that the buffer size is set # to connRDMABufNum * connRDMABufSize. # -1 indicates that the buffer size should be left at the system default. # Default: 0 # [connUseRDMA] # Enables the use of Remote Direct Memory Access (RDMA) for Infiniband. # This setting only has effect if libbeegfs-ib is installed. # Default: true # [connRDMABufNum], [connRDMABufSize] # Infiniband RDMA buffer settings. # connRDMABufSize is the maximum size of a buffer (in bytes) that will be sent # over the network; connRDMABufNum is the number of available buffers that can # be in flight for a single connection. These client settings are also applied # on the server side for each connection. 
# Note: RAM usage per connection is connRDMABufSize x connRDMABufNum x 2. Keep # resulting RAM usage (x connMaxInternodeNum x number_of_clients) on the # server in mind when increasing these values. # Note: The client needs to allocate physically contiguous pages for # connRDMABufSize, so this setting shouldn't be higher than a few kbytes. # Default: 8192, 70 # [connRDMATypeOfService] # Infiniband provides the option to set a type of service for an application. # This type of service can be used by your subnet manager to provide Quality of # Service functionality (e.g. setting different service levels). # In openSM the service type will be mapped to the parameter qos-class, which # can be handled in your QoS configuration. # See # www.openfabrics.org/downloads/OFED/ofed-1.4/OFED-1.4-docs/ # QoS_management_in_OpenSM.txt # for more information on how to configure openSM for QoS. # This parameter sets the type of service for all outgoing connections of this # daemon. # Default: 0 (Max: 255) # [connTcpOnlyFilterFile] # The path to a text file that specifies IP address ranges to which no RDMA # connection should be established. This is useful e.g. for environments where # all hosts support RDMA, but some hosts cannot connect via RDMA to some other # hosts. # Example: "192.168.10.0/24" in the first line, "192.168.20.0/24" in the second # line. # Values: This setting is optional. # Default: # [connMessagingTimeouts] # These constants are used to set some of the connection timeouts for sending # and receiving data between services in the cluster. They used to be hard-coded # (CONN_LONG_TIMEOUT, CONN_MEDIUM_TIMEOUT and CONN_SHORT_TIMEOUT) but are now # made configurable for experimentation purposes. # This option takes three integer values of milliseconds, separated by a comma # in the order long, medium, short. # WARNING: This is an EXPERIMENTAL configuration option that should not be # changed in production environments unless properly tested and validated. 
#    Some configurations can lead to service lockups and other subtle issues.
#    Please make sure that you know exactly what you are doing and properly
#    test any changes you make.
# Default: 600000,90000,30000

# [connRDMATimeouts]
# These constants are used to set some of the timeouts for sending and receiving
# data between services in the cluster via RDMA. They used to be
# hard-coded IBVSOCKET_CONN_TIMEOUT_MS, IBVSOCKET_FLOWCONTROL_ONSEND_TIMEOUT_MS
# and a 10000 literal for poll timeout but are now made configurable for
# experimentation purposes.
# This option takes three integer values of milliseconds, separated by a comma
# in the order connectMS, flowSendMS and pollMS.
# WARNING: This is an EXPERIMENTAL configuration option that should not be
#    changed in production environments unless properly tested and validated.
#    Some configurations can lead to service lockups and other subtle issues.
#    Please make sure that you know exactly what you are doing and properly
#    test any changes you make.
# Default: 3000,180000,7500

# [connFallbackExpirationSecs]
# The time in seconds after which a connection to a fallback interface expires.
# When a fallback connection expires, the system will try to establish a new
# connection to the other host's primary interface (falling back to another
# interface again if necessary).
# Note: The priority of node interfaces can be configured using the
#    "connInterfacesFile" parameter.
# Default: 900


#
# --- Section 4.2: [Logging] ---
#

# [logType]
# Defines the logger type. This can either be "syslog" to send log messages to
# the general system logger or "logfile". If set to logfile, logs will be written
# to logStdFile.
# Default: logfile

# [logLevel]
# Defines the amount of output messages. The higher this level, the more
# detailed the log messages will be.
# Note: Levels above 3 might decrease performance.
# Default: 3 (Max: 5)

# [logNoDate]
# Defines whether "date & time" (=false) or the current "time only" (=true)
# should be logged.
# Default: false

# [logNumLines]
# The maximum number of lines per log file.
# Default: 50000

# [logNumRotatedFiles]
# The number of old files to keep when "logNumLines" is reached and the log file
# is rewritten (log rotation).
# Default: 5

# [logStdFile]
# The path and filename of the log file for standard log messages.
# The parameter will be considered only if logType value is not equal to syslog.
# If no name is specified, the messages will be written to the console.
# Default: /var/log/beegfs_storage.log


#
# --- Section 4.4: [Startup] ---
#

# [runDaemonized]
# Detach the process from its parent (and from stdin/-out/-err).
# Default: true


#
# --- Section 4.5: [System Settings] ---
#

# [sysResyncSafetyThresholdMins]
# Automatic mirror resyncs use the last successful communication time between
# two mirror buddies to skip verification of files that were not recently
# modified before a server went offline. As BeeGFS uses server-side write
# caching (where the cache is flushed to disk every minute by the Linux kernel),
# it is possible that a server loses its write cache in case of a crash, which
# contained data before the last successful communication. This value adds an
# extra amount of time to the last successful communication timestamp to include
# the time window of a potential cache loss.
# The value may be 0 (which doesn't mean there is no threshold) to completely
# disable the use of the last successful communication timestamp,
# i.e. that a full resync will be done.
# Values: time in minutes
# Default: 10

# [sysTargetOfflineTimeoutSecs]
# Timeout until targets on a storage server are considered offline by the
# management node when no target state updates can be fetched from that server.
# Note: This must be the same value as in the /etc/beegfs/beegfs-mgmtd.conf on
#    the management node.
# Values: time in seconds
# Default: 180


#
# --- Section 4.6: [Tuning] ---
#

# [tuneBindToNumaZone]
# Defines the zero-based NUMA zone number to which all threads of this process
# should be bound. If unset, all available CPU cores may be used.
# Zone binding is especially useful if the corresponding devices (e.g. storage
# controller and network card) are also attached to the same zone.
# Note: The Linux kernel shows NUMA zones at /sys/devices/system/node/nodeXY
# Default:

# [tuneFileReadAheadSize], [tuneFileReadAheadTriggerSize]
# tuneFileReadAheadSize is the byte range submitted to the kernel for read-ahead
# after at least tuneFileReadAheadTriggerSize file bytes were read sequentially
# from a target.
# Values: A typical setting is tuneFileReadAheadSize=2m. The optimal setting
#    depends on your storage system configuration (e.g. your RAID layout).
# Default: tuneFileReadAheadSize=0, tuneFileReadAheadTriggerSize=4m

# [tuneFileReadSize], [tuneFileWriteSize]
# The maximum amount of data that the server should write to (or read from)
# the underlying local file system in a single operation.
# Note: Setting these values higher than the file chunk size or
#    tuneWorkerBufSize has no effect.
# Default: tuneFileReadSize=128k, tuneFileWriteSize=128k

# [tuneFileWriteSyncSize]
# The number of sequentially written bytes (per file) after which the kernel
# will be advised to commit the written data to the underlying storage device.
# This is intended to avoid delays until the kernel notices that it is time to
# commit written data, which would reduce streaming write throughput.
# Note: When this setting is enabled, it is important to use the deadline
#    scheduler (/sys/block/<...>/scheduler) to avoid reader starvation. It is
#    also important to use a large request queue (/sys/block/<...>/nr_requests),
#    as writes can only be asynchronous while there are free slots in the queue.
# Values: "0" disables this mechanism.
#    Use "32m" (or a close even multiple of
#    your RAID stripe set size) to test the effects of this.
# Default: 0

# [tuneNumResyncGatherSlaves]
# The number of threads (per target) used to gather file system information for
# a buddy mirror resync.
# Default: 6

# [tuneNumResyncSlaves]
# The number of threads (per target) used to perform the actual file and
# directory synchronizations for a buddy mirror resync.
# Default: 12

# [tuneNumStreamListeners]
# The number of threads waiting for incoming data events. Connections with
# incoming data will be handed over to the worker threads for actual message
# processing.
# Default: 1

# [tuneNumWorkers]
# The number of worker threads. Higher number of workers allows the server to
# handle more client requests in parallel, which also results in more
# concurrent access to the underlying storage device.
# Note: See also tuneUsePerTargetWorkers.
# Default: 12

# [tuneUseAggressiveStreamPoll]
# If set to true, the StreamListener component, which waits for incoming
# requests, will keep actively polling for events instead of sleeping until
# an event occurs. Active polling will reduce latency for processing of
# incoming requests at the cost of higher CPU usage.
# Default: false

# [tuneUsePerTargetWorkers]
# If set to true, a separate set of worker threads is created and exclusively
# assigned to each attached storage target. If set to false, a global set of
# worker threads is used and each worker thread can handle requests for all
# targets.
# Separate worker threads are intended to improve balance of I/O workload
# across targets under high load (i.e. when the number of concurrently incoming
# requests is higher than the number of worker threads).
# Note: If set to true, the actual number of created worker threads is
#    tuneNumWorkers x number_of_attached_targets.
# Default: true

# [tuneUsePerUserMsgQueues]
# If set to true, per-user queues will be used to decide which of the pending
# requests is handled by the next available worker thread. If set to false, a
# single queue will be used and incoming requests will be processed in
# first-come, first-served order.
# Per-user queues are intended to improve fairness in multi-user environments.
# Default: false

# [tuneWorkerBufSize]
# The buffer size, which is allocated twice by each worker thread for IO and
# network data buffering.
# Note: For optimal performance, this value must be at least 1MB higher than
#    tuneFileReadSize and tuneFileWriteSize.
# Default: 4m


#
# --- Section 4.7: [Quota settings] ---
#

# [quotaEnableEnforcement]
# Enables enforcement of user and group quota limits by periodically checking
# if the limits are exceeded.
# Note: This uses quota information provided by the underlying local file
#    systems of the storage targets.
# Note: Set quota limits with "beegfs-ctl --setquota".
# Note: If this option is true, performance might be slightly decreased due to
#    extra information tracking.
# Note: Must be set to the same value in meta servers and mgmtd to be
#    effective.
# Default: false


#
# --- Section 5: [Expert options] ---
#

# [tuneProcessFDLimit]
# Sets the maximum number of files the server can open. If the process rlimit
# is already larger than this number the limit will not be decreased.
# Default: 50000

# [tuneWorkerNumaAffinity]
# Distributes worker threads equally among NUMA nodes on the system when set.
# Default: false

# [tuneListenerNumaAffinity]
# Distributes listener threads equally among NUMA nodes on the system when set.
# Default: false

# [tuneListenerPrioShift]
# Applies a niceness offset to listener threads. Negative values will decrease
# niceness (increase priority), positive values will increase niceness (decrease
# priority).
# Default: -1

# [tuneDirCacheLimit]
# Number of recently used chunk directory structures to keep in memory.
# Increasing this value may reduce memory allocations. # Default: 1024 # [tuneEarlyStat] # Compute file size and storage block usage of a chunk before close() is set. # Some filesystems may report block usage greater than required to hold all # file data if stat() is called before close(). # Default: false # [quotaDisableZfsSupport] # Disable quota support for ZFS if quota is enabled. # ZFS quota requires the libzfs library. Errors while loading the library are # reported if quota is enabled and the library is not installed (or the wrong # version is installed) # Default: false # [pidFile] # Creates a PID file for the daemon when set. Set by init scripts. # Default: