#!/bin/bash
#
# BeeOND (BeeGFS OnDemand) launcher — global defaults and constants shared by
# the start / stop / stoplocal actions defined below.

# timestamp to make log and pid file names unique per BeeOND session
CURRENTTIME=$(date +%Y%m%d-%H%M%S)

# service binary names (also used as config/log base names)
MGMTD_BIN=beegfs-mgmtd
META_BIN=beegfs-meta
STORAGE_BIN=beegfs-storage
CLIENT_BIN=beegfs-client # not really a binary, but name of config, init, etc.
CTL_BIN=beegfs

# log locations (LOG_PATH may be overridden via -L)
DEFAULT_LOG_PATH=/var/log
LOG_PATH=${DEFAULT_LOG_PATH}
STORAGE_LOG=${STORAGE_BIN}_${CURRENTTIME}.log
META_LOG=${META_BIN}_${CURRENTTIME}.log
CLIENT_LOG=${CLIENT_BIN}_${CURRENTTIME}.log

# per-service config file names expected inside the -f config directory
STORAGE_CFG_NAME=${STORAGE_BIN}.conf
META_CFG_NAME=${META_BIN}.conf
MGMTD_CFG_NAME=${MGMTD_BIN}.toml
CLIENT_CFG_NAME=${CLIENT_BIN}.conf

# ID files written by the daemons inside their data directories
META_NUMID_FILE=nodeNumID
TARGET_NUMID_FILE=targetNumID

# symlink targets used by the -l "prefer local" mode
PREFERRED_MDS_FILE=/tmp/preferredMds.fod
PREFERRED_TARGET_FILE=/tmp/preferredTarget.fod

# per-node status file recording what was started (overridable via -i)
DEFAULT_STATUSFILE=/var/tmp/beeond.tmp
STATUSFILE=${DEFAULT_STATUSFILE}

NUM_META_SERVER=1
NUM_STORAGE_SERVER=0

BEEGFS_BIN_PATH=/opt/beegfs/sbin

DEFAULT_MGMTD_GRPC_PORT=8010
DEFAULT_PORT_SHIFT=1000

SSH="ssh"
SSH_PARAMS=( -qq -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no -n )
# 'command -v' is the portable, POSIX-specified replacement for 'which'
DEFAULT_PDSH_PATH=$(command -v pdsh 2>/dev/null)
PDSH_RCMD="ssh"

# source helper script
ABSOLUTE_PATH=$(dirname "$(readlink -e "$0")") # using readlink, because somone might be calling
                                               # this script using a symlink
if [ -e "${ABSOLUTE_PATH}/../lib/beegfs-ondemand-stoplocal" ]
then
    BEEOND_STOPLOCAL="${ABSOLUTE_PATH}/../lib/beegfs-ondemand-stoplocal"
else
    BEEOND_STOPLOCAL="${ABSOLUTE_PATH}/../scripts/lib/beegfs-ondemand-stoplocal"
fi

#shellcheck source=scripts/lib/beegfs-ondemand-stoplocal
source "${BEEOND_STOPLOCAL}"
|
|
|
|
# print usage
|
|
# Print the full help text (actions, options, examples) and exit with status 1.
print_usage_and_exit()
{
    echo ""
    echo "BeeOND - BeeGFS OnDemand (http://www.beegfs.com)"
    echo ""
    echo "DESCRIPTION:"
    echo " Script to set up or shut down a BeeGFS setup on the fly."
    echo ""
    echo " Creates a new BeeGFS file system on a set of hosts. All necessary services"
    echo " are automatically started and the file system is mounted. In the same way,"
    echo " the file system can be unmounted again and the services will be shut down."
    echo " Optionally, the contents of the file system can be deleted."
    echo ""
    echo " This script can be used e.g. to automatically create a temporary scratch file"
    echo " system for cluster nodes during a compute job, and to remove it after the job"
    echo " is finished."
    echo ""
    echo "USAGE: $(basename "$0") <action> <options>"
    echo ""
    echo "ACTIONS:"
    echo " The first argument to $(basename "$0") is considered to be an action that the"
    echo " script should perform."
    echo ""
    echo " The following actions are available:"
    echo ""
    echo " start:"
    echo " Start the file system on a number of nodes, specified by the node file."
    echo " The necessary services will be started and the newly created file system"
    echo " will be mounted at the specified mount point. Information about the"
    echo " running file system are stored in a status file on each node."
    echo ""
    echo " Mandatory arguments:"
    echo " -n FILENAME => Node file with line-separated hostnames."
    echo " -d PATH => Path for BeeGFS data on servers."
    echo " -c PATH => Mount point for BeeGFS clients."
    echo ""
    echo " Optional arguments:"
    echo " -i FILENAME => Status information file name."
    echo " Default: ${DEFAULT_STATUSFILE}"
    echo " -F => Remove contents of data path before starting services."
    echo " This is useful if the processes and status file of a"
    echo " previous beeond session are gone, but the"
    echo " data is still there."
    echo " -m NUM => Number of metadata servers to start. Default: 1"
    echo " -s NUM => Number of storage servers to start."
    echo " Default: Number of hosts."
    echo " -p NUM => Network port shift. The standard BeeGFS network port"
    echo " numbers are shifted by this number. Useful in order to"
    echo " have several BeeGFS instances running on the same node."
    echo " Default: ${DEFAULT_PORT_SHIFT}"
    echo " -f PATH => Directory containing additional beegfs config files."
    echo " There can be one file for each service as well as the client."
    echo " They must be named in the form beegfs-<service>.conf, where "
    echo " <service> can be meta, storage, mgmtd or client."
    echo " Only the options specified within the files are"
    echo " set/overwritten, the rest of the defaults will not be"
    echo " touched and still be applied. The directory and the "
    echo " files need to be present on every node."
    echo " -L PATH => Log file directory. If necessary, the directory will be"
    echo " created. Default: ${DEFAULT_LOG_PATH}"
    echo " -l => Prefer local storage nodes."
    echo " -P => Use pdsh for parallel startup. If this option is not"
    echo " given, ssh is used to start up the services on the nodes"
    echo " sequentially."
    echo " -b PATH => Path to the pdsh binary. Default: <auto-detected>"
    echo " -r => Use tmpfs for beegfs storage and metadata."
    echo " Note: On older Linux versions, tmpfs does not support"
    echo " extended attributes. If you get an error message"
    echo " from beegfs_meta reading \"Failed to store root"
    echo " directory\" you have to provide an additional config"
    echo " file beegfs-meta.conf containing the line"
    echo " storeUseExtendedAttribs = false"
    echo " -k => enable storage target mirroring"
    echo " Note: Needs an even number of storage servers (-s)."
    echo " -j => enable metadata server mirroring"
    echo " Note: Needs an even number of metadata servers (-m)."
    echo " -q => Suppress INFO messages, only print ERRORs."
    echo " -t FILE => Use FILE to define multiple storage targets and assign"
    echo " them to storage pools. The file needs to be in the"
    echo " following format:"
    echo ""
    echo " pool_1:/path/to/target_1,/path/to/target_2,..."
    echo " pool_2:/path/to/target_3,/path/to/target_4,..."
    echo " ..."
    echo ""
    echo " pool_n is the name of the storage pool, the comma separated"
    echo " list after the colon are the paths to the target directories"
    echo " that shall be part of this pool."
    echo " The lines can't contain whitespaces. BeeOND will look for"
    echo " these directories and add them as a storage target on all"
    echo " nodes where they exist. To avoid having unwanted targets"
    echo " in a pool, make sure each of the specified paths only"
    echo " exists on nodes where they are actually mounted on the"
    echo " desired storage medium."
    echo " BeeOND will then assign the targets to the corresponding"
    echo " storage pools and create a directory for each pool"
    echo " on the root level of the BeeGFS mount."
    echo " This option can only be used together with -F."
    echo " -T => Don't create and assign the pool directories when using -t."
    echo " -G => The base gRPC port (before port shifting) that the mgmtd"
    echo " uses in this BeeOND instance. Defaults to 8010 and only needs"
    echo " to be supplied if mgmtd is configured via configuration file"
    echo " (see -f) to use a base gRPC port other than 8010."
    echo ""
    echo " Arguments that require a configuration directory (option -f) that is available"
    echo " on all nodes and contains the required files (see option descriptions):"
    echo " -C => Enable connection authentication. Requires a \"conn.auth\""
    echo " file in the configuration directory."
    echo " -E => Enable TLS encryption between the mgmtmd and the command"
    echo " line configuration tool. Requires \"cert.pem\" and"
    echo " \"key.pem\" files in the configuration directory."
    echo " -H => Enable enterprise features. This mode is required by all"
    echo " other modes that enable enterprise features and requires a"
    echo " \"license.pem\" file in the configuration directory."
    echo ""
    echo " stop:"
    echo " Stop the file system on a number of nodes, specified by the node file."
    echo " Use the information from the status file to unmount a file system on a"
    echo " number of nodes specified by the node file, and shut down the services."
    echo ""
    echo " Mandatory arguments:"
    echo " -n FILENAME => Node file."
    echo ""
    echo " Optional arguments:"
    echo " -i FILENAME => Status information file name."
    echo " Default: ${DEFAULT_STATUSFILE}"
    echo " -d => Delete BeeGFS data on disks."
    echo " -L => Delete log files after successful shutdown."
    echo " -c => \"Cleanup\": Remove remaining processes and directories"
    echo " of a potentially unsuccessful shutdown of an earlier"
    echo " beeond instance. This switch silences the error"
    echo " message when a status information file is not found on a"
    echo " node or an unmount command fails; instead, a message is"
    echo " printed (if \"INFO\" messages are not suppressed) when a"
    echo " status file DOES exist, because this means there"
    echo " actually was an instance before that is now being"
    echo " cleaned up."
    echo " -P => Use pdsh for parallel shutdown. If this option is not"
    echo " given, ssh is used to unmount the file system and stop"
    echo " the services on all nodes sequentially."
    echo " -b PATH => Path to the pdsh binary. Default: ${DEFAULT_PDSH_PATH}"
    echo " -q => Suppress INFO messages, only print ERRORs."
    echo ""
    echo " stoplocal:"
    echo " Stop the file system on the local host only. This is recommended only as"
    echo " an emergency measure, e.g. after a host encountered an error during the"
    echo " distributed shutdown procedure. Uses the information from the status file"
    echo " to unmount the file system and stop the services on the local host only."
    echo ""
    echo " Optional arguments:"
    echo " -i FILENAME => Status information file."
    echo " Default: ${DEFAULT_STATUSFILE}"
    echo " -d => Delete BeeGFS data on disks."
    echo " -L => Delete log files after successful shutdown. If the log"
    echo " directory is empty afterwards, it will also be removed."
    echo " -c => \"Cleanup\": Remove remaining processes and directories"
    echo " of a potentially unsuccessful shutdown of an earlier"
    echo " beeond instance. This switch silences the error"
    echo " message when the status information file is not found or"
    echo " the unmount command fails; instead, a message is printed"
    echo " (if \"INFO\" messages are not suppressed) when a status"
    echo " file DOES exist, because this means there actually was"
    echo " an instance before that is now being cleaned up."
    echo " -q => Suppress INFO messages, only print ERRORs."
    echo " -u => ONLY unmount the file system."
    echo " (Cannot be used in combination with \"-s\".)"
    echo " -s => ONLY stop non-client services. (*)"
    echo " (Cannot be used in combination with \"-u\".)"
    echo ""
    echo "EXAMPLES:"
    echo " Start a beeond instance on the nodes given in nodefile, using the data"
    echo " directory /data/beeond and the client mountpoint /mnt/beeond via pdsh"
    echo " for parallel startup:"
    echo " $(basename "$0") start -n nodefile -d /data/beeond -c /mnt/beeond -P"
    echo ""
    echo " Stop the file system:"
    echo " $(basename "$0") stop -n nodefile -P -L -d"
    echo ""
    exit 1
}
|
|
|
|
### internal functions for general usage ###
|
|
# Write an error message to stderr (prefixed "ERROR: "), then emit a blank
# line on stdout for visual separation.
print_error()
{
    printf 'ERROR: %s\n' "${1}" >&2
    echo ""
}
|
|
|
|
# Report an error via print_error and terminate the whole script with status 1.
print_error_and_exit()
{
    local msg="$1"
    print_error "${msg}"
    exit 1
}
|
|
|
|
# Print an informational message on stdout unless quiet mode (-q) is active,
# i.e. unless the global QUIET equals "true".
print_info()
{
    local msg="${1}"
    [ "${QUIET}" = "true" ] || echo "INFO: ${msg}"
}
|
|
|
|
# Verify that pdsh works on all HOSTS (comma-separated list):
#   - every host must use /bin/bash as its default shell (remote exit code 2)
#   - the BeeOND stoplocal helper must be installed on every host
# If pdsh itself fails, falls back to sequential ssh (sets USE_PDSH=false)
# after re-checking each host's reachability individually.
check_pdsh()
{
    #an array is passed here, so this makes parameter passing a bit more complex
    local HOSTS=$1

    print_info "Checking PDSH availability on the following hosts: ${HOSTS}"

    # execute cmd
    # remote command exits 2 to signal "wrong default shell"; any other
    # non-zero status means pdsh/ssh transport problems
    test -e "${PDSH}" &&\
    ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" \
        "test \${SHELL} = '/bin/bash' || exit 2"
    RES=$?

    if [ $RES -eq 2 ]
    then
        print_error_and_exit "One or more hosts don't use /bin/bash as default shell."
    elif [ $RES -ne 0 ]
    then
        print_info "pdsh does not seem to work on all nodes. Disabling pdsh and using ssh instead"
        USE_PDSH=false

        # We have to repeat the reachability check using conventional SSH before continuing.
        IFS=,
        for HOST in ${HOSTS}
        do
            check_reachability "${HOST}"
        done
        unset IFS
        return
    fi

    # make sure the stoplocal helper exists everywhere (remote exit 2 if missing)
    ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" \
        "if [ -e ${BEEOND_STOPLOCAL} ]; then true; else exit 2; fi" || \
        print_error_and_exit "Unable to find BeeOND helper program on one or more nodes.
Please make sure BeeOND is installed on all machines."
}
|
|
|
|
# Run CMD on HOST via ssh using the globally configured SSH/SSH_PARAMS.
# Both arguments are mandatory; the function aborts the script otherwise.
# Returns the ssh exit status.
execute_ssh_cmd()
{
    local HOST="$1"
    local CMD="$2"

    # refuse to run with missing arguments
    if [ -z "${HOST}" ] || [ -z "${CMD}" ]
    then
        print_error_and_exit "Internal function 'execute_ssh_cmd' was called without a host or \
without a command"
    fi

    ${SSH} "${SSH_PARAMS[@]}" "${HOST}" "${CMD}"
}
|
|
|
|
# Run CMD on all HOSTS (comma-separated) in parallel via pdsh.
# With CONTINUE_ON_ERROR="true" a failure is only reported (global ERROR=true);
# otherwise the whole script aborts on the first failure.
execute_pdsh_cmd()
{
    local HOSTS="$1" # comma-separated list
    local CMD="$2"
    local CONTINUE_ON_ERROR="$3"
    local TMPTIME
    TMPTIME=$(date +%Y%m%d-%H%M%S)
    # marker file that a failing node creates, so failures could be located later
    local TMPFAILFILE="/tmp/beegfs.pdsh_fail.${TMPTIME}"

    # error checks
    if [ "${HOSTS}" = "" ] || [ "${CMD}" = "" ]
    then
        print_error_and_exit "Internal function 'execute_pdsh_cmd' was called without a host or \
without a command"
    fi

    # execute cmd
    if ! ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" "${CMD} || (touch ${TMPFAILFILE} && false)"
    then
        # pdsh returned non-zero, so there must have been an error on at least one node
        # (-S returns the greatest return value of all nodes).
        # the executed line created a file on the failing node
        # now we have to look on each node for this file if we are interested which node failed
        # for now, we do not do that; only abort and leave it to the user to investigate pdsh output
        if [ "${CONTINUE_ON_ERROR}" = "true" ]
        then
            print_error "Execution of a command failed. Please see pdsh output for more information."
            ERROR="true"
        else
            print_error_and_exit "Execution of a command failed. Please see pdsh output for more \
information."
        fi
    fi
}
|
|
|
|
# Check a single HOST over ssh: reachable, /bin/bash as default shell, and
# the BeeOND stoplocal helper installed. Aborts the script on any failure.
check_reachability()
{
    local HOST="$1"

    # error checks
    if [ "${HOST}" = "" ]
    then
        print_error_and_exit "Internal function 'check_reachability' was called without a hostname"
    fi

    print_info "Checking reachability of host ${HOST}"

    execute_ssh_cmd "${HOST}" "test \${SHELL} = '/bin/bash'"
    RES=$?
    # 255 is ssh's own "connection failed" status; 1 comes from the remote test
    if [ $RES -eq 255 ]
    then
        print_error_and_exit "Host is unreachable via ssh: ${HOST}"
    elif [ $RES -eq 1 ]
    then
        print_error_and_exit "Host doesn't use /bin/bash as default shell: ${HOST}"
    elif [ $RES -ne 0 ]
    then
        print_error_and_exit "Error contacting host: ${HOST}"
    fi

    execute_ssh_cmd "${HOST}" "test -e ${BEEOND_STOPLOCAL}" || \
        print_error_and_exit "Could not find BeeOND helper program on host: ${HOST}
Please make sure BeeOND is installed on all machines."
}
|
|
|
|
# Validate the global HOSTFILE: it must be set and refer to an existing
# regular file; otherwise the script aborts.
check_hostfile()
{
    if [ -z "${HOSTFILE}" ]
    then
        print_error_and_exit "Node file undefined"
    fi

    if ! [ -f "${HOSTFILE}" ]
    then
        print_error_and_exit "Node file does not exist: ${HOSTFILE}"
    fi
}
|
|
|
|
# Abort unless the global DATA_PATH (-d) has been provided.
check_datapath()
{
    if [ -z "${DATA_PATH}" ]
    then
        print_error_and_exit "Path for BeeGFS data undefined"
    fi
}
|
|
|
|
# Abort unless the global MOUNTPOINT (-c) has been provided.
check_mountpoint()
{
    if [ -z "${MOUNTPOINT}" ]
    then
        print_error_and_exit "Path for client mountpoint undefined"
    fi
}
|
|
|
|
# Pre-flight check of the per-node status file on every host:
#   - fail if the file already exists (a session may already be running)
#   - touch it to prove it can be created (otherwise we can't continue)
# Uses pdsh when USE_PDSH=true, a sequential ssh loop otherwise.
# (Fixes the "one ore more" typo in two user-facing error messages.)
check_statusfile()
{
    local HOSTS=$1

    if [ "${HOSTS}" = "" ]
    then
        print_error_and_exit "Internal function 'check_statusfile' was called without a hostname"
    fi

    local CHECK_CMD="[ ! -e \"${STATUSFILE}\" ]"
    local TOUCH_CMD="touch \"${STATUSFILE}\""

    if [ "${USE_PDSH}" = "true" ]
    then
        # see if statusfile already exists
        if ! ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" "${CHECK_CMD} || (echo \"Statusfile already exists.\" && false)"
        then
            print_error_and_exit "Statusfile ${STATUSFILE} on one or more hosts already exists. \
Maybe a session is already running or the previous session was not properly \
shut down."
        fi

        # touch statusfile on every host, to make sure the file can be accessed
        if ! ${PDSH} -R ${PDSH_RCMD} -S -w "${HOSTS}" "${TOUCH_CMD}"
        then
            print_error_and_exit "Could not create status file ${STATUSFILE} on one or more hosts."
        fi
    else
        IFS=,
        for HOST in ${HOSTS}
        do
            # see if statusfile already exists
            if ! ${SSH} "${SSH_PARAMS[@]}" "${HOST}" "${CHECK_CMD}"
            then
                print_error_and_exit "Status file ${STATUSFILE} on host ${HOST} already exists. \
Maybe a session is already running or the previous session was not properly \
shut down."
            fi
        done

        for HOST in ${HOSTS}
        do
            if ! ${SSH} "${SSH_PARAMS[@]}" "${HOST}" "${TOUCH_CMD}"
            then
                print_error_and_exit "Could not create status file ${STATUSFILE} on host ${HOST}"
            fi
        done
        unset IFS
    fi
}
|
|
|
|
# Ensure the LOG_PATH directory exists on every host passed as arguments.
# Uses pdsh in parallel when USE_PDSH=true, otherwise a sequential ssh loop.
create_log_path()
{
    local HOSTS
    HOSTS=$(IFS=,; echo "$*") # turn argument list into comma-separated string for PDSH

    if [ -z "${HOSTS}" ]
    then
        print_error_and_exit "Internal function 'create_log_path' was called without a host."
    fi

    # mkdir -p is idempotent: creates the path or succeeds silently if present
    CMD="mkdir -p \"${LOG_PATH}\""

    if [ "${USE_PDSH}" = "true" ]
    then
        execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"
        return
    fi

    # no pdsh: sequential ssh fallback
    IFS=,
    for HOST in ${HOSTS}
    do
        execute_ssh_cmd "${HOST}" "${CMD}" || \
            print_error_and_exit "Could not create log path ${LOG_PATH} on host ${HOST}"
    done
    unset IFS
}
|
|
|
|
### internal functions for beegfs-ondemand start ###
|
|
|
|
# Mount a fresh tmpfs at DATAPATH on every host (RAM-backed storage/metadata,
# option -r) and record the mount in the status file so "stop" can clean it up.
start_tmpfs()
{
    local HOSTS=$1
    local DATAPATH=$2

    # error checks
    if [ "${HOSTS}" = "" ] || [ "${DATAPATH}" = "" ]
    then
        print_error_and_exit "Internal function 'start_tmpfs' called without all needes parameters"
    fi

    CMD="mkdir -p ${DATAPATH} && mount -t tmpfs tmpfs ${DATAPATH}"

    if [ "${USE_PDSH}" = "true" ]
    then
        print_info "Starting tempfs on the following hosts: ${HOSTS}"

        execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"

        # one status file record per host ("-" placeholders: no log/pid file)
        IFS=','
        for HOST in ${HOSTS}
        do
            if [ "${HOST}" = "" ]; then continue; fi
            add_to_status_file "${HOST}" tmpfs "${DATAPATH}" - -
        done
        unset IFS
    else
        # no pdsh => do it manually with ssh loop
        print_info "Starting tmpfs mounts"

        # for each host, start server
        IFS=,
        for HOST in ${HOSTS}
        do
            print_info "Starting tmpfs on host: ${HOST}"

            if ! execute_ssh_cmd "${HOST}" "${CMD}"
            then
                print_error_and_exit "Unable to start tmpfs on host: ${HOST}"
            else
                add_to_status_file "${HOST}" tmpfs "${DATAPATH}" "-" "-"
            fi
        done
        unset IFS
    fi
}
|
|
|
|
# Start beegfs-meta daemons on all HOSTS (comma separated). The remote command
# string is assembled locally; escaped \${PARAMS} references defer expansion
# to the remote shell so the per-host config file check happens remotely.
start_meta_servers()
{
    local HOSTS=$1 # comma seperated
    local DATAPATH=$2
    local MGMTD=$3
    local PORT_SHIFT=$4 # port shift can be empty!
    local CFG_PATH=$5 # may be empty
    local CFG_FILE=${CFG_PATH}/${META_CFG_NAME}

    local LOGFILE=${LOG_PATH}/${META_LOG}
    local PIDFILE=/var/run/${META_BIN}-${CURRENTTIME}.pid

    # error checks
    if [ "${HOSTS}" = "" ] || [ "${MGMTD}" = "" ] || [ "${DATAPATH}" = "" ]
    then
        print_error_and_exit "Internal function 'start_meta_servers_ssh' was called without all \
needed parameters"
    fi

    # each service gets its own subdirectory of the session data path
    DATAPATH=${DATAPATH}/${META_BIN}

    # CONNAUTH_LEGACY is assembled by the option parser (may be empty)
    PARAMS="sysMgmtdHost=${MGMTD} storeMetaDirectory=${DATAPATH} logStdFile=${LOGFILE} \
${CONNAUTH_LEGACY} runDaemonized=true pidFile=${PIDFILE}"

    if [ "${PORT_SHIFT}" != "" ]
    then
        PARAMS="${PARAMS} connPortShift=${PORT_SHIFT}"
    fi

    # remote command: optionally append cfgFile (if present on that node),
    # optionally wipe old data (-F), then launch the daemon
    CMD="PARAMS=\"${PARAMS}\"; \
if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
PARAMS=\"\${PARAMS} cfgFile=${CFG_FILE}\"; fi; \
if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
rm -rf ${DATAPATH}; fi; \
${BEEGFS_BIN_PATH}/${META_BIN} \${PARAMS}"

    if [ "${USE_PDSH}" = "true" ]
    then
        print_info "Starting ${META_BIN} processes on the following hosts: ${HOSTS}"
        print_info "Metadata server log: ${LOGFILE}"

        execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"

        if [ "${PREFER_LOCAL}" = "true" ]
        then
            # create the preferred MDS file (actually just a symlink to the node ID file)
            execute_pdsh_cmd "${HOSTS}" "rm -f ${PREFERRED_MDS_FILE}; \
ln -s ${DATAPATH}/${META_NUMID_FILE} ${PREFERRED_MDS_FILE}" "false"
        fi

        # %h is expanded by pdsh to the respective hostname
        execute_pdsh_cmd "${HOSTS}" "echo %h,${META_BIN},${DATAPATH},${LOGFILE},${PIDFILE} >> ${STATUSFILE}" "false"

    else
        # no pdsh => do it manually with ssh loop
        print_info "Starting ${META_BIN} processes"
        print_info "Metadata server log: ${LOGFILE}"

        # for each host, start server
        IFS=,
        for HOST in ${HOSTS}
        do
            print_info "Starting ${META_BIN} on host: ${HOST}"
            if ! execute_ssh_cmd "${HOST}" "${CMD}"
            then
                print_error_and_exit "Unable to start ${META_BIN} on host: ${HOST}"
            else
                add_to_status_file "${HOST}" "${META_BIN}" "${DATAPATH}" "${LOGFILE}" "${PIDFILE}"
                if [ "${PREFER_LOCAL}" = "true" ]
                then
                    # create the preferred MDS file (actually just a symlink to the node ID file)
                    execute_ssh_cmd "${HOST}" "rm -f ${PREFERRED_MDS_FILE}; \
ln -s ${DATAPATH}/${META_NUMID_FILE} ${PREFERRED_MDS_FILE}"
                fi
            fi
        done
        unset IFS
    fi

    if [ "${QUIET}" != "true" ]
    then
        echo ""
    fi
}
|
|
|
|
# Start beegfs-storage daemons on all HOSTS (comma separated).
# With a target file (-t), every target directory that exists on a host is
# added as a storage target of that host; otherwise a single
# storeStorageDirectory under DATAPATH is used. Afterwards, storage pools are
# created when a target file was given.
start_storage_servers()
{
    local HOSTS=$1
    local DATAPATH=$2
    local MGMTD=$3
    local PORT_SHIFT=$4 # port shift can be empty!
    local CFG_PATH=$5 # may be empty
    local CFG_FILE=${CFG_PATH}/${STORAGE_CFG_NAME}

    local LOGFILE=${LOG_PATH}/${STORAGE_LOG}
    local PIDFILE=/var/run/${STORAGE_BIN}-${CURRENTTIME}.pid

    # error checks
    if [ "${HOSTS}" = "" ] || [ "${MGMTD}" = "" ] || [ "${DATAPATH}" = "" ]
    then
        print_error_and_exit "Internal function 'start_storage_servers' was called without all \
needed parameters"
    fi

    DATAPATH=${DATAPATH}/${STORAGE_BIN}

    PARAMS="sysMgmtdHost=${MGMTD} logStdFile=${LOGFILE} runDaemonized=true pidFile=${PIDFILE} ${CONNAUTH_LEGACY}"

    if [ "${PORT_SHIFT}" != "" ]
    then
        PARAMS="${PARAMS} connPortShift=${PORT_SHIFT}"
    fi

    if [ "${TARGETFILE}" != "" ]
    then
        local ALL_TARGETS
        ALL_TARGETS=$(get_all_targets_from_targetfile)

        # Remote command: collect the target dirs that exist on this host,
        # optionally clearing their contents (-F), then start the daemon.
        # BUGFIX: this used to be 'rm -rf \"${T}/*\"', which expanded ${T}
        # locally (empty at build time) and quoted the glob, so old target
        # data was never actually removed; \${T} must be expanded remotely
        # and the glob must stay outside the quotes.
        CMD="while read T; do \
if [ -d \"\${T}\" ] ; then EXISTING_TARGETS=\"\${EXISTING_TARGETS}\${T},\"; \
if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
rm -rf \"\${T}\"/*; \
fi;
fi ; \
done < <(echo \"${ALL_TARGETS}\" | tr ',' '\n' ); \
PARAMS=\"${PARAMS} storeStorageDirectory=\${EXISTING_TARGETS}\"; \
if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
PARAMS=\"\${PARAMS} cfgFile=${CFG_FILE}\"; fi; \
${BEEGFS_BIN_PATH}/${STORAGE_BIN} \${PARAMS}"
    else
        CMD="PARAMS=\"${PARAMS} storeStorageDirectory=${DATAPATH}\"; \
if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
PARAMS=\"\${PARAMS} cfgFile=${CFG_FILE}\"; fi; \
if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
rm -rf ${DATAPATH}; fi; \
${BEEGFS_BIN_PATH}/${STORAGE_BIN} \${PARAMS}"
    fi

    if [ "${USE_PDSH}" = "true" ]
    then
        print_info "Starting ${STORAGE_BIN} processes on the following hosts: ${HOSTS}"
        # trailing ',' removed
        print_info "Storage server log: ${LOGFILE}"

        execute_pdsh_cmd "${HOSTS}" "${CMD}" "false"

        if [ "${PREFER_LOCAL}" = "true" ]
        then
            # create the preferred target file (actually just a symlink to the target ID file)
            execute_pdsh_cmd "${HOSTS}" "rm -f ${PREFERRED_TARGET_FILE}; \
ln -s ${DATAPATH}/${TARGET_NUMID_FILE} ${PREFERRED_TARGET_FILE}" "false"
        fi

        # %h is expanded by pdsh to the respective hostname
        execute_pdsh_cmd "${HOSTS}" "echo %h,${STORAGE_BIN},${DATAPATH},${LOGFILE},${PIDFILE} >> ${STATUSFILE}" "false"

    else
        # no pdsh => do it manually with ssh loop
        print_info "Starting ${STORAGE_BIN} processes"
        print_info "Storage server log: ${LOGFILE}"

        # for each host, start server
        IFS=,
        for HOST in ${HOSTS}
        do
            print_info "Starting ${STORAGE_BIN} on host: ${HOST}"
            if ! execute_ssh_cmd "${HOST}" "${CMD}"
            then
                print_error_and_exit "Unable to start ${STORAGE_BIN} on host: ${HOST}"
            else
                add_to_status_file "${HOST}" "${STORAGE_BIN}" "${DATAPATH}" "${LOGFILE}" "${PIDFILE}"
                if [ "${PREFER_LOCAL}" = "true" ]
                then
                    # create the preferred target file (actually just a symlink to the target ID file)
                    execute_ssh_cmd "${HOST}" "rm -f ${PREFERRED_TARGET_FILE}; \
ln -s ${DATAPATH}/${TARGET_NUMID_FILE} ${PREFERRED_TARGET_FILE}"
                fi
            fi
        done
        unset IFS
    fi

    if [ "${TARGETFILE}" != "" ]
    then
        create_storage_pools "${HOSTS}"
    fi

    if [ "${QUIET}" != "true" ]
    then
        echo ""
    fi
}
|
|
|
|
# For each "pool:target,..." line in TARGETFILE, collect the numeric target
# IDs that the storage daemons created on the hosts, then register the pool
# (or re-alias the built-in default pool) via the beegfs ctl tool.
create_storage_pools()
{
    local HOSTS=$1

    if [ "${TARGETFILE}" != "" ]
    then
        while read LINE
        do
            # split "pool:path1,path2,..." at the first colon
            IFS=: read POOL TARGETS <<< "${LINE}"
            TARGETS=$(echo "${TARGETS}" | tr -d "[:space:]")

            TARGET_IDS=
            while read HOST
            do
                # on each host, print the target ID of every listed target
                # dir that actually exists there (\${T} expands remotely)
                CMD="echo \"${TARGETS}\" | tr ',' '\n' | \
while read T; do if [ -f \"\${T}/${TARGET_NUMID_FILE}\" ]; then \
echo -n \"\$(cat \"\${T}/targetNumID\") \"; fi; done"
                HOST_TARGETS=$(execute_ssh_cmd "${HOST}" "${CMD}")
                for TARGET_ID in $HOST_TARGETS
                do
                    # build "storage:ID,storage:ID,..." without a leading comma
                    TARGET_IDS="${TARGET_IDS:+$TARGET_IDS,}storage:$TARGET_ID"
                done
            done < <(echo "${HOSTS}" | tr ',' '\n')

            if [ "$POOL" == "default" ] || [ "$POOL" == "Default" ]
            then
                # the default pool (storage:1) always exists; just set its alias
                "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
                    pool set-alias storage:1 "${POOL}" > /dev/null
            else
                # create pool with collected ids
                "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
                    pool create "${POOL}" --targets "${TARGET_IDS}" > /dev/null
            fi
        done < <(grep -v "^$" "${TARGETFILE}" | grep -v "^\s*\#")
    fi
}
|
|
|
|
# Create one directory per storage pool at the root of the BeeGFS mount and
# pin each directory to its pool via the ctl tool.
assign_storage_pool_dirs()
{
    local POOLS
    # list pool aliases; drop blank lines and the table header row
    POOLS=$("${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
        pool list --columns alias | grep -v '^\s*$'| tail -n+2)

    while read LINE
    do
        read ALIAS <<< "${LINE}"

        "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
            entry create dir --mount=none "/${ALIAS}" > /dev/null

        "${CTL_BIN}" ${CTL_GLOBAL_PARAMS} \
            entry set --mount=none --pool "${ALIAS}" "/${ALIAS}" &> /dev/null

    done < <(echo "${POOLS}")
}
|
|
|
|
# Validate TARGETFILE (-t): line syntax, unique pool names, unique target
# paths, and the option combinations it requires (-F mandatory, -k forbidden).
check_targetfile()
{
    local CHECK1
    local CHECK2
    local LINE_REGEX

    # valid line: "pool:path[,path...]" with no whitespace (PCRE, case-insensitive)
    LINE_REGEX='^\w+:([\w/_.-]+,?)+\s*$'

    # CHECK1: at least one valid line must exist
    CHECK1=$(grep -i -P "${LINE_REGEX}" "${TARGETFILE}")
    # CHECK2: anything that is neither valid, a comment, nor blank is an error
    CHECK2=$(grep -i -P -v "${LINE_REGEX}" "${TARGETFILE}" | grep -v "^\s*\#" | \
        grep -v "^$")

    if [ "${CHECK1}" == "" ] || [ "${CHECK2}" != "" ]
    then
        print_error_and_exit "${TARGETFILE} contains invalid entries or is empty."
    fi

    # duplicate pool names (case-insensitive)?
    CHECK1=$(grep -v "^$" "${TARGETFILE}" | grep -v "^\s*\#" | \
        tr -d ' ' | awk -F ':' '{print $1}' | sort | uniq -i -d)

    if [ "${CHECK1}" != "" ]
    then
        print_error_and_exit "${TARGETFILE} contains non-unique pool names."
    fi

    # duplicate target paths across all pools (case-insensitive)?
    CHECK1=$(get_all_targets_from_targetfile | tr ',' '\n' | sort | uniq -i -d)

    if [ "${CHECK1}" != "" ]
    then
        print_error_and_exit "${TARGETFILE} contains non-unique target paths."
    fi

    if [ "${CLEAR_DATA}" != "true" ]
    then
        print_error_and_exit "Using storage pools requires the -F option \
to make sure no old data is left."
    fi

    if [ "${STORAGE_MIRROR}" == "true" ]
    then
        print_error_and_exit "Using storage pools doesn't support storage mirroring (-k)."
    fi
}
|
|
|
|
# Echo a comma-separated list (with trailing comma) of every target path in
# TARGETFILE, skipping blank lines and comments.
# Fixes: quote "${LINE}" in the here-string (consistent with
# create_storage_pools) and declare the loop variables local so they don't
# leak into the caller's scope.
get_all_targets_from_targetfile()
{
    local ALL_TARGETS
    local LINE POOL TARGETS T

    while read LINE
    do
        # split "pool:path1,path2,..." at the first colon
        IFS=: read POOL TARGETS <<< "${LINE}"
        while read T
        do
            T=$(echo "${T}" | tr -d "[:space:]")
            ALL_TARGETS="${ALL_TARGETS}${T},"
        done < <(echo "${TARGETS}" | tr ',' '\n')
    done < <(grep -v "^$" "${TARGETFILE}" | grep -v "^\s*\#")
    echo "$ALL_TARGETS"
}
|
|
|
|
# Start the single beegfs-mgmtd daemon on HOST: initialize its sqlite DB,
# then launch it daemonized (always via ssh; there is only one mgmtd).
start_mgmtd()
{
    local HOST=$1
    local DATAPATH=$2
    local PORT_SHIFT=$3 # port shift can be empty!
    local CFG_PATH=$4 # may be empty
    local CFG_FILE=${CFG_PATH}/${MGMTD_CFG_NAME}

    local PIDFILE=/var/run/${MGMTD_BIN}-${CURRENTTIME}.pid

    # error checks
    if [ "${HOST}" = "" ] || [ "${DATAPATH}" = "" ]
    then
        print_error_and_exit "Internal function 'start_mgmtd' was called without all needed \
parameters"
    fi

    DATAPATH=${DATAPATH}/${MGMTD_BIN}
    DBPATH=${DATAPATH}/beegfs-mgmtd.sqlite3

    # start server
    print_info "Starting ${MGMTD_BIN} processes"

    print_info "Starting ${MGMTD_BIN} on host: ${HOST}"

    # TLS_*/CONNAUTH_FLAG/LICENSE_FILE are assembled by the option parser (may be empty)
    PARAMS="--db-file ${DBPATH} --daemonize true --daemonize-pid-file ${PIDFILE} ${TLS_DISABLE} ${TLS_CERT_FILE} ${TLS_KEY_FILE} ${CONNAUTH_FLAG} ${LICENSE_FILE}"

    if [ "${PORT_SHIFT}" != "" ]
    then
        PARAMS="${PARAMS} --port-shift ${PORT_SHIFT}"
    fi

    # remote command: optional config file, optional data wipe (-F),
    # DB initialization (--init), then daemon start
    CMD="PARAMS=\"${PARAMS}\"; \
if [ -n \"${CFG_PATH}\" ] && [ -e \"${CFG_FILE}\" ]; then \
PARAMS=\"\${PARAMS} --config-file ${CFG_FILE}\"; fi; \
if [ \"${CLEAR_DATA}\" = \"true\" ]; then \
rm -rf ${DATAPATH}; fi; \
${BEEGFS_BIN_PATH}/${MGMTD_BIN} --init --db-file ${DBPATH}; \
${BEEGFS_BIN_PATH}/${MGMTD_BIN} \${PARAMS}"

    if ! execute_ssh_cmd "${HOST}" "${CMD}"
    then
        print_error_and_exit "Unable to start ${MGMTD_BIN} on host: ${HOST}"
    else
        # "-" placeholder: mgmtd writes no separate BeeOND log file
        add_to_status_file "${HOST}" "${MGMTD_BIN}" "${DATAPATH}" "-" "${PIDFILE}"
    fi

    if [ "${QUIET}" != "true" ]
    then
        echo ""
    fi
}
|
|
|
|
# Mount the BeeOND file system on all HOSTS: load (or rebuild) the beegfs
# kernel module, then mount with options assembled from the session settings.
start_clients()
{
    local HOSTS=$1
    local MGMTD=$2
    local MOUNTPOINT=$3
    local PORT_SHIFT=$4 # port shift can be empty!
    local CFG_PATH=$5 # may be empty
    local CLIENT_CFG_FILE=${CFG_PATH}/${CLIENT_CFG_NAME}

    local LOGFILE=${LOG_PATH}/${CLIENT_LOG}

    # error checks
    if [ "${HOSTS}" = "" ] || [ "${MGMTD}" = "" ] || [ "${MOUNTPOINT}" = "" ]
    then
        print_error_and_exit "Internal function 'start_clients_ssh' was called without all \
needed parameters"
    fi

    MODPROBE_CMD="modprobe beegfs"
    # fallback: recompile the client kernel module if modprobe fails
    REBUILD_CMD="/etc/init.d/${CLIENT_BIN} rebuild"

    # ${CONNAUTH_LEGACY// /,} converts the space-separated daemon option into
    # comma-separated mount-option syntax
    MOUNT_PARAMS="-osysMgmtdHost=${MGMTD},${CONNAUTH_LEGACY// /,}"

    if [ "${PORT_SHIFT}" != "" ]
    then
        MOUNT_PARAMS="${MOUNT_PARAMS},connPortShift=${PORT_SHIFT}"
    fi

    # remote mount command: optional cfgFile, optional preferred meta/target
    # files (-l), then create the mountpoint, load the module and mount
    MOUNT_CMD="PARAMS=\"${MOUNT_PARAMS}\"; if [ -n \"${CFG_PATH}\" ] && \
[ -e \"${CLIENT_CFG_FILE}\" ]; then PARAMS=\"\${PARAMS},cfgFile=${CLIENT_CFG_FILE}\"; fi; \
if [ \"${PREFER_LOCAL}\" = \"true\" ] && [ -e \"${PREFERRED_MDS_FILE}\" ]; \
then PARAMS=\"\${PARAMS},tunePreferredMetaFile=${PREFERRED_MDS_FILE}\"; fi; \
if [ \"${PREFER_LOCAL}\" = \"true\" ] && [ -e \"${PREFERRED_TARGET_FILE}\" ]; \
then PARAMS=\"\${PARAMS},tunePreferredStorageFile=${PREFERRED_TARGET_FILE}\"; fi; \
mkdir -p ${MOUNTPOINT} && ${MODPROBE_CMD} && mount -t beegfs beegfs_ondemand ${MOUNTPOINT} \${PARAMS}"

    if [ "${USE_PDSH}" = "true" ]
    then
        # trailing ',' removed in output
        print_info "Starting ${CLIENT_BIN} processes on the following hosts: ${HOSTS}"
        print_info "Client log: ${LOGFILE}"

        # %h is expanded by pdsh to the respective hostname
        execute_pdsh_cmd "${HOSTS}" "echo %h,${CLIENT_BIN},${MOUNTPOINT},${LOGFILE},- >> ${STATUSFILE}" "false"

        execute_pdsh_cmd "${HOSTS}" "${MODPROBE_CMD} || ${REBUILD_CMD}" "false"
        execute_pdsh_cmd "${HOSTS}" "${MOUNT_CMD}" "false"

        if [ "${PREFER_LOCAL}" = "true" ] #set target count to 1
        then
            CTL_CMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
entry set --num-targets 1 --chunk-size 512ki ${MOUNTPOINT} > /dev/null"
            execute_pdsh_cmd "${HOSTS}" "${CTL_CMD}" "false"
        fi
    else
        # no pdsh => do it manually with ssh loop

        print_info "Starting ${CLIENT_BIN} processes"
        print_info "Client log: ${LOGFILE}"

        # for each host, start client
        IFS=,
        for HOST in ${HOSTS}
        do
            print_info "Starting ${CLIENT_BIN} on host: ${HOST}"

            if ! execute_ssh_cmd "${HOST}" "${MODPROBE_CMD}"
            then
                print_info "Module beegfs could not be loaded on host: ${HOST}. Trying to recompile \
from source."
                execute_ssh_cmd "${HOST}" "${REBUILD_CMD}"
            fi

            if ! execute_ssh_cmd "${HOST}" "${MOUNT_CMD}"
            then
                print_error_and_exit "Unable to start BeeGFS client on host: ${HOST}"
            else
                # NOTE : mountpoint as data path
                add_to_status_file "${HOST}" "${CLIENT_BIN}" "${MOUNTPOINT}" "${LOGFILE}" "-"

                if [ "${PREFER_LOCAL}" = "true" ] #set target count to 1
                then
                    CTL_CMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
entry set --num-targets 1 --chunk-size 512ki ${MOUNTPOINT} > /dev/null"
                    execute_ssh_cmd "${HOST}" "${CTL_CMD}"
                fi

            fi
        done
        unset IFS
    fi

    if [ "${QUIET}" != "true" ]
    then
        echo ""
    fi
}
|
|
|
|
add_to_status_file()
{
    # Append one CSV record describing a started service to the status file on
    # the given host (via ssh), so that a later "stop" run can find and shut
    # the service down again.
    # Arguments:
    #   $1 - hostname          $2 - service name (e.g. beegfs-meta)
    #   $3 - data path         $4 - log file path
    #   $5 - pid file path ("-" if the service has none)
    # Record format: host,service,datapath,logfile,pidfile
    local HOST=$1
    local SERVICE=$2
    local DATAPATH=$3
    local LOGFILE=$4
    local PIDFILE=$5
    local INFO # keep the assembled record local; it previously leaked into global scope

    # error checks
    # NOTE(review): DATAPATH is deliberately not validated here — callers pass
    # "-" or may pass an empty value for services without a data directory; confirm.
    if [ "${HOST}" = "" ] || [ "${SERVICE}" = "" ] || [ "${LOGFILE}" = "" ] || [ "${PIDFILE}" = "" ]
    then
        print_error_and_exit "Internal function 'add_to_status_file' was called without all \
needed parameters"
    fi

    INFO="${HOST},${SERVICE},${DATAPATH},${LOGFILE},${PIDFILE}"
    execute_ssh_cmd "${HOST}" "echo ${INFO} >> ${STATUSFILE}"
}
|
|
|
|
### internal functions for beegfs-ondemand stop ###
|
|
|
|
# build the argument string for the "stoplocal" function
|
|
make_stoplocal_args()
{
    # Compose the option string handed to the remote do_stoplocal invocation.
    # Output always starts with " -q" (quiet); the remaining flags mirror the
    # caller-visible globals DELETE_DATA, DELETE_LOGS and CLEANUP.
    local FLAGS=" -q" # always run the remote side quietly

    if [ "${DELETE_DATA}" = "true" ]; then
        FLAGS+=" -d" # remove data directories
    fi

    if [ "${DELETE_LOGS}" = "true" ]; then
        FLAGS+=" -L" # remove log files
    fi

    if [ "${CLEANUP}" = "true" ]; then
        FLAGS+=" -c" # tolerate files missing from already shut-down instances
    fi

    echo "${FLAGS}"
}
|
|
|
|
stop_procs()
{
    # Stop every remaining BeeOND server process recorded in the status file on
    # all given hosts, then remove the status file itself everywhere.
    # Arguments:
    #   $1 - comma-separated host list
    #   $2 - delete-data flag, $3 - delete-logs flag (consumed indirectly by
    #        make_stoplocal_args through the local shadows below)
    local HOSTS=$1
    local DELETE_DATA=$2
    local DELETE_LOGS=$3

    # remote command: pull in the stoplocal library and stop the servers
    STOPSERVERSCMD="source ${BEEOND_STOPLOCAL}; \
do_stoplocal -s -i ${STATUSFILE} $(make_stoplocal_args)"

    # split the comma-separated list once, instead of toggling the global IFS
    local -a HOST_ARR
    IFS=, read -r -a HOST_ARR <<< "${HOSTS}"

    # phase 1: stop the server processes via ssh/pdsh
    if [ "${USE_PDSH}" = "true" ]; then
        print_info "Stopping remaining processes on the following hosts: ${HOSTS}"

        execute_pdsh_cmd "${HOSTS}" "${STOPSERVERSCMD}" "true"
    else
        # ssh mode - launch command for each host separately
        for HOST in "${HOST_ARR[@]}"; do
            print_info "Stopping remaining processes on host: ${HOST}"
            execute_ssh_cmd "${HOST}" "${STOPSERVERSCMD}"
        done
    fi

    # phase 2: delete the statusfile
    local DELETESTATUSFILECMD="rm -f ${STATUSFILE}"

    if [ "${USE_PDSH}" = "true" ]; then
        print_info "Deleting status file on hosts: ${HOSTS}"

        execute_pdsh_cmd "${HOSTS}" "${DELETESTATUSFILECMD}" "true"
    else
        # ssh mode - launch command for each host separately
        for HOST in "${HOST_ARR[@]}"; do
            print_info "Deleting status file on host: ${HOST}"
            execute_ssh_cmd "${HOST}" "${DELETESTATUSFILECMD}"
        done
    fi
}
|
|
|
|
unmount_clients()
{
    # Unmount the BeeOND client file system on all given hosts by running the
    # sourced "do_stoplocal -u" helper remotely (via pdsh or an ssh loop).
    # Arguments:
    #   $1 - comma-separated host list
    local HOSTS=$1 # was an unscoped (global) assignment; now local, consistent with stop_procs

    # prepare command for remote script
    local UMOUNTCMD
    UMOUNTCMD="source ${BEEOND_STOPLOCAL}; \
do_stoplocal -u -i ${STATUSFILE} $(make_stoplocal_args)"

    if [ "${USE_PDSH}" = "true" ]
    then
        print_info "Unmounting file system on the following hosts: ${HOSTS}"

        execute_pdsh_cmd "${HOSTS}" "${UMOUNTCMD}" "true"
    else
        # ssh mode - launch command for each host separately
        IFS=,
        for HOST in ${HOSTS}
        do
            print_info "Unmounting file system on host: ${HOST}"
            execute_ssh_cmd "${HOST}" "${UMOUNTCMD}"
        done
        unset IFS
    fi
}
|
|
|
|
# Blocks until all targets are online/good
# Parameters: nodetype numNodes
wait_online_good()
{
    # Busy-waits (1 s polling interval, no timeout) until the mgmtd reports:
    #   1. the expected number of nodes of the given type,
    #   2. all of their targets as reachable ("Online") and in sync ("Good"),
    #   3. capacity (space/inode) information for every target.
    # Arguments:
    #   $1 - node type for "--node-type" (e.g. meta, storage)
    #   $2 - expected number of nodes of that type
    local NODE_TYPE=$1
    local NUM_NODES=$2

    #first, wait for the correct number of *nodes* to become available.
    echo -n "Waiting until all nodes have registered with mgmtd..."
    # NOTE(review): "head -n-1 | tail -n+2" strips what appears to be a header
    # row and a trailing summary line from the ctl table output, leaving one
    # line per node/target -- confirm against the installed ctl version.
    local CTLCMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
node list --node-type ${NODE_TYPE}"
    while [ ! "$(${CTLCMD} | head -n-1 | tail -n+2 | wc -l)" = "${NUM_NODES}" ]
    do
        echo -n "."
        sleep 1
    done
    echo

    # now wait for all *targets* to become online/good. (also works for metadata servers, since they
    # are internally treated as one target per server)
    echo -n "Waiting for all nodes/targets to be online and in sync..."
    local CTLCMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
target list --node-type ${NODE_TYPE} --state --columns reachability,consistency"
    # loop until the number of rows matching "Online <ws> Good" equals the total row count
    while [ ! "$(${CTLCMD} | head -n-1 | tail -n+2 | grep "Online\s\+Good" -c)" = "$(${CTLCMD} | head -n-1 | tail -n+2 | wc -l)" ]
    do
        echo -n "."
        sleep 1
    done
    echo

    # and now, we wait until all targets have reported their available space and inodes. It should be
    # good enough to only check space, because both will be reported at the same time. Without this,
    # the automatic mirror group creation might fail, because it compares target sizes and free
    # inodes.
    echo -n "Waiting for all nodes/targets to report their available space..."
    local CTLCMD="${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
target list --node-type ${NODE_TYPE} --raw --columns space"
    # a lone "-" in the space column means the target has not reported capacity yet;
    # loop until no such row remains
    while [ ! "$(${CTLCMD} | head -n-1 | tail -n+2 | grep -s '^-\s\+$' | wc -l)" == 0 ]
    do
        echo -n "."
        sleep 1
    done
    echo

}
|
|
|
|
### main functions ###
|
|
do_start()
{
    # Parse the "start" command line options, sanity-check the environment and
    # bring up a complete BeeOND instance on the hosts listed in the host file:
    # mgmtd (on the first host), storage servers, metadata servers and clients.
    # Optionally enables buddy mirroring, storage pools, tmpfs backing,
    # connection authentication, TLS and licensing.
    CLEAR_DATA="false"
    USE_PDSH="false"
    PREFER_LOCAL="false"
    QUIET="false"
    USE_TMPFS="false"
    STORAGE_MIRROR="false"
    META_MIRROR="false"
    PORT_SHIFT=${DEFAULT_PORT_SHIFT}
    ASSIGN_STORAGE_POOL_DIRS=true
    CONNAUTH_FLAG="--auth-disable"
    CONNAUTH_LEGACY="connDisableAuthentication=true"
    TLS_DISABLE="--tls-disable"
    TLS_CERT_FILE=""
    TLS_KEY_FILE=""
    LICENSE_FILE=""
    MGMTD_GRPC_PORT=${DEFAULT_MGMTD_GRPC_PORT}

    while getopts ":c:d:f:Fi:m:n:p:G:lL:Pb:s:qrkjt:TCEH" opt; do
        case $opt in
            n)
                HOSTFILE=${OPTARG}
                ;;
            d)
                DATA_PATH=${OPTARG}
                ;;
            F)
                CLEAR_DATA="true"
                ;;
            c)
                MOUNTPOINT=${OPTARG}
                ;;
            i)
                STATUSFILE=${OPTARG}
                ;;
            L)
                LOG_PATH=${OPTARG}
                ;;
            m)
                if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
                then
                    print_error_and_exit "number of metadata servers must be numeric";
                fi
                NUM_META_SERVER=${OPTARG}
                ;;
            p)
                if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
                then
                    print_error_and_exit "port shift must be numeric";
                fi
                PORT_SHIFT=${OPTARG}
                ;;
            G)
                if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
                then
                    print_error_and_exit "management gRPC port must be numeric";
                fi
                MGMTD_GRPC_PORT=${OPTARG}
                ;;
            P)
                USE_PDSH="true"
                ;;
            b)
                PDSH=${OPTARG}
                ;;
            s)
                if ! [[ ${OPTARG} =~ ^[0-9]+$ ]];
                then
                    print_error_and_exit "number of storage servers must be numeric";
                fi
                NUM_STORAGE_SERVER=$OPTARG
                ;;
            f)
                if ! [[ -d ${OPTARG} ]]; then
                    print_error_and_exit "The -f option expects a path to a directory: ${OPTARG}"
                fi
                CONFIGPATH=${OPTARG}
                ;;
            l)
                PREFER_LOCAL="true"
                ;;
            q)
                QUIET="true"
                ;;
            r)
                USE_TMPFS="true"
                ;;
            k)
                # mirroring is an enterprise feature; -H must come first on the command line
                if [[ -z ${LICENSE_FILE} ]] ; then
                    print_error_and_exit "To use mirroring, licensing (option -H) must be configured before the option -k"
                fi
                STORAGE_MIRROR="true"
                ;;
            j)
                if [[ -z ${LICENSE_FILE} ]] ; then
                    print_error_and_exit "To use mirroring, licensing (option -H) must be configured before the option -j"
                fi
                META_MIRROR="true"
                ;;
            t)
                if [[ -z ${LICENSE_FILE} ]] ; then
                    print_error_and_exit "To use storage pools, licensing (option -H) must be configured before the option -t"
                fi
                TARGETFILE=${OPTARG}
                ;;
            T)
                ASSIGN_STORAGE_POOL_DIRS="false"
                ;;
            C)
                # -f must precede -C so that CONFIGPATH is already set here
                if ! [[ -f ${CONFIGPATH}/conn.auth ]] ; then
                    print_error_and_exit "To use connection authentication, a config path (option -f) that contains a \"conn.auth\" file and is available on all nodes needs to be specified before the option -C"
                fi
                CONNAUTH_FLAG="--auth-file ${CONFIGPATH}/conn.auth"
                CONNAUTH_LEGACY="connAuthFile=${CONFIGPATH}/conn.auth connDisableAuthentication=false"
                ;;
            E)
                if ! [[ -f ${CONFIGPATH}/cert.pem && -f ${CONFIGPATH}/key.pem ]] ; then
                    print_error_and_exit "To use TLS encryption, a config path (option -f) that contains a \"cert.pem\" and a \"key.pem\" file and is available on all nodes needs to be specified before the option -E"
                fi
                TLS_DISABLE="--tls-disable=false"
                TLS_CERT_FILE="--tls-cert-file ${CONFIGPATH}/cert.pem"
                TLS_KEY_FILE="--tls-key-file ${CONFIGPATH}/key.pem"
                ;;
            H)
                if ! [[ -f ${CONFIGPATH}/license.pem ]] ; then
                    print_error_and_exit "To use enterprise features, a config path (option -f) that contains a \"license.pem\" file and is available on all nodes needs to be specified before the option -H"
                fi
                LICENSE_FILE="--license-cert-file ${CONFIGPATH}/license.pem"
                ;;
            \?)
                echo "ERROR: invalid option: -${OPTARG}" >&2
                print_usage_and_exit
                ;;
            :)
                echo "ERROR: Option -${OPTARG} requires an argument" >&2
                print_usage_and_exit
                ;;
        esac
    done

    if [ "${USE_PDSH}" = "true" ]
    then
        PDSH=${PDSH:-${DEFAULT_PDSH_PATH}}

        if [ -z "${PDSH}" ]; then
            echo "Unable to autodetect pdsh. Please specify using the -b option."
            exit 1
        fi
    fi

    check_hostfile
    check_datapath
    check_mountpoint

    if [ "${STORAGE_MIRROR}" = "true" ] && [ "${PREFER_LOCAL}" = "true" ]
    then
        print_error_and_exit "Options -k and -l are mutually exclusive."
    fi

    if [ "${TARGETFILE}" != "" ]
    then
        check_targetfile
    fi

    print_info "Using status information file: ${STATUSFILE}"

    # NOTE(review): uniq only collapses *adjacent* duplicate host lines;
    # non-adjacent duplicates in the host file would be counted twice — confirm
    # whether host files are expected to be grouped.
    NODECOUNT=$(grep -v '^$' ${HOSTFILE} | uniq | wc -l) #ignore empty lines
    NODES=( $(grep -v '^$' ${HOSTFILE} | uniq) ) #store as array and ignore empty lines

    # make list of all nodes first - needed for clients and tmpfs mounts
    ALLNODES=$(IFS=,; echo "${NODES[*]}")

    if [ "${USE_PDSH}" = "true" ]
    then
        # check all nodes for reachability and working PDSH
        check_pdsh "${ALLNODES}"
    else
        # check reachability of all nodes
        for HOST in "${NODES[@]}"
        do
            check_reachability "${HOST}"
        done
    fi

    check_statusfile "${ALLNODES}"

    # if the number of meta servers given is 0 or greater than node count, start it on all hosts
    if [ ${NUM_META_SERVER} -eq 0 ] || [ ${NUM_META_SERVER} -gt "${NODECOUNT}" ]
    then
        NUM_META_SERVER=${NODECOUNT}
        print_info "Number of metadata servers automatically set to ${NUM_META_SERVER}"
    fi

    # if the number of storage servers given is 0 or greater than node count, start it on hosts
    if [ ${NUM_STORAGE_SERVER} -eq 0 ] || [ ${NUM_STORAGE_SERVER} -gt "${NODECOUNT}" ]
    then
        NUM_STORAGE_SERVER=${NODECOUNT}
        print_info "Number of storage servers automatically set to ${NUM_STORAGE_SERVER}"
    fi

    # create the log path on all nodes if it doesn't exist yet
    # without an existing logfile path, the server won't start up
    create_log_path "${NODES[@]}"

    # take the first host as master host
    MASTERHOST=${NODES[0]}

    # delete STATUS_FILE
    execute_ssh_cmd "${MASTERHOST}" "rm -f ${STATUSFILE}"

    # mount tmpfs
    if [ "${USE_TMPFS}" = "true" ]
    then
        start_tmpfs "${ALLNODES}" "${DATA_PATH}"
    fi

    # MASTERHOST is also mgmtd host
    MGMTD=${MASTERHOST}

    # The gRPC port for MGMTD, important for CTL
    MGMTD_GRPC_PORT=$((MGMTD_GRPC_PORT+PORT_SHIFT))

    # Combine variables relevant to CTL into one
    CTL_GLOBAL_PARAMS="--mgmtd-addr ${MGMTD}:${MGMTD_GRPC_PORT} ${TLS_DISABLE} ${TLS_CERT_FILE} ${CONNAUTH_FLAG}"

    # port shift and config path may be empty, but that's ok
    start_mgmtd "${MGMTD}" "${DATA_PATH}" "${PORT_SHIFT}" "${CONFIGPATH}"

    # take the first NUM_STORAGE_SERVER as storage servers
    STORAGENODES=$(IFS=,; echo "${NODES[*]:0:${NUM_STORAGE_SERVER}}")

    # port shift and config path may be empty, but that's ok
    start_storage_servers "${STORAGENODES}" "${DATA_PATH}" "${MGMTD}" "${PORT_SHIFT}" "${CONFIGPATH}"

    # take the first NUM_META_SERVER as metadata servers
    METANODES=$(IFS=,; echo "${NODES[*]:0:${NUM_META_SERVER}}")

    # port shift and config path may be empty, but that's ok
    start_meta_servers "${METANODES}" "${DATA_PATH}" "${MGMTD}" "${PORT_SHIFT}" "${CONFIGPATH}"

    # give the management daemon some time to get all information from servers
    wait_online_good storage "${NUM_STORAGE_SERVER}"
    wait_online_good meta "${NUM_META_SERVER}"

    # enable mirroring
    if [ "${STORAGE_MIRROR}" = "true" ]
    then
        if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            mirror autocreate storage > /dev/null
        then
            print_error_and_exit "Unable to create storage target buddy mirror groups."
        fi

        # all metadata servers need to know about the storage mirror groups
        sleep 8
    fi

    if [ "${META_MIRROR}" = "true" ]
    then
        if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            mirror autocreate meta > /dev/null
        then
            print_error_and_exit "Unable to create metadata server buddy mirror groups."
        fi

        # all metadata servers need to know about the newly created mirror groups
        sleep 8

        if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            mirror init --yes > /dev/null
        then
            print_error_and_exit "Unable to enable metadata mirroring."
        fi

        # wait for initial resync
        wait_online_good meta "${NUM_META_SERVER}"
    fi

    # take all hosts as client
    # port shift and config path may be empty, but that's ok
    start_clients "${ALLNODES}" "${MGMTD}" "${MOUNTPOINT}" "${PORT_SHIFT}" "${CONFIGPATH}"

    if [ "${ASSIGN_STORAGE_POOL_DIRS}" = "true" ] && [ "${TARGETFILE}" != "" ]
    then
        assign_storage_pool_dirs
    fi

    if [ "${STORAGE_MIRROR}" = "true" ]
    then
        if ! ${CTL_BIN} ${CTL_GLOBAL_PARAMS} \
            entry set --chunk-size 512ki --num-targets 4 --pattern mirrored "${MOUNTPOINT}" > /dev/null
        then
            print_error_and_exit "Unable to enable mirroring pattern."
        fi
    fi

    echo " ****************************************************************************** "
    echo "* BeeOND setup finished successfully! To configure the \`beegfs\` command line"
    echo "* utility to talk to the BeeOND mgmtd service, some additional configuration"
    echo "* might be necessary."
    echo "*"
    echo "* If there is more than one BeeGFS mounted on the node that runs \`beegfs\`,"
    echo "* the correct mgmtd will need to be configured by using"
    echo "* --mgmtd-addr \"${MGMTD}:${MGMTD_GRPC_PORT}\" or"
    echo "* export BEEGFS_MGMTD_ADDR=\"${MGMTD}:${MGMTD_GRPC_PORT}\""
    # BUGFIX: previously tested "! [[ -z ${CONNAUTH_FLAG} ]]", which was always
    # true because CONNAUTH_FLAG defaults to "--auth-disable". Only print the
    # conn.auth hint when -C actually configured an auth file.
    if [[ ${CONNAUTH_FLAG} != "--auth-disable" ]]
    then
        echo "*"
        echo "* To configure \`beegfs\` to use the correct connection authentication file,"
        echo "* please use"
        echo "* --auth-file \"${CONFIGPATH}/conn.auth\" or"
        echo "* export BEEGFS_AUTH_FILE=\"${CONFIGPATH}/conn.auth\""
    fi
    # BUGFIX: previously tested "! [[ -z ${TLS_DISABLE} ]]", which was always
    # true because TLS_DISABLE is never empty ("--tls-disable" or
    # "--tls-disable=false"). Only print the TLS hint when -E enabled TLS.
    if [[ -n ${TLS_CERT_FILE} ]]
    then
        echo "*"
        echo "* To configure \`beegfs\` to use TLS encryption when talking to mgmtd, use"
        echo "* --tls-cert-file \"${CONFIGPATH}/cert.pem\" or"
        echo "* export BEEGFS_TLS_CERT_FILE=\"${CONFIGPATH}/cert.pem\""
    fi
    echo " ****************************************************************************** "
}
|
|
|
|
do_stop()
{
    # Parse the "stop" command line options and shut down a BeeOND instance:
    # unmount the clients on all hosts, then stop every server process that is
    # recorded in the status file. Does not abort on per-host errors.
    DELETE_DATA="false"
    USE_PDSH="false"
    DELETE_LOGS="false"
    QUIET="false"
    CLEANUP="false"

    while getopts "di:n:Pb:Lcq" opt; do
        case $opt in
            n)
                HOSTFILE=${OPTARG}
                ;;
            i)
                STATUSFILE=${OPTARG}
                ;;
            d)
                DELETE_DATA="true"
                ;;
            c)
                CLEANUP="true"
                ;;
            P)
                USE_PDSH="true"
                ;;
            b)
                PDSH=${OPTARG}
                ;;
            L)
                DELETE_LOGS="true"
                ;;
            q)
                QUIET="true"
                ;;
            \?)
                echo "ERROR: invalid option: -${OPTARG}" >&2
                print_usage_and_exit
                ;;
            :)
                echo "ERROR: Option -${OPTARG} requires an argument" >&2
                print_usage_and_exit
                ;;
        esac
    done

    if [ "${USE_PDSH}" = "true" ]
    then
        PDSH=${PDSH:-${DEFAULT_PDSH_PATH}}

        if [ -z "${PDSH}" ]; then
            echo "Unable to autodetect pdsh. Please specify using the -b option."
            exit 1
        fi
    fi

    check_hostfile

    print_info "Using status information file: ${STATUSFILE}"

    NODES=( $(grep -v '^$' ${HOSTFILE} | uniq) ) #store as array and ignore empty lines
    ALLNODES=$(IFS=,; echo "${NODES[*]}")

    if [ "${USE_PDSH}" = "true" ]
    then
        # check all nodes for reachability and working PDSH
        check_pdsh "${ALLNODES}"
    else
        # check reachability of all nodes (quoted expansion, consistent with do_start)
        for HOST in "${NODES[@]}"
        do
            check_reachability "${HOST}"
        done
    fi

    # take the first host as master host
    MASTERHOST=${NODES[0]}

    # read status file and unmount the client file system on every node
    # (ALLNODES was previously recomputed here redundantly; it is unchanged)
    unmount_clients "${ALLNODES}"

    # read status file on master host and stop all servers
    stop_procs "${ALLNODES}" "${DELETE_DATA}" "${DELETE_LOGS}"
}
|
|
|
|
# without any arguments all we can do is show the usage text
if [ $# -eq 0 ] ; then
    print_usage_and_exit
fi

# the first positional argument selects the mode of operation
ACTION=$1

case "${ACTION}" in
    start)
        shift
        do_start "$@"
        ;;
    stop)
        shift
        ERROR="false" # remember whether any error occurred, so that we can return
                      # a status code (the stop function does not abort on error)
        do_stop "$@"
        if [ "${ERROR}" = "true" ]
        then
            exit 1
        fi
        ;;
    stoplocal)
        shift
        do_stoplocal "$@"
        exit $?
        ;;
    *)
        # unknown action
        print_usage_and_exit
        ;;
esac
|