#!/bin/bash

# beegfs-ondemand-stoplocal
# This file contains helper functions to stop BeeOND services locally on one node.
# This is meant to be sourced from another script (i.e. beeond)
#
# All helpers read the status file (${STATUSFILE}) line by line. Each line is split at commas
# into five fields; the helpers below use them as:
#   field 2: service name (e.g. "tmpfs" or the value of ${CLIENTSERVICE})
#   field 3: data path or mount point
#   field 4: log file ("-" if there is none)
#   field 5: PID file ("-" if the entry is not a process)
# Field 1 is not used by any function in this file.
# The comma separator is applied to the "read" command only, so the IFS of the sourcing script
# is left untouched.

# Checks the return code of the last command that has been executed. If the code is !=0, indicating
# an error, it prints a message and sets an error flag.
# Parameters:
#  * The return code of the last command
#  * A string containing a hint on what was being done that could have caused the error. It is
#    used for the error message.
# Modifies:
#  ERROR: Is set to "true" when an error was encountered.
sl_checkerror() {
  if [ "${1}" != 0 ]; then
    echo "ERROR: There was a problem ${2} on host $(hostname)"
    ERROR="true"
  fi
}

# Prints an info message if the QUIET variable is not set.
# Parameter:
#  A string (the message). It is prefixed with INFO when printed.
# Checks:
#  QUIET: If "true", nothing is printed.
sl_print_info() {
  local MESSAGE=${1}
  if [ "${QUIET}" != "true" ]; then
    echo "INFO: ${MESSAGE}"
  fi
}

# Unmounts the tmpfs mounts listed in the status file.
# Checks:
#  STATUSFILE: The status file to read.
#  CLEANUP:    If "true", error output of fuser/umount is suppressed and failures are ignored.
# Modifies:
#  ERROR: Is set to "true" if an unmount fails (not in cleanup mode).
sl_unmount_tmpfs() {
  local SERVICE MOUNTPOINT _
  while IFS=, read -r _ SERVICE MOUNTPOINT _ _; do
    if [ "${SERVICE}" != "tmpfs" ]; then
      continue
    fi

    sl_print_info "Unmounting tmpfs at ${MOUNTPOINT}"
    if [ "${CLEANUP}" != "true" ]; then
      # no "sl_checkerror" after fuser, because fuser also returns non-zero when there are no
      # processes accessing the file system
      fuser -k "${MOUNTPOINT}"
      umount -l "${MOUNTPOINT}"
      sl_checkerror $? "unmounting tmpfs"
    else
      fuser -k "${MOUNTPOINT}" 2>/dev/null
      umount -l "${MOUNTPOINT}" 2>/dev/null
      true
    fi
  done < "${STATUSFILE}"
}

# Unmounts all local client mounts listed in the status file and then tries to unload the
# client kernel module.
# Checks:
#  STATUSFILE:    The status file to read.
#  CLIENTSERVICE: Service name that identifies client mount entries.
#  CLEANUP:       If "true", error output of fuser/umount is suppressed and failures are ignored.
# Modifies:
#  ERROR: Is set to "true" if an unmount fails (not in cleanup mode).
sl_unmount_local_mounts() {
  local SERVICE MOUNTPOINT _
  while IFS=, read -r _ SERVICE MOUNTPOINT _ _; do
    if [ "${SERVICE}" != "${CLIENTSERVICE}" ]; then
      continue
    fi

    sl_print_info "Unmounting ${MOUNTPOINT}"
    if [ "${CLEANUP}" != "true" ]; then
      # no "sl_checkerror" after fuser, because fuser also returns non-zero when there are no
      # processes accessing the file system
      fuser -k "${MOUNTPOINT}"
      umount -l "${MOUNTPOINT}"
      sl_checkerror $? "unmounting the ondemand file system"
    else
      fuser -k "${MOUNTPOINT}" 2>/dev/null
      umount -l "${MOUNTPOINT}" 2>/dev/null
      true # reset error code before next invocation of sl_checkerror
    fi
  done < "${STATUSFILE}"

  # try to remove the client module - this is allowed to fail, because we might have a "normal"
  # beegfs mount somewhere in the system.
  rmmod beegfs 2>/dev/null || true
}

# Sends a SIGTERM to a process, then waits until the process is stopped or approximately
# 10 seconds (100 polls, 0.1 seconds apart) have passed.
# Parameter:
#  The PID of the process
# Returns:
#  0 if process was stopped within 10 seconds, 1 if it wasn't, 255 if initial kill returned an
#  error.
sl_kill_check() {
  local PID=$1
  local i # keep the loop counter out of the sourcing script's namespace

  if ! kill "$PID"; then
    return 255
  fi

  for ((i = 0; i < 100; i++)); do
    # "kill -0" only probes whether the process still exists
    if ! kill -0 "$PID" 2>/dev/null; then
      return 0
    fi
    sleep 0.1
  done

  return 1
}

# Stops all services listed in the status file except for clients, optionally deletes their
# data, removes the preferredMds/preferredTarget files, and finally unmounts tmpfs mounts.
# Checks:
#  STATUSFILE:            The status file to read.
#  CLEANUP:               If "true", a missing PID file is not reported as an error.
#  DELETE_DATA:           If "true", the data path of each stopped service is removed.
#  PREFERRED_MDS_FILE,
#  PREFERRED_TARGET_FILE: Files that are removed for every service entry.
# Modifies:
#  ERROR: Is set to "true" if a service does not stop in time, a PID file is missing (not in
#         cleanup mode), or a deletion fails.
sl_stop_services() {
  local SERVICE DATAPATH PIDFILE PID RES _
  while IFS=, read -r _ SERVICE DATAPATH _ PIDFILE; do
    # pidfile is "-" for beegfs-client and tmpfs, because it is not a process
    if [ "${PIDFILE}" = "-" ]; then
      continue
    fi

    if [ -e "${PIDFILE}" ]; then
      PID=$(cat "${PIDFILE}")
      sl_kill_check "${PID}"
      RES=$?
      if [ "${RES}" -eq 1 ]; then
        echo "ERROR: ${SERVICE} did not stop within 10 seconds (PID ${PID})."
        ERROR="true"
      elif [ "${RES}" -eq 255 ]; then
        # reported, but deliberately not flagged in ERROR: the service is already gone
        echo "ERROR: ${SERVICE} does not seem to be running any more (PID ${PID})."
      fi
    elif [ "${CLEANUP}" != "true" ]; then
      echo "ERROR: PID file ${PIDFILE} does not exist on host $(hostname)"
      ERROR="true"
    fi

    # delete data... (an empty path is skipped so that rm is never run without a real target)
    if [ "${DELETE_DATA}" = "true" ] && [ "${DATAPATH}" != "-" ] && [ -n "${DATAPATH}" ]; then
      sl_print_info "Deleting stored data; Data path: ${DATAPATH}"
      rm -rf -- "${DATAPATH}"
      sl_checkerror $? "deleting ${DATAPATH}"
    fi

    # delete preferredMds and preferredTarget files
    rm -f "${PREFERRED_MDS_FILE}"
    sl_checkerror $? "deleting ${PREFERRED_MDS_FILE}"
    rm -f "${PREFERRED_TARGET_FILE}"
    sl_checkerror $? "deleting ${PREFERRED_TARGET_FILE}"
  done < "${STATUSFILE}"

  # unmount tmpfs if it was used
  sl_unmount_tmpfs
}

# Deletes the logfiles listed in the status file if ERROR is not "true".
# If the log directory is empty afterwards, it is also deleted.
# Checks:
#  STATUSFILE:       The status file to read.
#  ONLY_UNMOUNT:     If "true", only the log files of client entries are deleted.
#  ONLY_STOP_SERVER: If "true", the log files of client entries are kept.
#  CLIENTSERVICE:    Service name that identifies client entries.
# Modifies:
#  ERROR: Is set to "true" if removing the (empty) log directory fails.
sl_delete_logfiles() {
  local SERVICE LOGFILE LOG_DIR _
  # "read" clears its variables when it hits the end of the file, so LOGFILE is always empty
  # after the loop. The path needed for the directory check is therefore kept separately.
  local LAST_LOGFILE=""

  if [ "${ERROR}" = "true" ]; then
    sl_print_info "Not deleting log files because of a previous error."
    return 0
  fi

  # delete log files
  while IFS=, read -r _ SERVICE _ LOGFILE _; do
    if [ "${ONLY_UNMOUNT}" = "true" ] && [ "${SERVICE}" != "${CLIENTSERVICE}" ]; then
      continue
    fi
    if [ "${ONLY_STOP_SERVER}" = "true" ] && [ "${SERVICE}" = "${CLIENTSERVICE}" ]; then
      continue
    fi

    if [ "${LOGFILE}" != "-" ] && [ -n "${LOGFILE}" ]; then
      LAST_LOGFILE=${LOGFILE}
      sl_print_info "Deleting log file ${LOGFILE}"
      # beegfs-client does not (always) generate a logfile. In this case rm gives an error
      # message, but we don't want to see it. - for the same reason no sl_checkerror here
      rm -f "${LOGFILE}" 2>/dev/null
    fi
  done < "${STATUSFILE}"

  # no log file was handled, so there is no log directory to look at
  if [ -z "${LAST_LOGFILE}" ]; then
    return 0
  fi

  # delete log directory if it exists and is empty
  LOG_DIR=$(dirname "${LAST_LOGFILE}")
  if [ "${LOG_DIR}" != "." ] && [ -d "${LOG_DIR}" ] && [ -z "$(ls -A "${LOG_DIR}")" ]; then
    sl_print_info "Deleting log directory ${LOG_DIR}"
    rmdir "${LOG_DIR}"
    sl_checkerror $? "deleting ${LOG_DIR}"
  fi
}

# The "main" stoplocal function. From here, the functions to unmount the file system and stop the
# services are called. If there was no error and log deletion was requested, the log files are
# deleted; after a full shutdown the status file is removed as well.
# Checks the following variables:
#  STATUSFILE        The location of the status file
#  ONLY_STOP_SERVER  If "true", the umount_local_mounts step is skipped, and status file is not
#                    removed.
#  ONLY_UNMOUNT      If "true", the stop_services step is skipped, and status file is not
#                    removed.
# Modifies:
#  ERROR             Is set to "true" (and an error message is printed) if an error is
#                    encountered in any step.
stoplocal() {
  sl_print_info "Using status file ${STATUSFILE}"

  # do the actual shutdown process
  # unmount the file system (skip this step if we only want to stop the server)
  if [ "${ONLY_STOP_SERVER}" != "true" ]; then
    sl_unmount_local_mounts
  fi

  # stop the services (skip this step if we only got asked to unmount the file system)
  if [ "${ONLY_UNMOUNT}" != "true" ]; then
    sl_stop_services
  fi

  # delete the logfiles. ERROR is not checked here: sl_delete_logfiles checks it itself and
  # tells the user when the log files are kept because of a previous error.
  if [ "${DELETE_LOGS}" = "true" ]; then
    sl_delete_logfiles
  fi

  # delete the status file (only if a full shutdown was requested)
  if [ "${ONLY_UNMOUNT}" != "true" ] && [ "${ONLY_STOP_SERVER}" != "true" ]; then
    rm -f -- "${STATUSFILE}"
    sl_checkerror $? "deleting the status file"
  fi
}

# the user interface / main entry point to stoplocal
# Options:
#  -i FILENAME => Status information filename
#                 (DEFAULT: ${DEFAULT_STATUSFILE})
#  -d          => Delete BeeGFS data on disks
#  -L          => Delete log files after successful shutdown
#  -q          => Suppress "INFO" messages, only print "ERROR"s
#  -c          => "Cleanup": Remove remaining processes and directories of a
#                 potentially unsuccessful shutdown of an earlier beeond
#                 instance. This switch silences the error message when a status
#                 information file is not found or an unmount command fails;
#                 instead, a message is printed (if "INFO" messages are not
#                 suppressed) when a status file DOES exist, because this means
#                 there actually was an instance before that is now being
#                 cleaned up.
#  -u          => ONLY unmount the file systems(*)
#  -s          => ONLY stop non-client services(*)
#
#  (*) Options -u and -s are mutually exclusive
#      If -u or -s are given, the status file is not deleted.
do_stoplocal() {
  # Parses the command line options, validates the status file and runs stoplocal.
  # Parameters:
  #  The option list (-i FILENAME, -d, -L, -q, -c, -u, -s).
  # Returns:
  #  0 on success, or when a cleanup run (-c) finds no status file.
  #  1 on invalid options, a missing status file, or if a shutdown step set ERROR.
  # All settings are local, so nothing leaks into the sourcing script and no state is carried
  # over between two calls; the sl_* helpers called from here still see them.
  local DEFAULT_STATUSFILE=/tmp/beeond.tmp
  local CLIENTSERVICE=beegfs-client
  local DELETE_DATA="false"
  local DELETE_LOGS="false"
  local ONLY_UNMOUNT="false"
  local ONLY_STOP_SERVER="false"
  local CLEANUP="false"
  local PREFERRED_MDS_FILE=/tmp/preferredMds.fod
  local PREFERRED_TARGET_FILE=/tmp/preferredTarget.fod
  local QUIET="false"
  local ERROR="false"
  local STATUSFILE="${DEFAULT_STATUSFILE}"

  local OPTIND=1
  local OPTARG=""
  local opt

  while getopts ":i:dLusqc" opt "$@"; do
    case "${opt}" in
      i)
        STATUSFILE=${OPTARG}
        ;;
      d)
        DELETE_DATA="true"
        ;;
      L)
        DELETE_LOGS="true"
        ;;
      u)
        if [ "${ONLY_STOP_SERVER}" = "true" ]; then
          # OPTARG is empty for options without an argument, so the option letter is used
          echo "ERROR: Options -s and -${opt} are mutually exclusive" >&2
          sl_usage_if_available
          return 1
        fi
        ONLY_UNMOUNT="true"
        ;;
      s)
        if [ "${ONLY_UNMOUNT}" = "true" ]; then
          echo "ERROR: Options -u and -${opt} are mutually exclusive" >&2
          sl_usage_if_available
          return 1
        fi
        ONLY_STOP_SERVER="true"
        ;;
      q)
        QUIET="true"
        ;;
      c)
        CLEANUP="true"
        ;;
      \?)
        echo "ERROR: invalid option -${OPTARG}" >&2
        sl_usage_if_available
        return 1
        ;;
      :)
        echo "ERROR: Option -${OPTARG} requires an argument" >&2
        sl_usage_if_available
        return 1
        ;;
    esac
  done

  # if statusfile can't be found, print a message and exit.
  # (quoted, so that empty names and names containing blanks are tested correctly)
  if [ ! -f "${STATUSFILE}" ]; then
    # return 0 if we're doing a cleanup so that pdsh doesn't complain; no message either
    if [ "${CLEANUP}" = "true" ]; then
      return 0
    fi

    echo "ERROR: Status file ${STATUSFILE} not found." >&2
    # If the user has specified a status file, just give a brief error message and exit.
    # If the user has not specified a status file, give the full usage info - maybe the user
    # didn't know how to specify a status file.
    if [ "${STATUSFILE}" = "${DEFAULT_STATUSFILE}" ]; then
      sl_usage_if_available
    fi
    return 1
  fi

  # if we're doing a cleanup run, inform the user that a status file was found.
  if [ "${CLEANUP}" = "true" ]; then
    sl_print_info "Status file found."
  fi

  stoplocal

  if [ "${ERROR}" = "true" ]; then
    return 1
  fi
  return 0
}

# Calls print_usage_and_exit if the sourcing script provides a function of that name;
# otherwise does nothing.
sl_usage_if_available() {
  if declare -F print_usage_and_exit >/dev/null; then
    print_usage_and_exit
  fi
}