#!/bin/bash
#
# beeond-cp: front end for staging data in to / out of a BeeOND instance and
# for performing parallel copies across a set of nodes.
#
# The actual work is delegated to beeond_* helper functions, which are expected
# to be provided by the sourced beeond-lib file (not visible in this script).

# Source helper script.
# readlink is used because someone might be calling this script via a symlink.
ABSOLUTE_PATH=$(dirname "$(readlink -e "$0")")

# Use ../lib/beeond-lib if it exists, otherwise fall back to ../scripts/lib/beeond-lib.
if [ -e "${ABSOLUTE_PATH}/../lib/beeond-lib" ]
then
   BEEOND_LIB="${ABSOLUTE_PATH}/../lib/beeond-lib"
else
   BEEOND_LIB="${ABSOLUTE_PATH}/../scripts/lib/beeond-lib"
fi

#shellcheck source=scripts/lib/beeond-lib
source "${BEEOND_LIB}"

# Not referenced in this file; presumably consumed by the sourced library -- TODO confirm.
PARALLEL="${ABSOLUTE_PATH}/../thirdparty/parallel/parallel"

#######################################
# Print the usage text to stdout and terminate the script.
# Arguments: none
# Returns:   never returns; exits with status 1
#######################################
print_usage_and_exit()
{
   echo ""
   echo "BeeOND copy (http://www.beegfs.com)"
   echo ""
   echo "DESCRIPTION:"
   echo " BeeGFS OnDemand copying/staging system."
   echo ""
   echo "USAGE: $(basename "$0") "
   echo ""
   echo "ACTIONS:"
   echo " The first argument to $(basename "$0") is considered to be an action that the script"
   echo " should perform."
   echo ""
   echo " The following actions are available:"
   echo ""
   echo " stagein: (EXPERIMENTAL)"
   echo " Stage a complete directory from the global storage in to BeeOND."
   echo ""
   echo " Mandatory arguments:"
   echo " -n FILENAME => File containing the list of nodes where the parallel"
   echo " copy should be performed. All nodes must have access to"
   echo " the global and local directories."
   echo " -g PATH => Directory on global storage. (*)"
   echo " -l PATH => Directory on local (BeeOND) storage. (*)"
   echo ""
   echo " Notes:"
   echo " (*) Global and local directories have to be specified in form of an"
   echo " absolute path."
   echo ""
   echo " stageout: (EXPERIMENTAL)"
   echo " Stage a complete directory out from BeeOND to the global storage."
   echo " Only changes will be staged out of the local directory and committed to"
   echo " the global directory."
   echo ""
   echo " Mandatory arguments:"
   echo " -l PATH => Local directory."
   echo ""
   echo " Notes:"
   echo " The contents will be completely synchronized, i.e. deleted files on "
   echo " BeeOND will get deleted on global storage, too."
   echo ""
   echo " copy:"
   echo " Perform a parallel copy of a set of files or folders."
   echo " Files will be copied into the target directory, folders will be copied"
   echo " recursively. The copy process is parallelized across the set of nodes"
   echo " specified in the nodefile."
   echo ""
   echo " Mandatory arguments:"
   echo " -n FILENAME => File containing the list of nodes where the parallel"
   echo " copy should be performed. All nodes must have access to"
   echo " the sources and the target directory."
   echo ""
   echo " Notes:"
   echo " Further command line arguments are consdiered source directory or file"
   echo " names. The last command line argument specifies the target directory"
   echo ""
   echo "EXAMPLES:"
   echo " Stage data from /mnt/beegfs-global/dataset in to BeeOND mounted at /mnt/beeond,"
   echo " using the nodes given in /tmp/nodefile:"
   echo " beeond-cp stagein -n /tmp/nodefile -g /mnt/beegfs-global/dataset -l /mnt/beeond"
   echo ""
   echo " Stage out modified data from BeeOND mounted at /mnt/beeond to the global "
   echo " storage:"
   # stageout only accepts -l (see the option parsing at the bottom of this
   # file); node file and global path are read from the session info file.
   echo " beeond-cp stageout -l /mnt/beeond"
   echo ""
   echo " Recursively copy the directories dir_1 and dir_2 to /mnt/beegfs, using the nodes"
   echo " in /tmp/nodefile:"
   echo " beeond-cp copy -n /tmp/nodefile dir_1 dir_2 /mnt/beegfs"
   echo ""
   echo "NOTE:"
   echo " BeeOND copy uses GNU Parallel -"
   echo " When using programs that use GNU Parallel to process data for publication"
   echo " please cite:"
   echo " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,"
   echo " ;login: The USENIX Magazine, February 2011:42-47."
   echo ""
   echo " SSH is used to log into the nodes specified in the nodefile. Please make"
   echo " sure your SSH configuration allows for enough concurrent sessions and pending"
   echo " logins. You might have to (ask your admin to) raise the MaxSessions and"
   echo " MaxStartups settings in the sshd_config file."
   echo ""
   echo " Also please make sure you have the access rights needed to write to the"
   echo " global store. Otherwise the stage-out might fail. Note that the access rights"
   echo " in the BeeOND local store do not necessarily reflect those in the global"
   echo " store."

   exit 1
}

### main functions

#######################################
# Stage a directory from global storage in to a (new or empty) local directory.
# Globals:   NODES, NODELIST (written)
# Arguments: $1 - node file (relative paths are resolved against $PWD)
#            $2 - directory on global storage (absolute path)
#            $3 - directory on local storage (absolute path)
# Returns:   exits the script on any validation error
#######################################
do_start()
{
   local NODEFILE="${1}"
   local GLOBAL_PATH="${2}"
   local LOCAL_PATH="${3}"
   local MISSING_PARAM=0

   beeond_print_info "BeeOND startup..."
   beeond_print_info "nodefile: ${NODEFILE}"
   beeond_print_info "global path: ${GLOBAL_PATH}"
   beeond_print_info "local path: ${LOCAL_PATH}"

   # Report every missing parameter before giving up, so the user sees all
   # problems at once.
   if [ "${NODEFILE}" = "" ]
   then
      beeond_print_error "No nodefile specified."
      MISSING_PARAM=1
   fi

   if [ "${GLOBAL_PATH}" = "" ]
   then
      beeond_print_error "Global path not specified."
      MISSING_PARAM=1
   fi

   if [ "${LOCAL_PATH}" = "" ]
   then
      beeond_print_error "Local path not specified."
      MISSING_PARAM=1
   fi

   # Bail out before touching the values: an empty nodefile would otherwise be
   # expanded to "${PWD}/" (which exists), and empty paths would trigger a
   # misleading "have to be absolute" error.
   [ "${MISSING_PARAM}" = "1" ] && exit 1

   # Expand relative path to nodefile.
   if [ ! "${NODEFILE:0:1}" = "/" ]
   then
      NODEFILE="${PWD}/${NODEFILE}"
   fi

   if [ ! -e "${NODEFILE}" ]
   then
      beeond_print_error_and_exit "Node file does not exist."
   fi

   # The paths to the global and local directory have to be specified as absolute paths to prevent
   # user errors (like copying a lot of files to ~/mnt/beeond).
   if [ ! "${GLOBAL_PATH:0:1}" = "/" ] || [ ! "${LOCAL_PATH:0:1}" = "/" ]
   then
      beeond_print_error_and_exit "Global path and local path have to be absolute."
   fi

   # Make sure target directory is empty before starting.
   if [ -e "${LOCAL_PATH}" ]
   then
      [ -d "${LOCAL_PATH}" ] \
         || beeond_print_error_and_exit "Target path is not a directory."
      # find prints the path only if it is an empty directory; read succeeds
      # only if a line was printed.
      find "${LOCAL_PATH}" -maxdepth 0 -type d -empty | read -r _ \
         || beeond_print_error_and_exit "Target directory is not empty."
   else
      mkdir -p "${LOCAL_PATH}" \
         || beeond_print_error_and_exit "Cannot create target directory."
   fi

   # Concurrency is the number of lines in the node file.
   local CONCURRENCY=$(( $(wc -l < "${NODEFILE}") ))
   beeond_print_info "Concurrency: ${CONCURRENCY}"

   beeond_print_info "Writing session information."
   beeond_save_session_info "${NODEFILE}" "${GLOBAL_PATH}"

   beeond_print_info "Starting stage-in..."

   # Word splitting is intentional here: one array element per node name.
   # shellcheck disable=SC2207
   NODES=( $(grep -v '^$' "${NODEFILE}" | uniq) ) # Store as array and ignore empty lines.
   NODELIST=$(IFS=,; echo "${NODES[*]}") # turn argument list into comma-separated string for PDSH

   beeond_stage_in "${GLOBAL_PATH}" "${LOCAL_PATH}" "${NODELIST}" "${CONCURRENCY}"

   beeond_print_info "Done."
}

#######################################
# Stage the contents of a local directory back out to global storage.
# The node file and the global path are read from the session info file
# "${LOCAL_PATH}/${BEEOND_SESSION_FILE}".
# Globals:   BEEOND_SESSION_FILE (read; not defined in this file)
#            NODEFILE, GLOBAL_PATH, NODES, NODELIST (written)
# Arguments: $1 - local directory (relative paths are resolved against $PWD)
# Returns:   exits the script on any validation error
#######################################
do_stop()
{
   local LOCAL_PATH="${1}"
   # Initialised locally so that a MISSING_PARAM inherited from the environment
   # cannot abort the stage-out.
   local MISSING_PARAM=0

   if [ "${LOCAL_PATH}" = "" ]
   then
      beeond_print_error_and_exit "No path specified."
   fi

   # Expand relative local path.
   # Note: Don't have to ensure that it's an absolute path here: We confirm it's a BeeOND instance
   # by looking for the session info file.
   if [ ! "${LOCAL_PATH:0:1}" = "/" ]
   then
      LOCAL_PATH="${PWD}/${LOCAL_PATH}"
   fi

   # Read parameters from session info file (lines of the form Key=Value;
   # everything after the first '=' is taken as the value).
   NODEFILE=$(grep NodeFile "${LOCAL_PATH}/${BEEOND_SESSION_FILE}" | cut -d = -f 2-)
   GLOBAL_PATH=$(grep GlobalPath "${LOCAL_PATH}/${BEEOND_SESSION_FILE}" | cut -d = -f 2-)

   if [ "${NODEFILE}" = "" ]
   then
      beeond_print_error "Error reading node file name from session file."
      MISSING_PARAM=1
   fi

   if [ "${GLOBAL_PATH}" = "" ]
   then
      beeond_print_error "Error reading global path from session file."
      MISSING_PARAM=1
   fi

   [ "${MISSING_PARAM}" = "1" ] && exit 1

   if [ ! -e "${NODEFILE}" ]
   then
      beeond_print_error_and_exit "Node file does not exist."
   fi

   beeond_print_info "BeeOND shutdown..."
   beeond_print_info "nodefile: ${NODEFILE}"
   beeond_print_info "global path: ${GLOBAL_PATH}"
   beeond_print_info "local path: ${LOCAL_PATH}"

   # Word splitting is intentional here: one array element per node name.
   # shellcheck disable=SC2207
   NODES=( $(grep -v '^$' "${NODEFILE}" | uniq) ) # Store as array and ignore empty lines.
   NODELIST=$(IFS=,; echo "${NODES[*]}")

   # Concurrency is the number of lines in the node file.
   local CONCURRENCY=$(( $(wc -l < "${NODEFILE}") ))
   beeond_print_info "Concurrency: ${CONCURRENCY}"

   beeond_stage_out "${GLOBAL_PATH}" "${LOCAL_PATH}" "${NODELIST}" "${CONCURRENCY}"

   beeond_print_info "Done."
}

#######################################
# Perform a parallel copy of files/directories.
# Globals:   NODES, NODELIST (written)
# Arguments: $1   - node file (relative paths are resolved against $PWD)
#            $2.. - passed through unchanged to beeond_copy (per the usage
#                   text: the sources, followed by the target directory)
# Returns:   exits the script on any validation error
#######################################
do_copy()
{
   local NODEFILE="${1}"
   shift

   beeond_print_info "BeeOND copy..."

   # Must be fatal: an empty name would otherwise be expanded to "${PWD}/",
   # which exists, and the directory would then be used as the node file.
   if [ "${NODEFILE}" = "" ]
   then
      beeond_print_error_and_exit "No nodefile specified."
   fi

   # Expand relative path to nodefile.
   if [ ! "${NODEFILE:0:1}" = "/" ]
   then
      NODEFILE="${PWD}/${NODEFILE}"
   fi

   if [ ! -e "${NODEFILE}" ]
   then
      beeond_print_error_and_exit "Node file does not exist."
   fi

   # Word splitting is intentional here: one array element per node name.
   # shellcheck disable=SC2207
   NODES=( $(grep -v '^$' "${NODEFILE}" | uniq) ) # Store as array and ignore empty lines.
   NODELIST=$(IFS=,; echo "${NODES[*]}")

   # Concurrency is the number of lines in the node file.
   local CONCURRENCY=$(( $(wc -l < "${NODEFILE}") ))
   beeond_print_info "Concurrency: ${CONCURRENCY}"

   beeond_copy "${NODELIST}" "${CONCURRENCY}" "$@"
}

# Print help if no arguments given.
if [ $# -eq 0 ]
then
   print_usage_and_exit
fi

# Do it. The first argument selects the action.
ACTION="${1}"

case "${ACTION}" in
   stagein)
      shift
      while getopts ":n:g:l:" opt
      do
         case "${opt}" in
            n) NODEFILE="${OPTARG}" ;;
            g) GLOBAL_PATH="${OPTARG}" ;;
            l) LOCAL_PATH="${OPTARG}" ;;
            \?) beeond_print_error_and_exit "Invalid option: -${OPTARG}." ;;
            :) beeond_print_error_and_exit "Option -${OPTARG} requires an argument." ;;
         esac
      done

      do_start "${NODEFILE}" "${GLOBAL_PATH}" "${LOCAL_PATH}"
      ;;

   stageout)
      shift
      while getopts ":l:" opt
      do
         case "${opt}" in
            l) LOCAL_PATH="${OPTARG}" ;;
            \?) beeond_print_error_and_exit "Invalid option: -${OPTARG}." ;;
            :) beeond_print_error_and_exit "Option -${OPTARG} requires an argument." ;;
         esac
      done

      do_stop "${LOCAL_PATH}"
      ;;

   copy)
      shift

      # The nodefile option (-n) has to be the first argument after the action;
      # only a single option is parsed.
      if getopts ":n:" opt
      then
         case "${opt}" in
            n) NODEFILE="${OPTARG}" ;;
            :) beeond_print_error_and_exit "Option -${OPTARG} requires an argument." ;;
            *) beeond_print_error_and_exit "Invalid option: -${OPTARG}." ;;
         esac
      else
         beeond_print_error_and_exit "No nodefile specified."
      fi

      # All following command line arguments are file or directory names, specifying the sources and
      # the target of the copy action.
      # Shift by what getopts actually consumed: two words for "-n FILE", but
      # only one for "-nFILE".
      shift $((OPTIND - 1))

      do_copy "${NODEFILE}" "$@"
      ;;

   info)
      # do_print_info is not defined in this file; presumably provided by the
      # sourced library -- TODO confirm.
      do_print_info
      ;;

   *)
      print_usage_and_exit
      ;;
esac