393 lines
14 KiB
Bash
393 lines
14 KiB
Bash
#!/bin/bash
|
|
|
|
# This file contains some functions used across all of the BeeOND scripts.
|
|
|
|
# Prefix of all bookkeeping files. Paths starting with this prefix are skipped when the file
# lists are generated.
BEEOND_FILENAME_PREFIX=".beeond_"

# Session info file (written by beeond_save_session_info).
BEEOND_SESSION_FILE=${BEEOND_FILENAME_PREFIX}session

# Lists used by the parallel copy.
BEEOND_COPY_SCAN_LIST=${BEEOND_FILENAME_PREFIX}scan_list # List of dirs that have to be scanned.
BEEOND_COPY_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_copy
BEEOND_COPY_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_copy

# File lists written at stage-in (start) and at stage-out (end / end_updated).
BEEOND_START_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_start
BEEOND_END_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_end
BEEOND_END_UPDATED_FILE_LIST=${BEEOND_FILENAME_PREFIX}files_end_updated

# Directory lists written at stage-in (start) and at stage-out (end / end_updated).
BEEOND_START_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_start
BEEOND_END_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_end
BEEOND_END_UPDATED_DIR_LIST=${BEEOND_FILENAME_PREFIX}dirs_end_updated

# Value passed as -N to every ${PARALLEL} invocation in this file.
BEEOND_BATCH_SIZE=20
|
|
|
|
# Prints an error message, followed by an empty line, to stderr.
#
# Arguments:
#    $1 - the message; it is prefixed with "ERROR: "
beeond_print_error()
{
   # Both the message and the separating blank line go to stderr, so that no part of an error
   # report ends up in captured or piped stdout.
   printf 'ERROR: %s\n\n' "${1}" >&2
}
|
|
|
|
# Prints an error message via beeond_print_error and terminates the shell with exit code 1.
#
# Arguments:
#    $1 - the error message
beeond_print_error_and_exit()
{
   local MESSAGE="${1}"

   # Everything the helper prints on stdout is redirected to stderr as well.
   beeond_print_error "${MESSAGE}" 1>&2
   exit 1
}
|
|
|
|
# Prints an informational message to stdout unless quiet mode is active.
#
# Globals:
#    QUIET (read) - if set to "true", nothing is printed
# Arguments:
#    $1 - the message; it is prefixed with "INFO: "
beeond_print_info()
{
   case "${QUIET}" in
      true)
         return 0
         ;;
   esac

   echo "INFO: ${1}"
}
|
|
|
|
# Saves the session info file which contains the paths of the global store and the node file.
# Both values are written shell-quoted (printf %q), one "Key=value" pair per line.
#
# Globals:
#    LOCAL_PATH (read)          - directory the session file is written to (not a parameter of
#                                 this function; it is taken from the calling context)
#    BEEOND_SESSION_FILE (read) - name of the session file
# Arguments:
#    $1 - path of the node file
#    $2 - path of the global store
# Exits (via beeond_print_error_and_exit) if the session file cannot be written.
beeond_save_session_info()
{
   local NODEFILE="${1}"
   local GLOBAL_PATH="${2}"
   local SESSION_FILE="${LOCAL_PATH}/${BEEOND_SESSION_FILE}"

   printf "NodeFile=%q\nGlobalPath=%q\n" "${NODEFILE}" "${GLOBAL_PATH}" > "${SESSION_FILE}" \
      || beeond_print_error_and_exit "Could not write to session file."
}
|
|
|
|
# Generate the list of files (regular files and symlinks) in the beeond folder.
#
# The scan runs with the scanned directory as working directory, so the listed paths (and a
# relative list file or reference file) are relative to it. Every path is written shell-quoted
# (printf %q), one per line, sorted. Bookkeeping files (BEEOND_FILENAME_PREFIX) in the top level
# of the directory are skipped.
#
# Arguments:
#    $1 - directory to scan
#    $2 - list file to write
#    $3 - optional reference file; if given, only entries whose status change time or
#         modification time is newer than the reference file are listed
# Returns:
#    non-zero if the directory cannot be entered (no list is written in that case)
beeond_generate_file_list()
{
   local LOCAL_PATH="${1}"
   local LISTFILE="${2}"
   local REFERENCE_FILE="${3}"
   local -a NEWER_FILTER=()

   # Without this check a failed pushd would make us scan the caller's working directory and
   # the final popd would pop a directory we never pushed.
   if ! pushd "${LOCAL_PATH}" > /dev/null
   then
      beeond_print_error "Could not change to directory: ${LOCAL_PATH}"
      return 1
   fi

   if [ "${REFERENCE_FILE}" = "" ]
   then # No reference file - just generate the full list (e.g. on startup).
      beeond_print_info "Generating file list ${LISTFILE}..."
   else # Reference file given: Compare timestamps.
      beeond_print_info \
         "Generating file list ${LISTFILE}. Timestamp reference: ${REFERENCE_FILE}..."
      NEWER_FILTER=( \( -cnewer "${REFERENCE_FILE}" -or -newer "${REFERENCE_FILE}" \) )
   fi

   # printf %q renders names containing newlines as $'...'. The grep statement filters those
   # out: While they are technically legal they would cause problems later on due to the way
   # the script run by parallel handles the arguments.
   # "{} +" quotes the names in batches instead of starting one bash per file.
   find . ! -path "./${BEEOND_FILENAME_PREFIX}*" \( -type f -or -type l \) \
         "${NEWER_FILTER[@]}" \
         -exec bash -c 'printf "%q\n" "$@"' _ {} + \
      | grep -v '^\$' | sort > "${LISTFILE}"

   popd > /dev/null
}
|
|
|
|
# Generate list of directories - this is necessary in case directories were created during the
# session and need to be created on the global store as well.
#
# The scan runs with the scanned directory as working directory, so the listed paths (and a
# relative list file or reference file) are relative to it. Every path is written shell-quoted
# (printf %q), one per line, sorted. The scanned directory itself (".") is not listed.
#
# Arguments:
#    $1 - directory to scan
#    $2 - list file to write
#    $3 - optional reference file; if given, only directories whose status change time or
#         modification time is newer than the reference file are listed
# Returns:
#    non-zero if the directory cannot be entered (no list is written in that case)
beeond_generate_directory_list()
{
   local LOCAL_PATH="${1}"
   local LISTFILE="${2}"
   local REFERENCE_FILE="${3}"
   local -a NEWER_FILTER=()

   # Without this check a failed pushd would make us scan the caller's working directory and
   # the final popd would pop a directory we never pushed.
   if ! pushd "${LOCAL_PATH}" > /dev/null
   then
      beeond_print_error "Could not change to directory: ${LOCAL_PATH}"
      return 1
   fi

   if [ "${REFERENCE_FILE}" = "" ]
   then # No reference file - just generate the full list (e.g. on startup).
      beeond_print_info "Generating directory list ${LISTFILE}..."
   else # Reference file given: Compare timestamps.
      beeond_print_info \
         "Generating directory list ${LISTFILE}. Timestamp reference: ${REFERENCE_FILE}..."
      NEWER_FILTER=( \( -cnewer "${REFERENCE_FILE}" -or -newer "${REFERENCE_FILE}" \) )
   fi

   # The grep statement drops names that printf %q had to render as $'...' (e.g. names with
   # newlines in them). "{} +" quotes the names in batches instead of starting one bash per
   # directory.
   find . ! -path . -type d \
         "${NEWER_FILTER[@]}" \
         -exec bash -c 'printf "%q\n" "$@"' _ {} + \
      | grep -v '^\$' | sort > "${LISTFILE}"

   popd > /dev/null
}
|
|
|
|
# Generate copy file list. If we do a parallel copy, we can't just list the paths to all the files
# to be copied, because we have to "flatten" the folder hierarchy (e.g. when the user says
# "copy dir/subdir/file_a anotherdir/file_b target_dir" we want to end up with
# target_dir/file_a and target_dir/file_b). To achieve this, we just save an explicit target path
# to each source file. We also have to keep a list of directories we encounter because we want to
# create them before we start copying.
#
# Three lists are written into the target directory:
#    BEEOND_COPY_SCAN_LIST - the source directories that have to be scanned
#    BEEOND_COPY_DIR_LIST  - directories to create, relative to the target directory
#    BEEOND_COPY_FILE_LIST - one "<source> <target>" pair per line
# All paths are written shell-quoted (printf %q).
#
# Globals:
#    PARALLEL (read) - command used to run the scans on the nodes
# Arguments:
#    $1   - target directory (relative paths are resolved against the working directory)
#    $2   - node list (passed to ${PARALLEL} -S)
#    $3   - concurrency (passed to ${PARALLEL} -j)
#    $4.. - source files and directories
# Exits (via beeond_print_error_and_exit) if a source is neither a directory nor a regular file.
beeond_generate_copy_lists()
{
   local TARGET="${1}"
   local NODE_LIST="${2}"
   local CONCURRENCY="${3}"
   local ENTRY
   shift 3

   # Note: We do relative path expansion here, (and not directly in the do_... functions)
   # because this is the first time we iterate over the source list.

   # Expand target path.
   if [ ! "${TARGET:0:1}" = "/" ]
   then
      TARGET="${PWD}/${TARGET}"
   fi

   # Delete possibly left over file lists.
   rm -f "${TARGET}/${BEEOND_COPY_SCAN_LIST}" \
         "${TARGET}/${BEEOND_COPY_DIR_LIST}" \
         "${TARGET}/${BEEOND_COPY_FILE_LIST}"

   # Start with empty lists, so that all of them exist afterwards even if only plain files (or
   # only directories) were given.
   : > "${TARGET}/${BEEOND_COPY_SCAN_LIST}"
   : > "${TARGET}/${BEEOND_COPY_DIR_LIST}"
   : > "${TARGET}/${BEEOND_COPY_FILE_LIST}"

   # Generate lists: A list of files which can be used directly, and a list of directories which
   # have to be scanned first.
   for ENTRY in "$@"
   do
      # Expand path if it's relative.
      if [ ! "${ENTRY:0:1}" = "/" ]
      then
         ENTRY="${PWD}/${ENTRY}"
      fi

      beeond_print_info "Path to scan: ${ENTRY}"
      if [ -d "${ENTRY}" ]; then
         printf "%q\n" "${ENTRY}" >> "${TARGET}/${BEEOND_COPY_SCAN_LIST}"
      elif [ -f "${ENTRY}" ]; then
         # Same "<source> <target>" format as the scan below produces; a plain file is copied
         # directly into the target directory.
         printf "%q %q\n" "${ENTRY}" "${TARGET}/$(basename "${ENTRY}")" \
            >> "${TARGET}/${BEEOND_COPY_FILE_LIST}"
      else
         beeond_print_error_and_exit "File or directory does not exist: ${ENTRY}"
      fi
   done

   # Nothing to scan if no directory was given.
   if [ -s "${TARGET}/${BEEOND_COPY_SCAN_LIST}" ]
   then
      beeond_print_info "Scanning sources..."

      # Directories: "<name of the scanned directory>/<quoted path relative to it>".
      < "${TARGET}/${BEEOND_COPY_SCAN_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               cd \"\${DIR}\"; \
               find . -type d -exec bash -c \\\'printf \"%s/%q\n\" \"\$0\" \"\$1\"\' \"\`basename \"\${DIR}\"\`\" \{\} \; \
                  | grep -v ^\\$; \
            done;
         " \
         | sort > "${TARGET}/${BEEOND_COPY_DIR_LIST}"

      # Files: "<quoted source path> <quoted target path>". The result is appended, because the
      # list may already contain the entries of plain files given on the command line.
      < "${TARGET}/${BEEOND_COPY_SCAN_LIST}" \
         ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
            -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
         " \
            while read DIR; do \
               cd \"\${DIR}\"; \
               find . \( -type f -or -type l \) -exec bash -c \
                  \\\'printf \"%q %q\n\" \"\${PWD}/\$0\" \"${TARGET}/\`basename \"\${PWD}\"\`/\$0\"\' \{\} \; \
                  | grep -v ^\\$; \
            done; \
         " \
         | sort >> "${TARGET}/${BEEOND_COPY_FILE_LIST}"
   fi
}
|
|
|
|
# Parallel copy of the files from a previously generated file list to the target directory.
# First, the directory structure is generated, then the files are copied into it.
#
# Reads BEEOND_COPY_DIR_LIST (directories, relative to the target) and BEEOND_COPY_FILE_LIST
# ("<source> <target>" pairs) from the target directory.
#
# Globals:
#    PARALLEL (read) - command used to distribute the work to the nodes
# Arguments:
#    $1 - target directory (relative paths are resolved against the working directory)
#    $2 - node list (passed to ${PARALLEL} -S)
#    $3 - concurrency (passed to ${PARALLEL} -j)
beeond_parallel_copy()
{
   local TARGET="${1}"
   local NODE_LIST="${2}"
   local CONCURRENCY="${3}"

   # Make the target path absolute.
   case "${TARGET}" in
      /*)
         ;;
      *)
         TARGET="${PWD}/${TARGET}"
         ;;
   esac

   local DIR_LIST_PATH="${TARGET}/${BEEOND_COPY_DIR_LIST}"
   local FILE_LIST_PATH="${TARGET}/${BEEOND_COPY_FILE_LIST}"

   beeond_print_info "Generating target directory structure..."

   # ${TARGET} is expanded here, \${DIR} by the shell that runs the loop.
   < "${DIR_LIST_PATH}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read DIR; do \
            mkdir -pv \"${TARGET}/\${DIR}\"; \
         done; \
      "

   beeond_print_info "Copying files..."

   # Every line is split into LINE[0] (source) and LINE[1] (target).
   < "${FILE_LIST_PATH}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read -a LINE; do \
            cp -av \"\${LINE[0]}\" \"\${LINE[1]}\"; \
         done; \
      "

   # The list files are currently left in the target directory; the cleanup is disabled:
   # rm "${TARGET}/${BEEOND_COPY_SCAN_LIST}" \
   #    "${TARGET}/${BEEOND_COPY_DIR_LIST}" \
   #    "${TARGET}/${BEEOND_COPY_FILE_LIST}"
}
|
|
|
|
# Remove all files from the global store that have been deleted during the session.
#
# Everything that is listed in the start list but not in the end list is removed: files via
# ${PARALLEL} on the nodes, directories locally with rmdir.
#
# Globals:
#    PARALLEL (read) - command used to distribute the file removal to the nodes
# Arguments:
#    $1 - path of the global store
#    $2 - local path (contains the start and end lists)
#    $3 - node list (passed to ${PARALLEL} -S)
#    $4 - concurrency (passed to ${PARALLEL} -j)
# Returns:
#    non-zero without deleting anything if no global path is given
beeond_remove_removed_files()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   # With an empty global path the commands below would operate on "/<relative path>".
   if [ -z "${GLOBAL_PATH}" ]
   then
      beeond_print_error "Refusing to delete files: No global path given."
      return 1
   fi

   beeond_print_info "Deleting files:"
   comm -23 "${LOCAL_PATH}/${BEEOND_START_FILE_LIST}" "${LOCAL_PATH}/${BEEOND_END_FILE_LIST}" | \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read FILE; do \
            rm -v \"${GLOBAL_PATH}/\${FILE}\"; \
         done; \
      "

   beeond_print_info "Deleting directories:"
   # Not being done in parallel to avoid deleting a subdirectory before its parent (this is also
   # the reason the list is inverted (tac)).
   comm -23 "${LOCAL_PATH}/${BEEOND_START_DIR_LIST}" "${LOCAL_PATH}/${BEEOND_END_DIR_LIST}" | \
      tac | \
      xargs -I{} rmdir -v "${GLOBAL_PATH}/{}"
}
|
|
|
|
# Copy back all files to the global store that have been updated during the session.
#
# Works in three steps: create all directories of the end directory list on the global store,
# copy the files of the updated file list, and finally copy the files that are located directly
# in one of the updated directories.
#
# Globals:
#    PARALLEL (read) - command used to distribute the work to the nodes
# Arguments:
#    $1 - path of the global store
#    $2 - local path (contains the lists written at stage-out)
#    $3 - node list (passed to ${PARALLEL} -S)
#    $4 - concurrency (passed to ${PARALLEL} -j)
beeond_copy_updated_files()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   local ALL_DIRS_LIST="${LOCAL_PATH}/${BEEOND_END_DIR_LIST}"
   local UPDATED_FILES_LIST="${LOCAL_PATH}/${BEEOND_END_UPDATED_FILE_LIST}"
   local UPDATED_DIRS_LIST="${LOCAL_PATH}/${BEEOND_END_UPDATED_DIR_LIST}"

   beeond_print_info "Creating new directories:"

   < "${ALL_DIRS_LIST}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read DIR; do \
            mkdir -pv \"${GLOBAL_PATH}/\${DIR}\"; \
         done; \
      "

   beeond_print_info "Copying back changed files:"

   < "${UPDATED_FILES_LIST}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read FILE; do \
            cp -uv \"${LOCAL_PATH}/\${FILE}\" \"${GLOBAL_PATH}/\${FILE}\"; \
         done; \
      "

   # Copy files into updated directories. (When a directory is renamed or files
   # are moved to a directory, the files in it don't have their timestamp
   # updated. Therefore, we need to check all the updated directories again).

   beeond_print_info "Copying back changed files (updated dirs):"

   # The listed directories are relative to the local path, so find has to run from there.
   pushd "${LOCAL_PATH}"
   < "${UPDATED_DIRS_LIST}" \
      xargs -I{} find {} -maxdepth 1 \( -type f -or -type l \) | \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read FILE; do \
            cp -uv \"${LOCAL_PATH}/\${FILE}\" \"\`dirname \"${GLOBAL_PATH}/\${FILE}\"\`\"; \
         done; \
      "
   popd
}
|
|
|
|
# Stage in process: Copy all files from the global store to the local store.
#
# Globals:
#    PARALLEL (read) - command used to distribute the work to the nodes
# Arguments:
#    $1 - path of the global store
#    $2 - local path (the start lists are written into it)
#    $3 - node list (passed to ${PARALLEL} -S)
#    $4 - concurrency (passed to ${PARALLEL} -j)
beeond_stage_in()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   local START_FILES="${LOCAL_PATH}/${BEEOND_START_FILE_LIST}"
   local START_DIRS="${LOCAL_PATH}/${BEEOND_START_DIR_LIST}"

   # Generate list of files that have to be copied.
   beeond_generate_file_list "${GLOBAL_PATH}" "${START_FILES}"
   beeond_generate_directory_list "${GLOBAL_PATH}" "${START_DIRS}"

   beeond_print_info "Creating directory structure..."
   # Every local directory gets the timestamps of its counterpart on the global store.
   < "${START_DIRS}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read DIR; do \
            mkdir -pv \"${LOCAL_PATH}/\${DIR}\"; \
            touch --reference=\"${GLOBAL_PATH}/\${DIR}\" \"${LOCAL_PATH}/\${DIR}\"; \
         done; \
      "

   beeond_print_info "Copying files to local directory..."
   # A failed copy is only reported; the function carries on.
   < "${START_FILES}" \
      ${PARALLEL} -S "${NODE_LIST}" -j"${CONCURRENCY}" --pipe --recend "\n" \
         -N${BEEOND_BATCH_SIZE} --controlmaster --will-cite \
      " \
         while read FILE; do \
            cp -av \"${GLOBAL_PATH}/\${FILE}\" \"\`dirname \"${LOCAL_PATH}/\${FILE}\"\`\"/; \
         done;
      " \
      || beeond_print_error "Stage-in copy did not succeed. Data is incompletely staged in."

   # Generate list of files and dirs that were actually copied to keep track if the user deletes
   # files during a session. (re-generate list here, so that if something went wrong during the
   # stage-in copy, we don't start deleting stuff from the global store by accident).
   beeond_generate_file_list "${LOCAL_PATH}" "${START_FILES}"
   beeond_generate_directory_list "${LOCAL_PATH}" "${START_DIRS}"
}
|
|
|
|
# Stage out process: remove all the files that have been removed during the beeond session,
# and copy back the files which have been changed.
#
# Arguments:
#    $1 - path of the global store
#    $2 - local path (contains the start lists; the end lists are written into it)
#    $3 - node list
#    $4 - concurrency
beeond_stage_out()
{
   local GLOBAL_PATH="${1}"
   local LOCAL_PATH="${2}"
   local NODE_LIST="${3}"
   local CONCURRENCY="${4}"

   # Both synchronisation steps take the same arguments.
   local -a SYNC_ARGS=( "${GLOBAL_PATH}" "${LOCAL_PATH}" "${NODE_LIST}" "${CONCURRENCY}" )

   beeond_print_info "Nodes for parallel stage out: ${NODE_LIST}."

   # Files: everything that exists now, and everything newer than the start list. The reference
   # is passed as a bare file name; the list generator receives the local path as the directory
   # to scan.
   beeond_generate_file_list "${LOCAL_PATH}" "${LOCAL_PATH}/${BEEOND_END_FILE_LIST}"
   beeond_generate_file_list "${LOCAL_PATH}" "${LOCAL_PATH}/${BEEOND_END_UPDATED_FILE_LIST}" \
      "${BEEOND_START_FILE_LIST}"

   # Directories: same two lists.
   beeond_generate_directory_list "${LOCAL_PATH}" "${LOCAL_PATH}/${BEEOND_END_DIR_LIST}"
   beeond_generate_directory_list "${LOCAL_PATH}" "${LOCAL_PATH}/${BEEOND_END_UPDATED_DIR_LIST}" \
      "${BEEOND_START_DIR_LIST}"

   beeond_remove_removed_files "${SYNC_ARGS[@]}"

   beeond_copy_updated_files "${SYNC_ARGS[@]}"
}
|
|
|
|
# Parallel copy process: copy all files and directories (recursively) into the target directory.
#
# Arguments:
#    $1   - node list
#    $2   - concurrency
#    $3.. - source files and directories, followed by the target directory as last argument
beeond_copy()
{
   local NODE_LIST="${1}"
   local CONCURRENCY="${2}"
   shift 2

   # Of the remaining arguments the last one is the target, all others are sources.
   local COPY_TARGET="${!#}"

   beeond_print_info "Nodes for parallel copy: ${NODE_LIST}; Concurrency: ${CONCURRENCY}"

   beeond_generate_copy_lists "${COPY_TARGET}" "${NODE_LIST}" "${CONCURRENCY}" "${@:1:$#-1}"
   beeond_parallel_copy "${COPY_TARGET}" "${NODE_LIST}" "${CONCURRENCY}"
}
|