Imported Upstream version 1.5.1
This commit is contained in:
403
contrib/benchmark/benchmark.sh
Executable file
403
contrib/benchmark/benchmark.sh
Executable file
@@ -0,0 +1,403 @@
|
||||
#!/bin/bash
#
# benchmark.sh — run a series of XtreemFS benchmarks (xtfs_benchmark) with an
# increasing number of threads. See printUsage below for the CLI.
# All settings here can be overridden via BENCH_* environment variables.

# the timeout (seconds) for one execution of xtfs_benchmark
TIMEOUT=1500

# the timeout (seconds) for one cleanup run (xtfs_cleanup)
TIMEOUT_CLEANUP=300

# the time (seconds) to sleep after a cleanup, to let the system settle
SLEEPTIME=600

# if false, the script will not sleep after a cleanup and after drop_caches
# (-v will set false)
SLEEP=true

# the size of the basefile for random benchmarks (shared read/write pool)
BASEFILE_SIZE="100g"

# the directories for the logfiles and the results
LOG_BASE=${BENCH_LOG:-$HOME}
LOG_DIR="$LOG_BASE/log"
RESULT_DIR="$LOG_BASE/result"

# Drops caches after each benchmark. Set BENCH_DROP_CACHES=false to disable.
# cp "drop_caches" to "/usr/local/bin" and add "ALL ALL=NOPASSWD: /usr/local/bin/drop_caches" to sudoers file
DROP_CACHES=${BENCH_DROP_CACHES:-"/usr/local/bin/drop_caches"}
if [[ $DROP_CACHES != "false" ]]; then
    # DROP_CACHES_CALL stays empty when disabled; drop_caches() checks that.
    DROP_CACHES_CALL="sudo ${DROP_CACHES}"
fi

# IP and Port of the DIR
DIR=${BENCH_DIR:-"localhost:32638"}

# IP and Port of the MRC
MRC=${BENCH_MRC:-"localhost:32636"}

# space-separated list of OSD UUIDs, e.g. "osd1 osd2 ..."
OSD_UUIDS=${BENCH_OSD_UUIDS:-"test-osd0"}

# stripe size for a volume (parsed into bytes by init_params)
STRIPE_SIZE="128K"

# request size for each I/O operation (parsed into bytes by init_params)
REQUEST_SIZE=$STRIPE_SIZE

# replication settings (empty policy means no replication)
REPLICATION_POLICY=""
REPLICATION_FACTOR=1
|
||||
|
||||
# check_env: locate the XtreemFS installation and a usable JVM.
# Sets XTREEMFS (if derivable from the current directory) and JAVA_HOME
# (defaulting to /usr); exits 1 if neither the XtreemFS jar nor a JVM is found.
check_env(){
    # Try to derive XTREEMFS from the current or parent directory layout.
    if [ -z "$XTREEMFS" ]; then
        if [ -d java ] && [ -d cpp ] && [ -d etc ]; then
            # Looks like an XtreemFS base directory.
            XTREEMFS=$(pwd)
        elif [ -d ../java ] && [ -d ../cpp ] && [ -d ../etc ]; then
            # XtreemFS base could be the parent directory.
            XTREEMFS=$(pwd)/..
        fi
    fi
    # Abort unless the XtreemFS jar can be found in one of the known places.
    if [ ! -e "$XTREEMFS/java/servers/dist/XtreemFS.jar" ] \
        && [ ! -d "$XTREEMFS/java/lib" ] \
        && [ ! -f "/usr/share/java/XtreemFS.jar" ]; then
        echo "XtreemFS jar could not be found!"
        exit 1
    fi

    # check JAVA_HOME: a JDK/JRE is required by the Java benchmark tool
    if [ -z "$JAVA_HOME" ] && [ ! -f "/usr/bin/java" ]; then
        echo "\$JAVA_HOME not set, JDK/JRE 1.6 required"
        exit 1
    fi

    if [ -z "$JAVA_HOME" ]; then
        JAVA_HOME=/usr
    fi
}
|
||||
|
||||
# printUsage: write the help text to stdout.
# Fixed: the synopsis used to advertise a nonexistent -x option and omitted
# the real -c and -i options; "beginn" typo corrected.
printUsage() {
    cat << EOF

Synopsis
    $(basename $0) -t TYPE -s SIZE [-c SIZE] [-i SIZE] [-p POLICY -f NUMBER] [-b NUMBER -e NUMBER] [-r NUMBER] [-v]
    Run a XtreemFS benchmark series, i.e. a series of benchmarks with increasing
    numbers of threads. Logs are placed in \$HOME/log/, results in \$HOME/results
    (can be changed at the head of the script).

    -t type
        Type of benchmarks to run. Type can be either of the following:
        sw  sequential write
        usw unaligned sequential write
        sr  sequential read
        rw  random write
        rr  random read

    -s size
        Size of one benchmark, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.

    -c size
        Size of each read/write request, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
        Defaults to 128K.

    -i size
        Stripe size for each volume, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
        Defaults to 128K.

    -p policy
        Replication policy to use. Defaults to none.

    -f factor
        Replication factor to use. Defaults to 1.

    -b number of threads to begin the benchmark series
        Minimum number of threads to be run as the benchmarks series.
        The series will run benchmarks between the 'begin' and the 'end' number of threads.

    -e number of threads to end the benchmark series
        Maximum number of threads to be run as the benchmarks series.
        The series will run benchmarks between the 'begin' and the 'end' number of threads.

    -r repetitions
        Number of times a benchmark is repeated.

    -v verbose
        If set, bash debugging is enabled ('set -x') and sleeping after the benchmarks
        is disabled.

EOF
}
|
||||
|
||||
# init_params: validate the environment, create log/result directories,
# derive the thread/repetition lists, and redirect stderr to a logfile and
# stdout to the CSV result file (both tee'd, so output is still visible).
# Finally converts all size settings from human-readable form to bytes.
init_params(){

    check_env

    if ! [ -d $LOG_DIR ]; then
        echo "$LOG_DIR doesn't existing. Creating $LOG_DIR..."
        mkdir -p $LOG_DIR
    fi
    if ! [ -d $RESULT_DIR ]; then
        echo "$RESULT_DIR doesn't existing. Creating $RESULT_DIR"
        mkdir -p $RESULT_DIR
    fi

    # expand scalar options into the word lists iterated over in main
    # (note REPETITIONS is deliberately overwritten: count -> list 1..count)
    THREADS="$(seq $BEGIN $END)"
    REPETITIONS="$(seq 1 $REPETITIONS)"

    # use second resolution in case multiple benchmarks are run per minute
    NOW=$(date +"%y-%m-%d_%H-%M-%S")
    # redirect stdout and stderr: stderr -> logfile, stdout -> result CSV
    exec 2> >(tee $LOG_DIR/$TYPE-$NOW.log)
    exec > >(tee $RESULT_DIR/$TYPE-$NOW.csv)

    # convert the human-readable sizes (e.g. "128K") to plain byte counts
    BASEFILE_SIZE=$(parse_size $BASEFILE_SIZE)
    REQUEST_SIZE=$(parse_size $REQUEST_SIZE)
    STRIPE_SIZE=$(parse_size $STRIPE_SIZE)

}
|
||||
|
||||
|
||||
# parse_size: convert a size with an optional binary modifier into bytes.
# $1 - size string, e.g. "512", "128K", "3M", "100g" (K/M/G, case-insensitive).
# Prints the byte count to stdout; errors go to stderr (previously they went
# to stdout and were silently captured by callers' command substitution).
# Also fixed: the error message omitted 'K' even though K was accepted, and
# the implementation needlessly spawned 'expr' and 'bc'.
parse_size(){
    local size_with_modifier=$1
    local size modifier

    # split leading digits from a trailing modifier
    if [[ $size_with_modifier =~ ^([0-9]+)([A-Za-z]*)$ ]]; then
        size=${BASH_REMATCH[1]}
        modifier=${BASH_REMATCH[2]}
    else
        echo "Invalid size: '$size_with_modifier'" >&2
        exit 1
    fi

    case $modifier in
        "") ;;  # plain byte count, nothing to do
        [Kk]) size=$(( size * 1024 )) ;;
        [Mm]) size=$(( size * 1024 * 1024 )) ;;
        [Gg]) size=$(( size * 1024 * 1024 * 1024 )) ;;
        *)
            echo "Wrong size modifier. Only 'K', 'M' and 'G' are allowed" >&2
            exit 1
            ;;
    esac

    echo $size
}
|
||||
|
||||
|
||||
|
||||
# prepare_seq_read: fill the benchmark volumes with data so that a subsequent
# sequential-read benchmark has something to read. Populates the global
# VOLUMES array (benchmark0 .. benchmarkN-1).
# $1 - data size written per thread (bytes), $2 - number of threads.
# Fixed: the body used "$1" although it had declared "local size=$1";
# trivial arithmetic no longer spawns bc; loop variables are now local.
prepare_seq_read(){

    local size=$1
    local threads=$2
    local i index

    # declare array of volume names, one volume per thread
    local volume_index=$(( threads - 1 ))
    for i in $(seq 0 $volume_index); do VOLUMES[$i]=benchmark$i; done

    echo -e "\nPreparing sequential read benchmarks\n" >&2
    for i in $(seq 1 $threads); do
        index=$(( i - 1 ))
        timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -sw -ssize $size --no-cleanup --user $USER \
            ${VOLUMES[$index]} --stripe-size $STRIPE_SIZE --chunk-size $REQUEST_SIZE
    done
}
|
||||
|
||||
# prepare_random: create the benchmark volumes and the shared basefile that
# random read/write benchmarks operate on. Populates the global VOLUMES array.
# $1 - number of threads.
prepare_random(){

    local threads=$1

    # declare array of volume names, one volume per thread
    local volume_index=$(echo "$threads-1" | bc)
    for i in $(seq 0 $volume_index); do VOLUMES[$i]=benchmark$i; done

    # calc basefile size and round to a number divideable through REQUEST_SIZE
    local basefile_size=$(echo "(($BASEFILE_SIZE/$threads)/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)

    # a tiny random-read run per volume forces xtfs_benchmark to create the
    # basefile; --no-cleanup-* keeps it around for the real benchmark
    echo -e "\nPreparing random benchmark: Creating a basefiles\n" >&2
    for i in $(seq 1 $threads); do
        local index=$(echo "$i-1"|bc)
        timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -rr -rsize $REQUEST_SIZE --no-cleanup-basefile --no-cleanup-volumes --user $USER \
            --basefile-size $basefile_size ${VOLUMES[$index]} --stripe-size $STRIPE_SIZE --chunk-size $REQUEST_SIZE
    done
}
|
||||
|
||||
|
||||
# run_benchmark: run one xtfs_benchmark invocation under a timeout.
# $1 - benchmark type (sw|usw|sr|rw|rr), $2 - size in bytes, $3 - threads.
# On timeout (exit 124) or any other failure it aborts the whole series via
# interrupted_exit. For "sr" it cleans up the OSDs after every run.
run_benchmark(){
    local benchType=$1
    local size=$2
    local threads=$3
    local replicationOpt=""
    if [[ $REPLICATION_POLICY != "" ]]; then
        replicationOpt="--replication-policy $REPLICATION_POLICY"
    fi

    # sr: volumes were pre-filled by prepare_seq_read, keep them afterwards;
    # sw/usw: let the tool clean up itself; rw/rr: reuse basefile and volumes
    if [ $benchType = "sr" ]; then
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -ssize $size -n $threads --no-cleanup-volumes --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    elif [ $benchType = "sw" ] || [ $benchType = "usw" ]; then
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -ssize $size -n $threads --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    elif [ $benchType = "rw" ] || [ $benchType = "rr" ]; then
        # calc basefile size and round to a number divideable through REQUEST_SIZE
        local basefile_size=$(echo "(($BASEFILE_SIZE/$threads)/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -rsize $size --basefile-size $basefile_size -n $threads \
            --no-cleanup-basefile --no-cleanup-volumes --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    fi

    # capture the status of the branch above; 124 is timeout(1)'s own code
    # NOTE(review): if benchType matched no branch, this captures the status
    # of the last [ ] test instead — callers only pass valid types.
    local bench_exit_status=$?
    if [ $bench_exit_status -eq 124 ]; then
        echo "The benchmark timed out (Timeout: $TIMEOUT)" >&2
        interrupted_exit
    elif [ $bench_exit_status -ne 0 ]; then
        echo "The benchmark did not finish with exit status 0" >&2
        interrupted_exit
    fi

    # cleanup after *every* benchmark only for seq write benchmark
    if [ $benchType = "sr" ]; then
        cleanup_osd
    fi
}
|
||||
|
||||
# delete_volumes: remove the volumes benchmark0 .. benchmark(N-1) at the MRC.
# $1 - number of threads the series was run with (one volume per thread).
# Fixed: replaced the '[ $? -eq 0 ]' antipattern with a direct 'if cmd',
# replaced bc with shell arithmetic, and made the loop variable local.
delete_volumes(){
    local number_of_threads=$1
    local volume_index=$(( number_of_threads - 1 ))
    local i
    for i in $(seq 0 $volume_index); do
        # report only successful removals; rmfs prints its own errors
        if rmfs.xtreemfs -f $MRC/benchmark$i >/dev/null; then
            echo "Removed volume benchmark$i" >&2
        fi
    done
}
|
||||
|
||||
# cleanup_osd: run xtfs_cleanup on every configured OSD (deleting zombie
# objects and leftover benchmark volumes), optionally sleep SLEEPTIME seconds
# so the system settles, then drop the OS caches.
cleanup_osd(){
    for osd in $OSD_UUIDS; do
        # -wait blocks until the cleanup is done; bounded by TIMEOUT_CLEANUP
        timeout --foreground $TIMEOUT_CLEANUP $XTREEMFS/bin/xtfs_cleanup -dir pbrpc://$DIR -wait -e -delete_volumes uuid:$osd >&2
    done
    if $SLEEP; then
        echo "Start Sleeping for $(echo "$SLEEPTIME/60"|bc) minutes at $(date)" >&2
        sleep $SLEEPTIME
        echo "Finished Sleeping at $(date)" >&2
    fi
    drop_caches
}
|
||||
|
||||
# interrupted_exit: emergency exit path, triggered on interrupt, benchmark
# timeout or benchmark failure. Removes all volumes of the series, cleans up
# the OSDs (without the settle sleep) and terminates with a non-zero status.
interrupted_exit(){
    printf '%s\n' "Unexpected exit, cleaning up..." >&2
    # no point in settling the system — we are aborting anyway
    SLEEP=false
    delete_volumes $END
    cleanup_osd
    exit 1
}
|
||||
|
||||
# drop_caches: flush the OS caches between benchmark runs via the configured
# sudo helper (see DROP_CACHES at the top of the script). No-op when
# DROP_CACHES_CALL is empty; sleeps 10s afterwards unless SLEEP is false.
drop_caches(){
    [ -z "$DROP_CACHES_CALL" ] && return
    echo "Dropping caches" >&2
    $DROP_CACHES_CALL
    if $SLEEP; then
        sleep 10
    fi
}
|
||||
|
||||
##### main ###
# Entry point: parse options, set up logging/result files, then run the
# benchmark series with an increasing number of threads.

# on Ctrl-C, clean up volumes and OSDs before exiting
trap "echo; echo 'Interrupt received '; interrupted_exit" INT

# show usage if invoked without options/arguments
if [ $# -eq 0 ]; then
    printUsage
    exit 1
fi

# default values: a single one-thread benchmark, run once
BEGIN=1
END=1
REPETITIONS=1


# parse options (leading ':' enables silent mode: ':' and '?' cases below)
while getopts ":t:s:c:i:b:e:r:p:f:v" opt; do
    case $opt in
    t)
        # benchmark type must be one of the five supported kinds
        if [ $OPTARG = "sw" ] || [ $OPTARG = "usw" ] || [ $OPTARG = "sr" ] || [ $OPTARG = "rw" ] || [ $OPTARG = "rr" ]; then
            TYPE=$OPTARG
        else
            echo 'wrong argument to -t. Needs to be either "sw", "usw", "sr", "rw" or "rr"'
            exit 1
        fi
        ;;
    s)
        # total size of one benchmark (split across all threads)
        SIZE=$(parse_size $OPTARG)
        ;;
    c)
        REQUEST_SIZE=$(parse_size $OPTARG)
        ;;
    i)
        STRIPE_SIZE=$(parse_size $OPTARG)
        ;;
    b)
        BEGIN=$OPTARG
        ;;
    e)
        END=$OPTARG
        ;;
    r)
        REPETITIONS=$OPTARG
        ;;
    p)
        REPLICATION_POLICY=$OPTARG
        ;;
    f)
        REPLICATION_FACTOR=$OPTARG
        ;;
    v)
        # verbose: trace every command and skip the settle sleeps
        SLEEP=false
        set -x
        ;;
    \?)
        echo "Invalid option: -$OPTARG" >&2
        exit 1
        ;;
    :)
        echo "Option -$OPTARG requires an argument." >&2
        exit 1
        ;;
    esac
done

# resolve sizes, create directories, redirect stdout (csv) / stderr (log)
init_params
drop_caches

# NOTE(review): $0 $@ are left unquoted; this line only logs the invocation
echo "Running:" $0 $@ >&2

# one series iteration per thread count; the per-thread size shrinks with i
# so the total amount of data stays constant across the series
for i in $THREADS; do
    size="$(echo "$SIZE/$i"|bc)"
    if [ $TYPE != "usw" ]; then
        size="$(echo "($size/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)" # round down to a size divideable through the REQUEST_SIZE
    fi

    # read benchmarks need pre-written data; random ones need the basefile
    if [ $TYPE = "sr" ]; then
        prepare_seq_read $size $i
        cleanup_osd
    elif [ $TYPE = "rw" ] || [ $TYPE = "rr" ]; then
        prepare_random $i
    fi

    for j in $REPETITIONS; do
        echo "Start $i-Thread-Benchmark Nr. $j" >&2

        run_benchmark $TYPE $size $i

        echo "Finished $i-Thread-Benchmark Nr. $j" >&2

    done

    # seq write benchmarks run cleanup after every benchmark, so this would be redundant
    if [ $TYPE != "sw" ] && [ $TYPE != "usw" ]; then
        volume_index=$(echo "$i-1" | bc)
        # NOTE(review): this inner loop reuses 'i' and clobbers the outer loop
        # variable; it only works because 'for i in $THREADS' reassigns i at
        # the top of each iteration — fragile, a different name would be safer.
        for i in $(seq 0 $volume_index); do
            rmfs.xtreemfs -f $MRC/benchmark$i >&2
            echo "Remove volume benchmark$i" >&2
        done
        cleanup_osd
    fi

done
|
||||
3
contrib/benchmark/drop_caches
Executable file
3
contrib/benchmark/drop_caches
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/bash
# Helper for benchmark.sh: flushes the kernel page cache, dentries and inodes
# (drop_caches=3). Must run as root — install to /usr/local/bin and allow it
# passwordless via sudoers (see the header of benchmark.sh).
# The inner shell is needed so the redirection itself runs with root rights.

/bin/bash -c "echo 3 > /proc/sys/vm/drop_caches"
|
||||
69
contrib/filter-MRC-dump-with-XSLT/filter_files.xslt
Normal file
69
contrib/filter-MRC-dump-with-XSLT/filter_files.xslt
Normal file
@@ -0,0 +1,69 @@
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:date="http://exslt.org/dates-and-times"
|
||||
extension-element-prefixes="date">
|
||||
|
||||
<!--
|
||||
Copyright (c) 2013 by Michael Berlin, Zuse Institute Berlin
|
||||
|
||||
Licensed under the BSD License, see LICENSE file for details.
|
||||
|
||||
This file transforms a MRC database dump (in XML format) into
|
||||
a list of files. The output format is as follows:
|
||||
|
||||
volume name/path on volume|creation time|file size|file's owner name
|
||||
|
||||
The current version lists only files which are placed on an OSD
|
||||
with the UUID 'zib.mosgrid.osd15' (see line 34).
|
||||
|
||||
You can use the 'xsltproc' tool to apply this transformation to a XML dump.
|
||||
|
||||
Example: xsltproc -o filtered_files_output.txt filter_files.xslt /tmp/dump.xml
|
||||
-->
|
||||
|
||||
<xsl:output omit-xml-declaration="yes"/>
|
||||
|
||||
<!--Strip off white space from all elements. We take care of the format on our own.-->
|
||||
<xsl:strip-space elements="*"/>
|
||||
|
||||
<!--For each volume, process its "file" elements.-->
|
||||
<xsl:template match="volume">
|
||||
<xsl:apply-templates select="//file"/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="file[xlocList/xloc/osd/@location='zib.mosgrid.osd15']">
|
||||
<!--Traverse the path of the <file> element and output the 'name' attribute of
|
||||
each element to display the file system path.
|
||||
The first entry is the name of the volume.-->
|
||||
<xsl:for-each select="ancestor-or-self::*/@name">
|
||||
<!--We ignore the <volume> element because its name is repeated as <dir> element below.-->
|
||||
<xsl:if test="local-name(..) != 'volume'">
|
||||
|
||||
<!--Output path element.-->
|
||||
<xsl:value-of select="."/>
|
||||
|
||||
<xsl:if test="position() != last()">
|
||||
<!--Display separator.-->
|
||||
<xsl:text>/</xsl:text>
|
||||
</xsl:if>
|
||||
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<!--Creation time.-->
|
||||
<xsl:text>|</xsl:text>
|
||||
<xsl:value-of select="date:add('1970-01-01T00:00:00Z', date:duration(@ctime))"/>
|
||||
|
||||
<!--File size.-->
|
||||
<xsl:text>|</xsl:text>
|
||||
<xsl:value-of select="@size"/>
|
||||
|
||||
<!--Owner.-->
|
||||
<xsl:text>|</xsl:text>
|
||||
<xsl:value-of select="@uid"/>
|
||||
|
||||
<!--New line.-->
|
||||
<xsl:text>
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
17
contrib/ganglia-plugin/.project
Normal file
17
contrib/ganglia-plugin/.project
Normal file
@@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ganglia-plugin</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.python.pydev.PyDevBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.python.pydev.pythonNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
||||
10
contrib/ganglia-plugin/.pydevproject
Normal file
10
contrib/ganglia-plugin/.pydevproject
Normal file
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?eclipse-pydev version="1.0"?>
|
||||
|
||||
<pydev_project>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
|
||||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
||||
<path>/ganglia-plugin/src</path>
|
||||
</pydev_pathproperty>
|
||||
</pydev_project>
|
||||
6
contrib/ganglia-plugin/README.txt
Normal file
6
contrib/ganglia-plugin/README.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
How to use this plugin?
|
||||
|
||||
- Make sure your ganglia installation supports python plugins.
|
||||
- Copy plugin files from src directory to /usr/lib/ganglia/python_modules/
|
||||
- Copy the configuration files from the configuration-files directory to /etc/ganglia/conf.d
|
||||
- Alter the configuration files to suit your needs.
|
||||
61
contrib/ganglia-plugin/config-files/xtfs-dir.pyconf
Normal file
61
contrib/ganglia-plugin/config-files/xtfs-dir.pyconf
Normal file
@@ -0,0 +1,61 @@
|
||||
modules {
|
||||
module {
|
||||
name = "xtfs-dir-plugin"
|
||||
language = "python"
|
||||
# The following params are examples only
|
||||
param Host {
|
||||
value = localhost
|
||||
}
|
||||
param Port {
|
||||
value = 9001
|
||||
}
|
||||
param CommunityString {
|
||||
value = public
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection_group {
|
||||
collect_every = 60
|
||||
time_threshold = 10
|
||||
metric {
|
||||
name = "dir_jvm_used_mem"
|
||||
title = "used memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_jvm_free_mem"
|
||||
title = "free memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_client_connections"
|
||||
title = "number of Clients"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_pending_requests"
|
||||
title = "number of pending requests"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "addr_mapping_count"
|
||||
title = "number of address mappings"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "service_count"
|
||||
title = "number of services"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_status"
|
||||
title = "Status DIR"
|
||||
}
|
||||
metric {
|
||||
name = "dir_uuid"
|
||||
title = "DIR UUID"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
56
contrib/ganglia-plugin/config-files/xtfs-mrc.pyconf
Normal file
56
contrib/ganglia-plugin/config-files/xtfs-mrc.pyconf
Normal file
@@ -0,0 +1,56 @@
|
||||
modules {
|
||||
module {
|
||||
name = "xtfs-mrc-plugin"
|
||||
language = "python"
|
||||
# The following params are examples only
|
||||
param Host {
|
||||
value = localhost
|
||||
}
|
||||
param Port {
|
||||
value = 9002
|
||||
}
|
||||
param CommunityString {
|
||||
value = public
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection_group {
|
||||
collect_every = 60
|
||||
time_threshold = 10
|
||||
metric {
|
||||
name = "mrc_jvm_used_mem"
|
||||
title = "used memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_jvm_free_mem"
|
||||
title = "free memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_client_connections"
|
||||
title = "number of Clients"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_pending_requests"
|
||||
title = "number of pending requests"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "volumes_count"
|
||||
title = "number of volumes"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_status"
|
||||
title = "Status MRC"
|
||||
}
|
||||
metric {
|
||||
name = "mrc_uuid"
|
||||
title = "MRC UUID"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
107
contrib/ganglia-plugin/config-files/xtfs-osd.pyconf
Normal file
107
contrib/ganglia-plugin/config-files/xtfs-osd.pyconf
Normal file
@@ -0,0 +1,107 @@
|
||||
modules {
|
||||
module {
|
||||
name = "xtfs-osd-plugin"
|
||||
language = "python"
|
||||
# The following params are examples only
|
||||
param Host {
|
||||
value = localhost
|
||||
}
|
||||
param Port {
|
||||
value = 9003
|
||||
}
|
||||
param CommunityString {
|
||||
value = public
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection_group {
|
||||
collect_every = 60
|
||||
time_threshold = 10
|
||||
metric {
|
||||
name = "osd_jvm_used_mem"
|
||||
title = "used memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "osd_jvm_free_mem"
|
||||
title = "free memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "osd_client_connections"
|
||||
title = "number of Clients"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "objects_received"
|
||||
title = "objects received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "repl_objects_received"
|
||||
title = "replicated objects received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "objects_transmitted"
|
||||
title = "objects transmitted"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "repl_bytes_received"
|
||||
title = "replicated bytes received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "bytes_received"
|
||||
title = "bytes received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "bytes_transmitted"
|
||||
title = "bytes transmitted"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "preproc_queue_length"
|
||||
title = "preprocessing stage queue length"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "storage_queue_length"
|
||||
title = "storage stage queue length"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "deletion_queue_length"
|
||||
title = "deletion stage queue length"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "open_files"
|
||||
title = "open files"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "deleted_files"
|
||||
title = "deleted files"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "free_space"
|
||||
title = "free space"
|
||||
value_threshold = 100
|
||||
}
|
||||
metric {
|
||||
name = "osd_status"
|
||||
title = "Status OSD"
|
||||
}
|
||||
metric {
|
||||
name = "osd_uuid"
|
||||
title = "OSD UUID"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
250
contrib/ganglia-plugin/src/xtfs-dir-plugin.py
Normal file
250
contrib/ganglia-plugin/src/xtfs-dir-plugin.py
Normal file
@@ -0,0 +1,250 @@
|
||||
'''
|
||||
Created on May 25, 2011
|
||||
|
||||
@author: bzcseife
|
||||
|
||||
This is a python ganglia plugin which monitors the status of a DIR service of the XtreemFS
filesystem. It is intended to run on the same host as the DIR and gathers information about the DIR via
SNMP. Therefore you have to configure your DIR to provide an SNMP agent on this host.
|
||||
|
||||
'''
|
||||
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
|
||||
#reported from the SNMP Agent to 32bit integers.
|
||||
|
||||
|
||||
import random
|
||||
from pysnmp.entity.rfc3413.oneliner import cmdgen
|
||||
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
|
||||
|
||||
|
||||
descriptors = list()
|
||||
Random_Max = 50
|
||||
Constant_Value = 50
|
||||
|
||||
|
||||
|
||||
#Get the used memory of the JVM
def JvmUsedMem(name):
    # 'name' is the metric name supplied by ganglia's callback interface
    # (unused here). Queries OID 1.3.6.1.4.1.38350.1.1.0 via SNMP using the
    # module-level authData/transportTarget initialized in metric_init().
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
                                                                                          transportTarget,
                                                                                          (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    if (errorStatus == False and errorIndication == None):
        # convert bytes to MiB — the descriptor declares 'Megabytes' as unit
        return int(varBinds[0][1]/1024/1024)
    else:
        # on any SNMP error report 0 instead of failing the collector
        return 0
|
||||
#Get the free memory of the JVM
|
||||
def JvmFreeMem(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1] / 1024 / 1024)
|
||||
else:
|
||||
return 0
|
||||
|
||||
#Get the number of client connections
|
||||
def ClientConnections(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
#Get the number of pending requests
|
||||
def PendingRequests(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 9, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
#Get the number of address mappings registered
|
||||
def AddressMappingCount(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 2, 1, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
#Get the number of services registered
|
||||
def ServiceCount(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 2, 2, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
#get the status of the DIR
|
||||
#OID: 1.3.6.1.4.1.38350.1.11.0
|
||||
def Status(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return str(varBinds[0][1])
|
||||
else:
|
||||
return "OFFLINE"
|
||||
|
||||
#get the UUID of the DIR
|
||||
#OID: 1.3.6.1.4.1.38350.1.13.0
|
||||
def Uuid(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return str(varBinds[0][1])
|
||||
else:
|
||||
return "Service not available"
|
||||
|
||||
def metric_init(params):
    '''Initialize the DIR ganglia metrics.

    params: dict of plugin parameters from the .pyconf file; recognized keys
    are 'CommunityString', 'Host' and 'Port'. Builds the module-level SNMP
    auth/transport objects used by all metric callbacks and returns the list
    of metric descriptor dicts.
    '''
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget

    # BUG FIX: the lookup key used to be misspelled 'ComummunityString', so a
    # configured community string was silently ignored; the global was also
    # misspelled 'Commmunity_String', so the assignment only created a local.
    if 'CommunityString' in params:
        Community_String = params['CommunityString']
    else:
        Community_String = 'public'

    # NOTE(review): Snmp_Host/Snmp_Port stay unbound if 'Host'/'Port' are
    # missing from params (pre-existing behavior; the shipped .pyconf files
    # always provide both) — confirm before relying on defaults.
    if 'Port' in params:
        Snmp_Port = int(params['Port'])
    if 'Host' in params:
        Snmp_Host = params['Host']

    # BUG FIX: pass the configured community string instead of the
    # hardcoded 'public'.
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)

    d0 = {'name': 'dir_jvm_used_mem',
          'call_back': JvmUsedMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of memory the JVM uses currently.',
          'groups': 'dir'}

    d1 = {'name': 'dir_jvm_free_mem',
          'call_back': JvmFreeMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of free memory the JVM can still use.',
          'groups': 'dir'}

    d2 = {'name': 'dir_client_connections',
          'call_back': ClientConnections,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'clients',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of active client connection this DIR has currently to handle.',
          'groups': 'dir'}

    d3 = {'name': 'dir_pending_requests',
          'call_back': PendingRequests,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'pending requests',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of pending requests this DIR has enqueued.',
          'groups': 'dir'}

    d4 = {'name': 'addr_mapping_count',
          'call_back': AddressMappingCount,
          'time_max': 90,
          # value_type: string | uint | float | double
          'value_type': 'uint',
          # units: unit of your metric
          'units': 'mappings',
          # slope: zero | positive | negative | both
          # 'both' maps to an RRDTool GAUGE (the value is graphed as-is);
          # 'positive' maps to COUNTER (rate of change); 'zero' puts the
          # metric into the "Time and String" / "Constant" groups.
          'slope': 'both',
          # format: printf-style format string; must match value_type
          'format': '%u',
          'description': 'The number of address mapping registered at the DIR.',
          # groups (optional): groups your metric belongs to
          'groups': 'dir'}

    d5 = {'name': 'service_count',
          'call_back': ServiceCount,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'services',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of services registered at the DIR.',
          'groups': 'dir'}

    d6 = {'name': 'dir_status',
          'call_back': Status,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'ONLINE if this DIR is running correctly, OFFLINE otherwise',
          'groups': 'dir'}

    d7 = {'name': 'dir_uuid',
          'call_back': Uuid,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'UUID of the DIR running on this host',
          'groups': 'dir'}

    descriptors = [d0, d1, d2, d3, d4, d5, d6, d7]

    return descriptors
|
||||
|
||||
def metric_cleanup():
|
||||
'''Clean up the metric module.'''
|
||||
pass
|
||||
|
||||
|
||||
#for debugging purpose
|
||||
if __name__ == '__main__':
|
||||
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9001}
|
||||
metric_init(params)
|
||||
for d in descriptors:
|
||||
v = d['call_back'](d['name'])
|
||||
print 'value for %s is' % (d['name'])
|
||||
print v
|
||||
|
||||
|
||||
|
||||
221
contrib/ganglia-plugin/src/xtfs-mrc-plugin.py
Normal file
221
contrib/ganglia-plugin/src/xtfs-mrc-plugin.py
Normal file
@@ -0,0 +1,221 @@
|
||||
'''
|
||||
Created on May 25, 2011
|
||||
|
||||
@author: bzcseife
|
||||
|
||||
This is a python ganglia plugin which monitors the status of an MRC service of the XtreemFS
|
||||
filesystem. It is intended to run on the same host as the MRC and gathers information about the MRC via
|
||||
SNMP. Therefore you have to configure your MRC to provide a SNMP Agent on this host.
|
||||
|
||||
'''
|
||||
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
|
||||
#reported from the SNMP Agent to 32bit integers.
|
||||
|
||||
|
||||
import random
|
||||
from pysnmp.entity.rfc3413.oneliner import cmdgen
|
||||
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
|
||||
|
||||
|
||||
# Module-level state shared with the metric callbacks; filled in by metric_init().
descriptors = []
Random_Max = 50
Constant_Value = 50
|
||||
|
||||
|
||||
|
||||
#Get the used memory of the JVM
def JvmUsedMem(name):
    """Return the JVM heap currently used by the MRC, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    # SNMP agent unreachable or returned an error: report 0 rather than failing.
    return 0
|
||||
|
||||
#Get the free memory of the JVM
def JvmFreeMem(name):
    """Return the free JVM heap of the MRC, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
|
||||
#Get the number of client connections
def ClientConnections(name):
    """Return the number of active client connections on the MRC (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 7, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the number of pending requests
def PendingRequests(name):
    """Return the number of requests queued at the MRC (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of volumes
def VolumeCount(name):
    """Return the number of volumes registered at the MRC (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 3, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#get the status of the MRC
def Status(name):
    """Return the status string reported by the MRC, or "OFFLINE" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "OFFLINE"
|
||||
|
||||
|
||||
#get the UUID of the MRC
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
    """Return the UUID of the MRC, or "Service not available" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "Service not available"
|
||||
|
||||
|
||||
def metric_init(params):
    """Build and return the ganglia metric descriptors for the MRC plugin.

    Recognized params: 'CommunityString' (default 'public'), 'Host'
    (default 'localhost') and 'Port' of the MRC's SNMP agent.
    Also initializes the module-global SNMP auth/transport objects used by
    the metric callbacks.
    """
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget

    # Bug fix: the key was tested under the misspelled name 'ComummunityString'
    # (and the global was declared as 'Commmunity_String'), so a user-supplied
    # community string was silently ignored.
    Community_String = params.get('CommunityString', 'public')
    # Bug fix: Snmp_Port/Snmp_Host stayed undefined (NameError below) when the
    # params were missing; fall back to sensible defaults instead.
    Snmp_Port = int(params.get('Port', 161))  # 161 = standard SNMP port
    Snmp_Host = params.get('Host', 'localhost')

    # Bug fix: the community string is now actually used instead of a
    # hardcoded 'public'.
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)

    def metric(name, call_back, value_type, units, slope, fmt, description):
        # All MRC metrics share the same time_max and metric group.
        return {'name': name,
                'call_back': call_back,
                'time_max': 90,
                'value_type': value_type,
                'units': units,
                'slope': slope,
                'format': fmt,
                'description': description,
                'groups': 'mrc'}

    descriptors = [
        metric('mrc_jvm_used_mem', JvmUsedMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of memory the JVM uses currently.'),
        metric('mrc_jvm_free_mem', JvmFreeMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of free memory the JVM can still use.'),
        metric('mrc_client_connections', ClientConnections, 'uint', 'clients', 'both', '%u',
               'The number of active client connection this MRC has currently to handle.'),
        metric('mrc_pending_requests', PendingRequests, 'uint', 'pending requests', 'both', '%u',
               'The number of pending requests this MRC has enqueued.'),
        metric('volumes_count', VolumeCount, 'uint', 'volumes', 'both', '%u',
               'The number of volumes on this MRC.'),
        # Bug fix: the description wrongly said 'OSD' in this MRC plugin.
        metric('mrc_status', Status, 'string', '', 'zero', '%s',
               'ONLINE if this MRC is running correctly, OFFLINE otherwise'),
        metric('mrc_uuid', Uuid, 'string', '', 'zero', '%s',
               'UUID of the MRC running on this host'),
    ]
    return descriptors
|
||||
|
||||
def metric_cleanup():
    """Clean up the metric module (this plugin holds no resources to release)."""
    pass
|
||||
|
||||
|
||||
#for debugging purpose
|
||||
if __name__ == '__main__':
|
||||
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9002}
|
||||
metric_init(params)
|
||||
for d in descriptors:
|
||||
v = d['call_back'](d['name'])
|
||||
print 'value for %s is ' % (d['name'])
|
||||
print v
|
||||
|
||||
|
||||
|
||||
477
contrib/ganglia-plugin/src/xtfs-osd-plugin.py
Normal file
477
contrib/ganglia-plugin/src/xtfs-osd-plugin.py
Normal file
@@ -0,0 +1,477 @@
|
||||
'''
|
||||
Created on May 25, 2011
|
||||
|
||||
@author: bzcseife
|
||||
|
||||
This is a python ganglia plugin which monitors the status of an OSD service of the XtreemFS
|
||||
filesystem. It is intended to run on the same host as the OSD and gathers information about the OSD via
|
||||
SNMP. Therefore you have to configure your OSD to provide a SNMP Agent on this host.
|
||||
|
||||
'''
|
||||
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
|
||||
#reported from the SNMP Agent to 32bit integers.
|
||||
|
||||
|
||||
import random
|
||||
from pysnmp.entity.rfc3413.oneliner import cmdgen
|
||||
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
|
||||
|
||||
|
||||
|
||||
|
||||
# Module-level state shared with the metric callbacks; filled in by metric_init().
descriptors = []
Random_Max = 50
Constant_Value = 50
|
||||
|
||||
|
||||
|
||||
#Get the used memory of the JVM
def JvmUsedMem(name):
    """Return the JVM heap currently used by the OSD, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
|
||||
|
||||
#Get the free memory of the JVM
def JvmFreeMem(name):
    """Return the free JVM heap of the OSD, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
|
||||
#Get the number of client connections
def ClientConnections(name):
    """Return the number of active client connections on the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 7, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the number of pending requests
def PendingRequests(name):
    """Return the number of requests queued at the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of objects received
def ObjectsReceived(name):
    """Return the number of objects this OSD has received (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 1, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of replicated objects received
def ReplObjectsReceived(name):
    """Return the number of replicated objects this OSD has received (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of objects transmitted
# (comment fix: this is the plain, not the replicated, counter - OID ...4.3.0)
def ObjectsTransmitted(name):
    """Return the number of objects this OSD has transmitted (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 3, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of replicated bytes received
def ReplBytesReceived(name):
    """Return the replicated data this OSD has received, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 4, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#Get the number of bytes received
def BytesReceived(name):
    """Return the data this OSD has received, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 5, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#Get the number of bytes transmitted
def BytesTransmitted(name):
    """Return the data this OSD has transmitted, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 6, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#Get the length of the preprocessing stage queue
def PreprocQueueLength(name):
    """Return the preprocessing stage queue length of the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 7, 0))
    if not errorStatus and errorIndication is None:
        # Bug fix: the original divided the queue length by 1024/1024 (a
        # copy-paste from the byte counters), which reported 0 for any
        # realistic queue size. A queue length is a plain count.
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the length of the storage stage queue
def StorageQueueLength(name):
    """Return the storage stage queue length of the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 8, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the length of the deletion stage queue
def DeletionQueueLength(name):
    """Return the deletion stage queue length of the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 9, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the number of open files from the OSD per snmp
def OsdOpenFiles(name):
    """Return the number of files currently open on the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 10, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
|
||||
#Get the number of deleted files from the OSD per snmp
def OsdDeletedFiles(name):
    """Return the number of files deleted on the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 11, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the free space from the OSD per snmp
def OsdFreeSpace(name):
    """Return the free space on the OSD's object partition, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 12, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#get the status of the OSD
def Status(name):
    """Return the status string reported by the OSD, or "OFFLINE" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "OFFLINE"
|
||||
|
||||
#get the UUID of the OSD
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
    """Return the UUID of the OSD, or "Service not available" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "Service not available"
|
||||
|
||||
def metric_init(params):
    """Build and return the ganglia metric descriptors for the OSD plugin.

    Recognized params: 'CommunityString' (default 'public'), 'Host'
    (default 'localhost') and 'Port' of the OSD's SNMP agent.
    Also initializes the module-global SNMP auth/transport objects used by
    the metric callbacks.
    """
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget

    # Bug fix: the key was tested under the misspelled name 'ComummunityString'
    # (and the global was declared as 'Commmunity_String'), so a user-supplied
    # community string was silently ignored.
    Community_String = params.get('CommunityString', 'public')
    # Bug fix: Snmp_Port/Snmp_Host stayed undefined (NameError below) when the
    # params were missing; fall back to sensible defaults instead.
    Snmp_Port = int(params.get('Port', 161))  # 161 = standard SNMP port
    Snmp_Host = params.get('Host', 'localhost')

    # Bug fix: the community string is now actually used instead of a
    # hardcoded 'public'.
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)

    def metric(name, call_back, value_type, units, slope, fmt, description):
        # All OSD metrics share the same time_max and metric group.
        return {'name': name,
                'call_back': call_back,
                'time_max': 90,
                'value_type': value_type,
                'units': units,
                'slope': slope,
                'format': fmt,
                'description': description,
                'groups': 'osd'}

    descriptors = [
        metric('osd_jvm_used_mem', JvmUsedMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of memory the JVM uses currently.'),
        metric('osd_jvm_free_mem', JvmFreeMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of free memory the JVM can still use.'),
        metric('osd_client_connections', ClientConnections, 'uint', 'clients', 'both', '%u',
               'The number of active client connection this OSD has currently to handle.'),
        metric('osd_pending_requests', PendingRequests, 'uint', 'pending requests', 'both', '%u',
               'The number of pending requests this OSD has enqueued.'),
        metric('objects_received', ObjectsReceived, 'uint', 'objects', 'positive', '%u',
               'The number of objects this OSD has received.'),
        metric('repl_objects_received', ReplObjectsReceived, 'uint', 'objects', 'positive', '%u',
               'The number of replicated objects this OSD has received.'),
        metric('objects_transmitted', ObjectsTransmitted, 'uint', 'objects', 'positive', '%u',
               'The number of objects this OSD has transmitted.'),
        metric('repl_bytes_received', ReplBytesReceived, 'uint', 'Megabytes', 'positive', '%u',
               'The number of replicated bytes this OSD has received.'),
        metric('bytes_received', BytesReceived, 'uint', 'Megabytes', 'positive', '%u',
               'The number of bytes this OSD has received.'),
        metric('bytes_transmitted', BytesTransmitted, 'uint', 'Megabytes', 'positive', '%u',
               'The number of bytes this OSD has transmitted.'),
        metric('preproc_queue_length', PreprocQueueLength, 'uint', 'requests', 'both', '%u',
               'The length of the preprocessing stage queue of this OSD.'),
        # Bug fix: 'storage_queue_length' was registered twice (d11 with slope
        # 'positive' and a copy-paste d13 with slope 'both'); ganglia metric
        # names must be unique. Kept a single descriptor with slope 'both',
        # since a queue length is a gauge, not a monotonic counter.
        metric('storage_queue_length', StorageQueueLength, 'uint', 'requests', 'both', '%u',
               'The length of the storage stage queue of this OSD.'),
        metric('deletion_queue_length', DeletionQueueLength, 'uint', 'requests', 'both', '%u',
               'The length of the deletion stage queue of this OSD.'),
        metric('open_files', OsdOpenFiles, 'uint', 'files', 'both', '%u',
               'The number of file this OSD has currently opened.'),
        metric('deleted_files', OsdDeletedFiles, 'uint', 'files', 'positive', '%u',
               'The number of deleted files on this OSD'),
        metric('free_space', OsdFreeSpace, 'uint', 'Megabytes', 'both', '%u',
               'The free disc space on the partition this OSD stores the object files.'),
        metric('osd_status', Status, 'string', '', 'zero', '%s',
               'ONLINE if this OSD is running correctly, OFFLINE otherwise'),
        metric('osd_uuid', Uuid, 'string', '', 'zero', '%s',
               'UUID of the OSD running on this host'),
    ]
    return descriptors
|
||||
|
||||
def metric_cleanup():
    """Clean up the metric module (this plugin holds no resources to release)."""
    pass
|
||||
|
||||
|
||||
#for debugging purpose
|
||||
if __name__ == '__main__':
|
||||
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9003}
|
||||
metric_init(params)
|
||||
for d in descriptors:
|
||||
v = d['call_back'](d['name'])
|
||||
print 'value for %s is' % (d['name'])
|
||||
print v
|
||||
|
||||
|
||||
37
contrib/osd-health/osd_health_check.sh
Executable file
37
contrib/osd-health/osd_health_check.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Check the SMART health of the disk(s) backing an OSD object directory.
# Usage:  osd_health_check.sh OBJECT_DIR
# Exit codes: 0 = all devices PASSED, 1 = a device FAILED, 3 = unsupported
#             device type or unknown smartctl output.

OBJECT_DIR=$1

# Device holding the object dir: first field of df's '/dev/...' output line.
read -r DEVICE _ <<< "$(df "$OBJECT_DIR" | grep dev)"

# Determine device type
if [[ $DEVICE == *md* ]]; then
    # DEVICE is a software RAID. Bug fix: the original captured the output of
    # 'read' via $(...) — 'read' runs in a subshell and prints nothing, so
    # DEVICES was always empty. Read the matching /proc/mdstat line into an
    # array in the current shell instead. Note /proc/mdstat lists the bare
    # device name (e.g. 'md0'), not the '/dev/...' path, hence ${DEVICE##*/}.
    read -r -a MD_FIELDS <<< "$(grep "${DEVICE##*/}" /proc/mdstat)"
    # Member devices start at field 5 ("mdX : active raidN member1 member2 ...").
    DEVICES=("${MD_FIELDS[@]:4}")
elif [[ $DEVICE == *sd* || $DEVICE == *hd* ]]; then
    # DEVICE is a single disk
    DEVICES=("$DEVICE")
else
    # unsupported device type
    echo "unsupported device type"
    exit 3
fi

for DEVICE in "${DEVICES[@]}"; do
    SMART_STATUS=$(sudo smartctl --health "$DEVICE")
    echo "$SMART_STATUS"
    if [[ $SMART_STATUS == *PASSED* ]]; then
        continue
    elif [[ $SMART_STATUS == *FAILED* ]]; then
        exit 1
    else
        # Output we do not recognize: report "unknown" rather than healthy.
        exit 3
    fi
done

# If no device's health test failed, return 0 (i.e. health test PASSED).
exit 0
|
||||
BIN
contrib/server-repl-plugin/BabuDB_replication_plugin.jar
Normal file
BIN
contrib/server-repl-plugin/BabuDB_replication_plugin.jar
Normal file
Binary file not shown.
32
contrib/server-repl-plugin/LICENSE
Normal file
32
contrib/server-repl-plugin/LICENSE
Normal file
@@ -0,0 +1,32 @@
|
||||
Copyright (c) 2008-2011, Jan Stender, Bjoern Kolbeck, Mikael Hoegqvist,
|
||||
Felix Hupfeld, Felix Langner, Zuse Institute Berlin
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
* Neither the name of the Zuse Institute Berlin nor the
|
||||
names of its contributors may be used to endorse or promote
|
||||
products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
3
contrib/server-repl-plugin/README
Normal file
3
contrib/server-repl-plugin/README
Normal file
@@ -0,0 +1,3 @@
|
||||
For further details on how to use the BabuDB replication plug-in, please refer to the BabuDB Wiki:
|
||||
|
||||
http://code.google.com/p/babudb/wiki/UsageReplicationForJava
|
||||
82
contrib/server-repl-plugin/config/dir.properties
Normal file
82
contrib/server-repl-plugin/config/dir.properties
Normal file
@@ -0,0 +1,82 @@
|
||||
#####################################################################
|
||||
# BabuDB replication plugin configuration (DIR replication) #
|
||||
#####################################################################
|
||||
|
||||
#####################################################################
|
||||
# List of replicas and replication configuration
|
||||
#####################################################################
|
||||
|
||||
# participants of the replication including this replica
|
||||
babudb.repl.participant.0 = first-DIR-replica
|
||||
babudb.repl.participant.0.port = 35678
|
||||
babudb.repl.participant.1 = second-DIR-replica
|
||||
babudb.repl.participant.1.port = 35678
|
||||
babudb.repl.participant.2 = third-DIR-replica
|
||||
babudb.repl.participant.2.port = 35678
|
||||
|
||||
# number of servers that at least have to be up to date
|
||||
# To have a fault-tolerant system, this value has to be set to the
|
||||
# majority of nodes i.e., if you have three replicas, set this to 2
|
||||
# Please note that a setup with two nodes provides no fault-tolerance.
|
||||
babudb.repl.sync.n = 2
|
||||
|
||||
#####################################################################
|
||||
# Advanced Options (usually you do NOT have to edit these)
|
||||
#####################################################################
|
||||
|
||||
# It's possible to set the local address and port of this server explicitly.
|
||||
# If not, it will be chosen from the list of participants.
|
||||
#babudb.repl.localhost = localhost
|
||||
#babudb.repl.localport = 35678
|
||||
|
||||
# Choose here one of the predefined policies for handling database requests:
|
||||
#
|
||||
# MasterOnly - Redirect any kind of request to the master.
|
||||
# Provides strong consistency.
|
||||
# WriteRestriction - Same as MasterOnly plus lookup operations are also permitted on the slaves.
|
||||
# Consequently, clients may read stale values from a backup replica.
|
||||
# NoRestriction - Allows any kind of request to be performed at the local BabuDB instance.
|
||||
#                    May result in conflicts which are not resolved.
|
||||
#
|
||||
# default setting is MasterOnly.
|
||||
#babudb.repl.policy = MasterOnly
|
||||
|
||||
# DB backup directory - needed for the initial loading of the BabuDB from the
|
||||
# master in replication context
|
||||
babudb.repl.backupDir = /var/lib/xtreemfs/server-repl-dir
|
||||
|
||||
#####################################################################
|
||||
# SSL options (disabled by default)
|
||||
#####################################################################
|
||||
|
||||
# specify whether SSL is required
|
||||
#babudb.ssl.enabled = false
|
||||
|
||||
# server credentials for SSL handshakes
|
||||
#babudb.ssl.service_creds = /etc/xos/xtreemfs/truststore/certs/osd.p12
|
||||
#babudb.ssl.service_creds.pw = xtreemfs
|
||||
#babudb.ssl.service_creds.container = pkcs12
|
||||
|
||||
# trusted certificates for SSL handshakes
|
||||
#babudb.ssl.trusted_certs = /etc/xos/xtreemfs/truststore/certs/xosrootca.jks
|
||||
#babudb.ssl.trusted_certs.pw = xtreemfs
|
||||
#babudb.ssl.trusted_certs.container = jks
|
||||
|
||||
#babudb.ssl.authenticationWithoutEncryption = false
|
||||
|
||||
#####################################################################
|
||||
# Internal options (usually do not have to be touched)
|
||||
#####################################################################
|
||||
plugin.jar = /usr/share/java/BabuDB_replication_plugin.jar
|
||||
|
||||
# paths to libraries this plugin depends on
|
||||
babudb.repl.dependency.0 = /usr/share/java/Flease.jar
|
||||
|
||||
# local time renew in milliseconds
|
||||
#babudb.localTimeRenew = 0
|
||||
|
||||
# chunk size, for initial load of file chunks
|
||||
#babudb.repl.chunkSize = 5242880
|
||||
|
||||
# decides whether redirects should be handled by the user-application or not
|
||||
#babudb.repl.redirectIsVisible = false
|
||||
82
contrib/server-repl-plugin/config/mrc.properties
Normal file
82
contrib/server-repl-plugin/config/mrc.properties
Normal file
@@ -0,0 +1,82 @@
|
||||
#####################################################################
|
||||
# BabuDB replication plugin configuration (MRC replication) #
|
||||
#####################################################################
|
||||
|
||||
#####################################################################
|
||||
# List of replicas and replication configuration
|
||||
#####################################################################
|
||||
|
||||
# participants of the replication including this replica
|
||||
babudb.repl.participant.0 = first-MRC-replica
|
||||
babudb.repl.participant.0.port = 35676
|
||||
babudb.repl.participant.1 = second-MRC-replica
|
||||
babudb.repl.participant.1.port = 35676
|
||||
babudb.repl.participant.2 = third-MRC-replica
|
||||
babudb.repl.participant.2.port = 35676
|
||||
|
||||
# number of servers that at least have to be up to date
|
||||
# To have a fault-tolerant system, this value has to be set to the
|
||||
# majority of nodes i.e., if you have three replicas, set this to 2
|
||||
# Please note that a setup with two nodes provides no fault-tolerance.
|
||||
babudb.repl.sync.n = 2
|
||||
|
||||
#####################################################################
|
||||
# Advanced Options (usually you do NOT have to edit these)
|
||||
#####################################################################
|
||||
|
||||
# It's possible to set the local address and port of this server explicitly.
|
||||
# If not, it will be chosen from the list of participants.
|
||||
#babudb.repl.localhost = localhost
|
||||
#babudb.repl.localport = 35676
|
||||
|
||||
# Choose here one of the predefined policies for handling database requests:
|
||||
#
|
||||
# MasterOnly - Redirect any kind of request to the master.
|
||||
# Provides strong consistency.
|
||||
# WriteRestriction - Same as MasterOnly plus lookup operations are also permitted on the slaves.
|
||||
# Consequently, clients may read stale values from a backup replica.
|
||||
# NoRestriction - Allows any kind of request to be performed at the local BabuDB instance.
|
||||
# May result into conflicts which are not resolved.
|
||||
#
|
||||
# default setting is MasterOnly.
|
||||
#babudb.repl.policy = MasterOnly
|
||||
|
||||
# DB backup directory - needed for the initial loading of the BabuDB from the
|
||||
# master in replication context
|
||||
babudb.repl.backupDir = /var/lib/xtreemfs/server-repl-mrc
|
||||
|
||||
#####################################################################
|
||||
# SSL options (disabled by default)
|
||||
#####################################################################
|
||||
|
||||
# specify whether SSL is required
|
||||
#babudb.ssl.enabled = false
|
||||
|
||||
# server credentials for SSL handshakes
|
||||
#babudb.ssl.service_creds = /etc/xos/xtreemfs/truststore/certs/osd.p12
|
||||
#babudb.ssl.service_creds.pw = xtreemfs
|
||||
#babudb.ssl.service_creds.container = pkcs12
|
||||
|
||||
# trusted certificates for SSL handshakes
|
||||
#babudb.ssl.trusted_certs = /etc/xos/xtreemfs/truststore/certs/xosrootca.jks
|
||||
#babudb.ssl.trusted_certs.pw = xtreemfs
|
||||
#babudb.ssl.trusted_certs.container = jks
|
||||
|
||||
#babudb.ssl.authenticationWithoutEncryption = false
|
||||
|
||||
#####################################################################
|
||||
# Internal options (usually do not have to be touched)
|
||||
#####################################################################
|
||||
plugin.jar = /usr/share/java/BabuDB_replication_plugin.jar
|
||||
|
||||
# paths to libraries this plugin depends on
|
||||
babudb.repl.dependency.0 = /usr/share/java/Flease.jar
|
||||
|
||||
# local time renew in milliseconds
|
||||
#babudb.localTimeRenew = 0
|
||||
|
||||
# chunk size, for initial load of file chunks
|
||||
#babudb.repl.chunkSize = 5242880
|
||||
|
||||
# decides whether redirects should be handled by the user-application or not
|
||||
#babudb.repl.redirectIsVisible = false
|
||||
60
contrib/server-repl-plugin/update_BabuDB_replication_plugin_jar.sh
Executable file
60
contrib/server-repl-plugin/update_BabuDB_replication_plugin_jar.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (c) 2012 Michael Berlin, Zuse Institute Berlin
|
||||
# Licensed under the BSD License, see LICENSE file for details.
|
||||
|
||||
set -e
|
||||
|
||||
trap onexit 1 2 3 15 ERR
|
||||
|
||||
function onexit() {
|
||||
local exit_status=${1:-$?}
|
||||
echo ERROR: Exiting $0 with $exit_status
|
||||
exit $exit_status
|
||||
}
|
||||
|
||||
replication_dir_in_babudb_trunk="java/replication"
|
||||
|
||||
cat <<EOF
|
||||
This script updates the binary .jar file which contains the BabuDB replication plugin.
|
||||
|
||||
EOF
|
||||
|
||||
if [ -z "$BABUDB" ]
|
||||
then
|
||||
known_babudb_dirs="../../../../googlecode-svn-babudb/trunk"
|
||||
for dir in $known_babudb_dirs
|
||||
do
|
||||
if [ -d "$dir" ]
|
||||
then
|
||||
BABUDB="$dir"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -z "$BABUDB" ]
|
||||
then
|
||||
echo "The environment variable BABUDB was not set. Please point it to a checkout directory of the SVN trunk of the BabuDB project (svn checkout http://babudb.googlecode.com/svn/trunk/ babudb)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$BABUDB" ]
|
||||
then
|
||||
echo "The environment variable BABUDB does not point to an existing directory. BABUDB = ${BABUDB}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Updating the .jar files required by the BabuDB replication code first..."
|
||||
${BABUDB}/${replication_dir_in_babudb_trunk}/update_jar_dependencies.sh
|
||||
echo "Finished updating the .jar files required by the BabuDB replication code."
|
||||
|
||||
echo "compiling BabuDB replication plugion (BabuDB_replication_plugin.jar)"
|
||||
babudb_replication_buildfile="${BABUDB}/${replication_dir_in_babudb_trunk}/build.xml"
|
||||
babudb_replication_jar_source="${BABUDB}/${replication_dir_in_babudb_trunk}/../dist/replication/BabuDB_replication_plugin.jar"
|
||||
babudb_replication_jar_dest=$(dirname "$0")
|
||||
|
||||
# ant clean -f "$babudb_replication_buildfile" >/dev/null
|
||||
ant jar -f "$babudb_replication_buildfile" >/dev/null
|
||||
cp -a "$babudb_replication_jar_source" "$babudb_replication_jar_dest"
|
||||
|
||||
echo "finished compiling BabuDB replication plugion (BabuDB_replication_plugin.jar)"
|
||||
34
contrib/travis/parse_results.py
Executable file
34
contrib/travis/parse_results.py
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2014 by Johannes Dillmann, Zuse Institute Berlin
|
||||
# Licensed under the BSD License, see LICENSE file for details.
|
||||
|
||||
import sys
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("file", type=argparse.FileType('r'))
|
||||
parser.add_argument("test")
|
||||
args = parser.parse_args()
|
||||
|
||||
results = json.load(args.file)
|
||||
result = None
|
||||
|
||||
if args.test in results:
|
||||
result = results[args.test]
|
||||
|
||||
if type(result) == bool and result:
|
||||
print "true"
|
||||
sys.exit(0)
|
||||
|
||||
if type(result) == dict and all(result.values()):
|
||||
print "true"
|
||||
sys.exit(0)
|
||||
|
||||
print "false"
|
||||
sys.exit(1)
|
||||
|
||||
7
contrib/vagrant/provision.sh
Normal file
7
contrib/vagrant/provision.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
echo "Installing XtreemFS build dependencies"
|
||||
apt-get -y update
|
||||
apt-get -y install openjdk-7-jdk ant build-essential libssl-dev libfuse-dev libattr1-dev cmake libboost-regex-dev libboost-program-options-dev libboost-thread-dev libboost-system-dev valgrind
|
||||
echo "export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64" >> /etc/bash.bashrc
|
||||
echo "export BUILD_CLIENT_TESTS=true" >> /etc/bash.bashrc
|
||||
223
contrib/xtreemfs-osd-farm/xtreemfs-osd-farm
Executable file
223
contrib/xtreemfs-osd-farm/xtreemfs-osd-farm
Executable file
@@ -0,0 +1,223 @@
|
||||
#!/bin/bash
|
||||
|
||||
### BEGIN INIT INFO
|
||||
# Provides: xtreemfs-osd-farm
|
||||
# Required-Start: $network $remote_fs
|
||||
# Required-Stop: $network $remote_fs
|
||||
# Should-Start: xtreemfs-dir
|
||||
# Should-Stop: $null
|
||||
# Default-Start: 3 5
|
||||
# Default-Stop: 0 1 2 6
|
||||
# Short-Description: XtreemFS OSD init.d script which can start multiple OSDs on the same machine in contrast to xtreemfs-osd
|
||||
# Description: XtreemFS Object Storage Device (OSD). http://www.xtreemfs.org/
|
||||
### END INIT INFO
|
||||
|
||||
# Source function library.
|
||||
if [ -e /lib/lsb/init-functions ]; then
|
||||
. /lib/lsb/init-functions
|
||||
else
|
||||
. /etc/init.d/functions
|
||||
fi
|
||||
|
||||
XTREEMFS_USER=xtreemfs
|
||||
|
||||
# List of OSD instances which shall be started, seperated by spaces.
|
||||
# For every OSD there has to be a configuration file.
|
||||
OSD_INSTANCES="osd1 osd2 osd3"
|
||||
|
||||
# OSD specific options. Use %OSDNAME% which will be substituted.
|
||||
PID_OSD_GENERIC=/var/run/xtreemfs_%OSDNAME%.pid
|
||||
|
||||
CONFIG_OSD_GENERIC=/etc/xos/xtreemfs/%OSDNAME%.config.properties
|
||||
|
||||
LOG_OSD_GENERIC=/var/log/xtreemfs/%OSDNAME%.log
|
||||
|
||||
if [ -z $JAVA_HOME ]; then
|
||||
export JAVA_HOME=/usr
|
||||
fi
|
||||
JAVA_CALL="$JAVA_HOME/bin/java -ea -cp /usr/share/java/XtreemFS.jar:/usr/share/java/BabuDB.jar:/usr/share/java/Flease.jar:/usr/share/java/protobuf-java-2.5.0.jar:/usr/share/java/Foundation.jar:/usr/share/java/jdmkrt.jar:/usr/share/java/jdmktk.jar:/usr/share/java/commons-codec-1.3.jar"
|
||||
|
||||
# For SELinux we need to use 'runuser' not 'su'
|
||||
if [ -x "/sbin/runuser" ]; then
|
||||
SU="/sbin/runuser"
|
||||
else
|
||||
SU="/bin/su"
|
||||
fi
|
||||
|
||||
pre_check() {
|
||||
LOG_OSD="$1"
|
||||
CONFIG_OSD="$2"
|
||||
exists=`grep -c $XTREEMFS_USER /etc/passwd`
|
||||
if [ $exists -eq 0 ]; then
|
||||
echo "User $XTREEMFS_USER does not exist. Create it first."
|
||||
exit 1
|
||||
fi
|
||||
log_directory=`dirname $LOG_OSD`
|
||||
if [ ! -e $log_directory ]; then
|
||||
echo "Directory for logfiles $log_directory does not exist. Create it first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$CONFIG_OSD" ]; then
|
||||
echo -e "Config file not found: $CONFIG_OSD"
|
||||
echo
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
get_osd_list() {
|
||||
OSD_LIST=""
|
||||
if [ -n "$1" ]; then
|
||||
# Check if given OSD name in list of allowed OSDs.
|
||||
for osd in $OSD_INSTANCES; do
|
||||
[ "$osd" = "$1" ] && OSD_LIST="$1" && return 0
|
||||
done
|
||||
|
||||
echo "OSD \"$1\" is not part of the list OSD_INSTANCES."
|
||||
exit 1
|
||||
else
|
||||
OSD_LIST=$OSD_INSTANCES
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
substitute_osdname() {
|
||||
echo "$1" | sed -e "s/%OSDNAME%/$2/g"
|
||||
}
|
||||
|
||||
pre_check_vars() {
|
||||
for var in $LOG_OSD_GENERIC $PID_OSD_GENERIC $CONFIG_OSD_GENERIC; do
|
||||
echo "$var" | grep %OSDNAME% >/dev/null || {
|
||||
echo "%OSDNAME% parameter not found in variable: $var"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
}
|
||||
|
||||
start() {
|
||||
get_osd_list "$1"
|
||||
pre_check_vars
|
||||
|
||||
for osdname in $OSD_LIST; do
|
||||
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
|
||||
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
|
||||
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
|
||||
|
||||
pre_check "$LOG_OSD" "$CONFIG_OSD"
|
||||
|
||||
echo >> $LOG_OSD
|
||||
date >> $LOG_OSD
|
||||
echo -e "Starting XtreemFS Object Storage Device (OSD): $osdname ... \n\n" >> $LOG_OSD
|
||||
|
||||
echo -n "Starting XtreemFS Object Storage Device (OSD): $osdname ... "
|
||||
$SU -s /bin/bash $XTREEMFS_USER -c "$JAVA_CALL org.xtreemfs.osd.OSD $CONFIG_OSD" >> $LOG_OSD 2>&1 &
|
||||
PROCPID=$!
|
||||
echo $PROCPID > $PID_OSD
|
||||
sleep 1s
|
||||
|
||||
if [ -e /proc/$PROCPID ]; then
|
||||
echo "success"
|
||||
else
|
||||
echo "failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
done
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
stop() {
|
||||
get_osd_list "$1"
|
||||
pre_check_vars
|
||||
|
||||
for osdname in $OSD_LIST; do
|
||||
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
|
||||
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
|
||||
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
|
||||
|
||||
result=0
|
||||
if [ -f $PID_OSD ]; then
|
||||
echo -n "Stopping XtreemFS Object Storage Device (OSD): $osdname ... "
|
||||
killproc -p $PID_OSD $SU
|
||||
result=$?
|
||||
if [ $result -eq 0 ]; then
|
||||
rm -f $PID_OSD
|
||||
echo "success"
|
||||
else
|
||||
echo "failed"
|
||||
fi
|
||||
else
|
||||
echo "XtreemFS Object Storage Device (OSD) is not running"
|
||||
fi
|
||||
|
||||
done
|
||||
return $result
|
||||
}
|
||||
|
||||
status() {
|
||||
get_osd_list "$1"
|
||||
pre_check_vars
|
||||
|
||||
rc=0
|
||||
for osdname in $OSD_LIST; do
|
||||
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
|
||||
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
|
||||
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
|
||||
|
||||
if [ -f $PID_OSD ]; then
|
||||
PROCPID=`cat $PID_OSD`
|
||||
if [ ! -e /proc/$PROCPID ]; then
|
||||
echo "XtreemFS Object Storage Device (OSD): $osdname has crashed"
|
||||
rc=1
|
||||
else
|
||||
echo "XtreemFS Object Storage Device (OSD): $osdname is running"
|
||||
fi
|
||||
else
|
||||
echo "XtreemFS Object Storage Device (OSD): $osdname is not running"
|
||||
rc=3
|
||||
fi
|
||||
done
|
||||
|
||||
return $rc
|
||||
}
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
start "$2"
|
||||
result=$?
|
||||
;;
|
||||
stop)
|
||||
stop "$2"
|
||||
result=$?
|
||||
;;
|
||||
status)
|
||||
status "$2"
|
||||
result=$?
|
||||
;;
|
||||
reload)
|
||||
result=0
|
||||
;;
|
||||
restart)
|
||||
stop "$2" && sleep 1 && start "$2"
|
||||
result=$?
|
||||
;;
|
||||
try-restart)
|
||||
## Stop the service and if this succeeds (i.e. the
|
||||
## service was running before), start it again.
|
||||
$0 status "$2" >/dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
$0 restart "$2"
|
||||
result=$?
|
||||
else
|
||||
result=0
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
echo -e "Usage: $0 {start|stop|restart|reload|status|try-restart}\n"
|
||||
result=1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $result
|
||||
Reference in New Issue
Block a user