Imported Upstream version 1.5.1

This commit is contained in:
Mario Fetka
2020-09-22 02:25:22 +02:00
commit 434d6067d9
2103 changed files with 928962 additions and 0 deletions

403
contrib/benchmark/benchmark.sh Executable file
View File

@@ -0,0 +1,403 @@
#!/bin/bash
#
# benchmark.sh - drive a series of XtreemFS benchmarks with increasing
# thread counts (see printUsage for the command-line interface).
# Settings below can be overridden via BENCH_* environment variables
# where indicated.
#
# the timeout for one execution of xtfs_benchmark
TIMEOUT=1500
# the timeout for one cleanup
TIMEOUT_CLEANUP=300
# the time to sleep after a cleanup
SLEEPTIME=600
# if false, the script will not sleep after a cleanup and after drop_caches
# (-v will set false)
SLEEP=true
# the size of the basefile for random benchmarks
BASEFILE_SIZE="100g"
# the directories for the logfiles and the results
LOG_BASE=${BENCH_LOG:-$HOME}
LOG_DIR="$LOG_BASE/log"
RESULT_DIR="$LOG_BASE/result"
# Drops caches after each benchmark. Uncomment to activate
# cp "drop_caches" to "/usr/local/bin" and add "ALL ALL=NOPASSWD: /usr/local/bin/drop_caches" to sudoers file
# Set BENCH_DROP_CACHES=false to disable cache dropping entirely.
DROP_CACHES=${BENCH_DROP_CACHES:-"/usr/local/bin/drop_caches"}
if [[ $DROP_CACHES != "false" ]]; then
    DROP_CACHES_CALL="sudo ${DROP_CACHES}"
fi
# IP and Port of the DIR
DIR=${BENCH_DIR:-"localhost:32638"}
# IP and Port of the MRC
MRC=${BENCH_MRC:-"localhost:32636"}
# space separed list of OSD_UUIDS, e.g. "osd1 osd2 ..."
OSD_UUIDS=${BENCH_OSD_UUIDS:-"test-osd0"}
# stripe size for a volume
STRIPE_SIZE="128K"
# request size for each I/O operation
REQUEST_SIZE=$STRIPE_SIZE
# replication settings
REPLICATION_POLICY=""
REPLICATION_FACTOR=1
check_env(){
    # Locate the XtreemFS base directory if $XTREEMFS is not already set:
    # try the current directory, then the parent, as a source checkout.
    if [ -z "$XTREEMFS" ]; then
        if [ -d java ] && [ -d cpp ] && [ -d etc ]; then
            # current directory looks like an XtreemFS checkout
            XTREEMFS=$(pwd)
        elif [ -d ../java ] && [ -d ../cpp ] && [ -d ../etc ]; then
            # parent directory looks like an XtreemFS checkout
            XTREEMFS=$(pwd)/..
        fi
    fi
    # Abort unless the XtreemFS jar can be found in one of the known places.
    if [ ! -e "$XTREEMFS/java/servers/dist/XtreemFS.jar" ] && [ ! -d "$XTREEMFS/java/lib" ] && [ ! -f "/usr/share/java/XtreemFS.jar" ]; then
        echo "XtreemFS jar could not be found!"
        exit 1
    fi
    # A JRE/JDK is required; fall back to /usr when $JAVA_HOME is unset.
    if [ -z "$JAVA_HOME" ] && [ ! -f "/usr/bin/java" ]; then
        echo "\$JAVA_HOME not set, JDK/JRE 1.6 required"
        exit 1
    fi
    if [ -z "$JAVA_HOME" ]; then
        JAVA_HOME=/usr
    fi
}
printUsage() {
    # Print the synopsis / option help to stdout (heredoc is the user-facing
    # help text; $(basename $0) and the escaped \$HOME are expanded).
    cat << EOF
Synopsis
$(basename $0) -t TYPE -s NUMBER [-x NUMBER] [-p POLICY -f NUMBER] [-b NUMBER -e NUMBER] [-r NUMBER] [-v]
Run a XtreemFS benchmark series, i.e. a series of benchmarks with increasing
numbers of threads. Logs are placed in \$HOME/log/, results in \$HOME/results
(can be changed at the head of the script).
-t type
Type of benchmarks to run. Type can be either of the following:
sw sequential write
usw unaligned sequential write
sr sequential read
rw random write
rr random read
-s size
Size of one benchmark, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
-c size
Size of each read/write request, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
Defaults to 128K.
-i size
Stripe size for each volume, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
Defaults to 128K.
-p policy
Replication policy to use. Defaults to none.
-f factor
Replication factor to use. Defaults to 1.
-b number of threads to beginn the benchmark series
Minimum number of threads to be run as the benchmarks series.
The series will run benchmarks between the 'begin' and the 'end' number of threads.
-e number of threads to end the benchmark series
Maximum number of threads to be run as the benchmarks series.
The series will run benchmarks between the 'begin' and the 'end' number of threads.
-r repetitions
Number of times a benchmark is repeated.
-v verbose
If set, bash debugging is enabled ('set -x') and sleeping after the benchmarks
is disabled.
EOF
}
init_params(){
    # Validate the environment, create the log/result directories, expand the
    # thread/repetition sequences and redirect stderr/stdout into log files.
    check_env
    if ! [ -d "$LOG_DIR" ]; then
        # message fix: original said "doesn't existing"
        echo "$LOG_DIR does not exist. Creating $LOG_DIR..."
        mkdir -p "$LOG_DIR"
    fi
    if ! [ -d "$RESULT_DIR" ]; then
        echo "$RESULT_DIR does not exist. Creating $RESULT_DIR..."
        mkdir -p "$RESULT_DIR"
    fi
    # THREADS is the list of thread counts to benchmark; REPETITIONS is
    # re-purposed from a count into the list of repetition indices.
    THREADS="$(seq $BEGIN $END)"
    REPETITIONS="$(seq 1 $REPETITIONS)"
    # use second resolution in case multiple benchmarks are run per minute
    NOW=$(date +"%y-%m-%d_%H-%M-%S")
    # redirect stderr to the log file and stdout to the result csv (tee'd so
    # they stay visible on the terminal)
    exec 2> >(tee "$LOG_DIR/$TYPE-$NOW.log")
    exec > >(tee "$RESULT_DIR/$TYPE-$NOW.csv")
    # normalize all sizes to plain byte counts
    BASEFILE_SIZE=$(parse_size $BASEFILE_SIZE)
    REQUEST_SIZE=$(parse_size $REQUEST_SIZE)
    STRIPE_SIZE=$(parse_size $STRIPE_SIZE)
}
parse_size(){
local size_with_modifier=$1
local index=$(echo `expr match $size_with_modifier '[0-9]\+'`)
local size=${size_with_modifier:0:$index}
local modifier=${size_with_modifier:$index}
if [ $index != ${#size_with_modifier} ]; then
if [ $modifier = "K" ] || [ $modifier = "k" ]; then
size=$(echo "$size*2^10" | bc)
elif [ $modifier = "M" ] || [ $modifier = "m" ]; then
size=$(echo "$size*2^20" | bc)
elif [ $modifier = "G" ] || [ $modifier = "g" ]; then
size=$(echo "$size*2^30" | bc)
else
echo "Wrong size modifier. Only 'M' and 'G' are allowed"
exit 1
fi
fi
echo $size
}
prepare_seq_read(){
    # Create the benchmark volumes and write the data that the sequential
    # read benchmark will later read.
    #   $1 - size per thread (bytes)
    #   $2 - number of threads / volumes
    local size=$1
    local threads=$2
    # declare array of volume names benchmark0..benchmark<threads-1>
    local volume_index=$((threads - 1))
    local i
    for i in $(seq 0 $volume_index); do VOLUMES[$i]=benchmark$i; done
    echo -e "\nPreparing sequential read benchmarks\n" >&2
    for i in $(seq 1 $threads); do
        local index=$((i - 1))
        # fix: use the declared $size local instead of re-reading raw $1
        timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -sw -ssize $size --no-cleanup --user $USER \
            ${VOLUMES[$index]} --stripe-size $STRIPE_SIZE --chunk-size $REQUEST_SIZE
    done
}
prepare_random(){
    # Create the benchmark volumes and the basefiles needed by the random
    # read/write benchmarks ($1 = number of threads / volumes).
    local threads=$1
    # declare array of volume names
    local volume_index=$(echo "$threads-1" | bc)
    for i in $(seq 0 $volume_index); do VOLUMES[$i]=benchmark$i; done
    # calc basefile size and round to a number divideable through REQUEST_SIZE
    local basefile_size=$(echo "(($BASEFILE_SIZE/$threads)/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)
    echo -e "\nPreparing random benchmark: Creating a basefiles\n" >&2
    for i in $(seq 1 $threads); do
        local index=$(echo "$i-1"|bc)
        # a short -rr run per volume creates the basefile; all cleanup is
        # suppressed so the data survives for the real benchmark
        timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -rr -rsize $REQUEST_SIZE --no-cleanup-basefile --no-cleanup-volumes --user $USER \
            --basefile-size $basefile_size ${VOLUMES[$index]} --stripe-size $STRIPE_SIZE --chunk-size $REQUEST_SIZE
    done
}
run_benchmark(){
    # Run one xtfs_benchmark invocation under a timeout and abort the whole
    # series (via interrupted_exit) if it times out or fails.
    #   $1 - benchmark type: sr, sw, usw, rw or rr
    #   $2 - size per benchmark (bytes)
    #   $3 - number of parallel threads
    local benchType=$1
    local size=$2
    local threads=$3
    local replicationOpt=""
    if [[ $REPLICATION_POLICY != "" ]]; then
        replicationOpt="--replication-policy $REPLICATION_POLICY"
    fi
    if [ $benchType = "sr" ]; then
        # sequential read: volumes were prepared beforehand, keep them
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -ssize $size -n $threads --no-cleanup-volumes --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    elif [ $benchType = "sw" ] || [ $benchType = "usw" ]; then
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -ssize $size -n $threads --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    elif [ $benchType = "rw" ] || [ $benchType = "rr" ]; then
        # calc basefile size and round to a number divideable through REQUEST_SIZE
        local basefile_size=$(echo "(($BASEFILE_SIZE/$threads)/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -rsize $size --basefile-size $basefile_size -n $threads \
            --no-cleanup-basefile --no-cleanup-volumes --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    fi
    # $? after the fi is the status of the last command executed inside the
    # chain, i.e. the timeout/xtfs_benchmark call (timeout exits 124 when the
    # command timed out).
    local bench_exit_status=$?
    if [ $bench_exit_status -eq 124 ]; then
        echo "The benchmark timed out (Timeout: $TIMEOUT)" >&2
        interrupted_exit
    elif [ $bench_exit_status -ne 0 ]; then
        echo "The benchmark did not finish with exit status 0" >&2
        interrupted_exit
    fi
    # cleanup after *every* benchmark only for the seq read ("sr") type
    # (the original comment said "seq write", but the code checks "sr")
    if [ $benchType = "sr" ]; then
        cleanup_osd
    fi
}
delete_volumes(){
    # Remove the volumes benchmark0..benchmark<n-1> from the MRC,
    # logging each successful removal to stderr ($1 = number of threads).
    local number_of_threads=$1
    local last_index=$((number_of_threads - 1))
    local i
    for i in $(seq 0 $last_index); do
        if rmfs.xtreemfs -f $MRC/benchmark$i >/dev/null; then
            echo "Removed volume benchmark$i" >&2
        fi
    done
}
cleanup_osd(){
    # Run the XtreemFS cleanup tool against every configured OSD, then
    # (unless -v disabled it) sleep to let the system settle, and finally
    # drop the page caches.
    local osd
    for osd in $OSD_UUIDS; do
        timeout --foreground $TIMEOUT_CLEANUP $XTREEMFS/bin/xtfs_cleanup -dir pbrpc://$DIR -wait -e -delete_volumes uuid:$osd >&2
    done
    if $SLEEP; then
        echo "Start Sleeping for $((SLEEPTIME / 60)) minutes at $(date)" >&2
        sleep $SLEEPTIME
        echo "Finished Sleeping at $(date)" >&2
    fi
    drop_caches
}
interrupted_exit(){
    # Emergency exit path (SIGINT or a failed benchmark): delete every volume
    # that may have been created (up to $END), clean the OSDs without the
    # post-cleanup sleep, and exit non-zero.
    echo "Unexpected exit, cleaning up..." >&2
    SLEEP=false
    delete_volumes $END
    cleanup_osd
    exit 1
}
drop_caches(){
    # Flush the OS caches via the configured helper; no-op when cache
    # dropping is disabled (DROP_CACHES_CALL empty/unset).
    if [ -z "$DROP_CACHES_CALL" ]; then
        return
    fi
    echo "Dropping caches" >&2
    $DROP_CACHES_CALL
    # give the system a moment after dropping caches (skipped with -v)
    if $SLEEP; then
        sleep 10
    fi
}
##### main ###
trap "echo; echo 'Interrupt received '; interrupted_exit" INT
# show usage if invoked without options/arguments
if [ $# -eq 0 ]; then
    printUsage
    exit 1
fi
# default values
BEGIN=1
END=1
REPETITIONS=1
# parse options
while getopts ":t:s:c:i:b:e:r:p:f:v" opt; do
    case $opt in
        t)
            if [ $OPTARG = "sw" ] || [ $OPTARG = "usw" ] || [ $OPTARG = "sr" ] || [ $OPTARG = "rw" ] || [ $OPTARG = "rr" ]; then
                TYPE=$OPTARG
            else
                echo 'wrong argument to -t. Needs to be either "sw", "usw", "sr", "rw" or "rr"'
                exit 1
            fi
            ;;
        s)
            SIZE=$(parse_size $OPTARG)
            ;;
        c)
            REQUEST_SIZE=$(parse_size $OPTARG)
            ;;
        i)
            STRIPE_SIZE=$(parse_size $OPTARG)
            ;;
        b)
            BEGIN=$OPTARG
            ;;
        e)
            END=$OPTARG
            ;;
        r)
            REPETITIONS=$OPTARG
            ;;
        p)
            REPLICATION_POLICY=$OPTARG
            ;;
        f)
            REPLICATION_FACTOR=$OPTARG
            ;;
        v)
            SLEEP=false
            set -x
            ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            exit 1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
    esac
done
init_params
drop_caches
echo "Running:" $0 "$@" >&2
# one series iteration per thread count
for i in $THREADS; do
    # each thread gets an equal share of the total benchmark size
    size="$(echo "$SIZE/$i"|bc)"
    if [ $TYPE != "usw" ]; then
        size="$(echo "($size/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)" # round down to a size divisible by REQUEST_SIZE
    fi
    if [ $TYPE = "sr" ]; then
        prepare_seq_read $size $i
        cleanup_osd
    elif [ $TYPE = "rw" ] || [ $TYPE = "rr" ]; then
        prepare_random $i
    fi
    for j in $REPETITIONS; do
        echo "Start $i-Thread-Benchmark Nr. $j" >&2
        run_benchmark $TYPE $size $i
        echo "Finished $i-Thread-Benchmark Nr. $j" >&2
    done
    # seq write benchmarks run cleanup after every benchmark, so this would be redundant
    if [ $TYPE != "sw" ] && [ $TYPE != "usw" ]; then
        volume_index=$((i - 1))
        # bug fix: the original reused 'i' as the loop variable here, shadowing
        # the outer loop's thread count; use a distinct name
        for vol in $(seq 0 $volume_index); do
            rmfs.xtreemfs -f $MRC/benchmark$vol >&2
            echo "Removed volume benchmark$vol" >&2
        done
        cleanup_osd
    fi
done

3
contrib/benchmark/drop_caches Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Flush the kernel page/dentry/inode caches (drop_caches=3). Must run as
# root; benchmark.sh installs this to /usr/local/bin and calls it via sudo
# (see the NOPASSWD sudoers hint at the top of benchmark.sh).
/bin/bash -c "echo 3 > /proc/sys/vm/drop_caches"

View File

@@ -0,0 +1,69 @@
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:date="http://exslt.org/dates-and-times"
extension-element-prefixes="date">
<!--
Copyright (c) 2013 by Michael Berlin, Zuse Institute Berlin
Licensed under the BSD License, see LICENSE file for details.
This file transforms a MRC database dump (in XML format) into
a list of files. The output format is as follows:
volume name/path on volume|creation time|file size|file's owner name
The current version lists only files which are placed on an OSD
with the UUID 'zib.mosgrid.osd15' (see line 34).
You can use the 'xsltproc' tool to apply this transformation to an XML dump.
Example: xsltproc -o filtered_files_output.txt filter_files.xslt /tmp/dump.xml
-->
<xsl:output omit-xml-declaration="yes"/>
<!--Strip off white space from all elements. We take care of the format on our own.-->
<xsl:strip-space elements="*"/>
<!--For each volume, process its "file" elements.-->
<xsl:template match="volume">
<xsl:apply-templates select="//file"/>
</xsl:template>
<xsl:template match="file[xlocList/xloc/osd/@location='zib.mosgrid.osd15']">
<!--Traverse the path of the <file> element and output the 'name' attribute of
each element to display the file system path.
The first entry is the name of the volume.-->
<xsl:for-each select="ancestor-or-self::*/@name">
<!--We ignore the <volume> element because its name is repeated as <dir> element below.-->
<xsl:if test="local-name(..) != 'volume'">
<!--Output path element.-->
<xsl:value-of select="."/>
<xsl:if test="position() != last()">
<!--Display separator.-->
<xsl:text>/</xsl:text>
</xsl:if>
</xsl:if>
</xsl:for-each>
<!--Creation time.-->
<xsl:text>|</xsl:text>
<xsl:value-of select="date:add('1970-01-01T00:00:00Z', date:duration(@ctime))"/>
<!--File size.-->
<xsl:text>|</xsl:text>
<xsl:value-of select="@size"/>
<!--Owner.-->
<xsl:text>|</xsl:text>
<xsl:value-of select="@uid"/>
<!--New line.-->
<xsl:text>&#xa;</xsl:text>
</xsl:template>
</xsl:stylesheet>

View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ganglia-plugin</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/ganglia-plugin/src</path>
</pydev_pathproperty>
</pydev_project>

View File

@@ -0,0 +1,6 @@
How to use this plugin?
- Make sure your ganglia installation supports python plugins.
- Copy plugin files from src directory to /usr/lib/ganglia/python_modules/
- Copy configuration files from the configuration-files directory to /etc/ganglia/conf.d
- Alter configuration files to suit your needs.

View File

@@ -0,0 +1,61 @@
modules {
module {
name = "xtfs-dir-plugin"
language = "python"
# The following params are examples only
param Host {
value = localhost
}
param Port {
value = 9001
}
param CommunityString {
value = public
}
}
}
collection_group {
collect_every = 60
time_threshold = 10
metric {
name = "dir_jvm_used_mem"
title = "used memory of the jvm"
value_threshold = 1
}
metric {
name = "dir_jvm_free_mem"
title = "free memory of the jvm"
value_threshold = 1
}
metric {
name = "dir_client_connections"
title = "number of Clients"
value_threshold = 1
}
metric {
name = "dir_pending_requests"
title = "number of pending requests"
value_threshold = 1
}
metric {
name = "addr_mapping_count"
title = "number of address mappings"
value_threshold = 1
}
metric {
name = "service_count"
title = "number of services"
value_threshold = 1
}
metric {
name = "dir_status"
title = "Status DIR"
}
metric {
name = "dir_uuid"
title = "DIR UUID"
}
}

View File

@@ -0,0 +1,56 @@
modules {
module {
name = "xtfs-mrc-plugin"
language = "python"
# The following params are examples only
param Host {
value = localhost
}
param Port {
value = 9002
}
param CommunityString {
value = public
}
}
}
collection_group {
collect_every = 60
time_threshold = 10
metric {
name = "mrc_jvm_used_mem"
title = "used memory of the jvm"
value_threshold = 1
}
metric {
name = "mrc_jvm_free_mem"
title = "free memory of the jvm"
value_threshold = 1
}
metric {
name = "mrc_client_connections"
title = "number of Clients"
value_threshold = 1
}
metric {
name = "mrc_pending_requests"
title = "number of pending requests"
value_threshold = 1
}
metric {
name = "volumes_count"
title = "number of volumes"
value_threshold = 1
}
metric {
name = "mrc_status"
title = "Status MRC"
}
metric {
name = "mrc_uuid"
title = "MRC UUID"
}
}

View File

@@ -0,0 +1,107 @@
modules {
module {
name = "xtfs-osd-plugin"
language = "python"
# The following params are examples only
param Host {
value = localhost
}
param Port {
value = 9003
}
param CommunityString {
value = public
}
}
}
collection_group {
collect_every = 60
time_threshold = 10
metric {
name = "osd_jvm_used_mem"
title = "used memory of the jvm"
value_threshold = 1
}
metric {
name = "osd_jvm_free_mem"
title = "free memory of the jvm"
value_threshold = 1
}
metric {
name = "osd_client_connections"
title = "number of Clients"
value_threshold = 1
}
metric {
name = "objects_received"
title = "objects received"
value_threshold = 1
}
metric {
name = "repl_objects_received"
title = "replicated objects received"
value_threshold = 1
}
metric {
name = "objects_transmitted"
title = "objects transmitted"
value_threshold = 1
}
metric {
name = "repl_bytes_received"
title = "replicated bytes received"
value_threshold = 1
}
metric {
name = "bytes_received"
title = "bytes received"
value_threshold = 1
}
metric {
name = "bytes_transmitted"
title = "bytes transmitted"
value_threshold = 1
}
metric {
name = "preproc_queue_length"
title = "preprocessing stage queue length"
value_threshold = 1
}
metric {
name = "storage_queue_length"
title = "storage stage queue length"
value_threshold = 1
}
metric {
name = "deletion_queue_length"
title = "deletion stage queue length"
value_threshold = 1
}
metric {
name = "open_files"
title = "open files"
value_threshold = 1
}
metric {
name = "deleted_files"
title = "deleted files"
value_threshold = 1
}
metric {
name = "free_space"
title = "free space"
value_threshold = 100
}
metric {
name = "osd_status"
title = "Status OSD"
}
metric {
name = "osd_uuid"
title = "OSD UUID"
}
}

View File

@@ -0,0 +1,250 @@
'''
Created on May 25, 2011
@author: bzcseife
This is a python ganglia plugin which monitors the status of an DIR service of the XtreemFS
filesystem. It is intend to run on the same host as the DIR and gathers information of the DIR per
SNMP. Therefore you have to configure your DIR to provide a SNMP Agent on this host.
'''
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
#reported from the SNMP Agent to 32bit integers.
import random
from pysnmp.entity.rfc3413.oneliner import cmdgen
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
descriptors = list()
Random_Max = 50
Constant_Value = 50
#Get the used memory of the JVM
def JvmUsedMem(name):
    '''Query the DIR's SNMP agent (OID ...38350.1.1.0) and return the JVM
    used memory converted to MiB; returns 0 when the query fails.'''
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
                                                                                          transportTarget,
                                                                                          (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    if (errorStatus == False and errorIndication == None):
        # raw value is bytes; ganglia wants a 32-bit-safe MiB count
        return int(varBinds[0][1]/1024/1024)
    else:
        return 0
#Get the free memory of the JVM
def JvmFreeMem(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the number of client connections
def ClientConnections(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of pending requests
def PendingRequests(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 9, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of address mappings registered
def AddressMappingCount(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 2, 1, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of services registered
def ServiceCount(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 2, 2, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#get the status of the DIR
#OID: 1.3.6.1.4.1.38350.1.11.0
def Status(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
if (errorStatus == False and errorIndication == None):
return str(varBinds[0][1])
else:
return "OFFLINE"
#get the UUID of the DIR
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
if (errorStatus == False and errorIndication == None):
return str(varBinds[0][1])
else:
return "Service not available"
def metric_init(params):
    '''Initialize the SNMP transport from the ganglia params and build the
    DIR metric descriptor list.

    params: dict with optional keys 'CommunityString', 'Host' and 'Port'.
    Returns the list of descriptor dicts (also kept in the module-global
    'descriptors' that the debug __main__ block iterates over).
    NOTE(review): 'Host'/'Port' have no fallback — Snmp_Host/Snmp_Port stay
    undefined if the params are missing, as in the original; confirm the
    gmond config always supplies them.
    '''
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget
    # Bug fix: the original tested the misspelled key 'ComummunityString',
    # so a configured community string was silently ignored.
    if 'CommunityString' in params:
        Community_String = params['CommunityString']
    else:
        Community_String = 'public'
    if 'Port' in params:
        Snmp_Port = int(params['Port'])
    if 'Host' in params:
        Snmp_Host = params['Host']
    # Bug fix: use the configured community string instead of hard-coding
    # 'public' (which made the setting above pointless).
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)
    d0 = {'name': 'dir_jvm_used_mem',
          'call_back': JvmUsedMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of memory the JVM uses currently.',
          'groups': 'dir'}
    d1 = {'name': 'dir_jvm_free_mem',
          'call_back': JvmFreeMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of free memory the JVM can still use.',
          'groups': 'dir'}
    d2 = {'name': 'dir_client_connections',
          'call_back': ClientConnections,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'clients',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of active client connection this DIR has currently to handle.',
          'groups': 'dir'}
    d3 = {'name': 'dir_pending_requests',
          'call_back': PendingRequests,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'pending requests',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of pending requests this DIR has enqueued.',
          'groups': 'dir'}
    d4 = {'name': 'addr_mapping_count',
          'call_back': AddressMappingCount,
          'time_max': 90,
          #value_type: string | uint | float | double
          'value_type': 'uint',
          #units: unit of your metric
          'units': 'mappings',
          #slope: zero | positive | negative | both
          #This value maps to the data source types defined for RRDTool
          #If 'positive', RRD file generated will be of COUNTER type (calculating the rate of change)
          #If 'negative', ????
          #'both' will be of GAUGE type (no calculations are performed, graphing only the value reported)
          #If 'zero', the metric will appear in the "Time and String Metrics" or the "Constant Metrics" depending on the value_type of the m
          'slope': 'both',
          #format: format string of your metric
          #Must correspond to value_type otherwise value of your metric will be unpredictable (reference: http://docs.python.org/library/stdtypes.html#string-formatting)
          'format': '%u',
          #description: description of your metric
          'description': 'The number of address mapping registered at the DIR.',
          #groups (optional): groups your metric belongs to
          'groups': 'dir'}
    d5 = {'name': 'service_count',
          'call_back': ServiceCount,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'services',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of services registered at the DIR.',
          'groups': 'dir'}
    d6 = {'name': 'dir_status',
          'call_back': Status,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'ONLINE if this DIR is running correctly, OFFLINE otherwise',
          'groups': 'dir'}
    d7 = {'name': 'dir_uuid',
          'call_back': Uuid,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'UUID of the DIR running on this host',
          'groups': 'dir'}
    descriptors = [d0, d1, d2, d3, d4, d5, d6, d7]
    return descriptors
def metric_cleanup():
'''Clean up the metric module.'''
pass
#for debugging purpose
if __name__ == '__main__':
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9001}
metric_init(params)
for d in descriptors:
v = d['call_back'](d['name'])
print 'value for %s is' % (d['name'])
print v

View File

@@ -0,0 +1,221 @@
'''
Created on May 25, 2011
@author: bzcseife
This is a python ganglia plugin which monitors the status of an DIR service of the XtreemFS
filesystem. It is intend to run on the same host as the MRC and gathers information of the MRC per
SNMP. Therefore you have to configure your MRC to provide a SNMP Agent on this host.
'''
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
#reported from the SNMP Agent to 32bit integers.
import random
from pysnmp.entity.rfc3413.oneliner import cmdgen
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
descriptors = list()
Random_Max = 50
Constant_Value = 50
#Get the used memory of the JVM
def JvmUsedMem(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the free memory of the JVM
def JvmFreeMem(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the number of client connections
def ClientConnections(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 7, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of pending requests
def PendingRequests(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of volumes
def VolumeCount(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 3, 2, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#get the status of the MRC
def Status(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
if (errorStatus == False and errorIndication == None):
return str(varBinds[0][1])
else:
return "OFFLINE"
#get the UUID of the MRC
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
if (errorStatus == False and errorIndication == None):
return str(varBinds[0][1])
else:
return "Service not available"
def metric_init(params):
    '''Initialize the SNMP transport from the ganglia params and build the
    MRC metric descriptor list.

    params: dict with optional keys 'CommunityString', 'Host' and 'Port'.
    Returns the list of descriptor dicts (also kept in the module-global
    'descriptors' that the debug __main__ block iterates over).
    NOTE(review): 'Host'/'Port' have no fallback — Snmp_Host/Snmp_Port stay
    undefined if the params are missing, as in the original; confirm the
    gmond config always supplies them.
    '''
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget
    # Bug fix: the original tested the misspelled key 'ComummunityString',
    # so a configured community string was silently ignored.
    if 'CommunityString' in params:
        Community_String = params['CommunityString']
    else:
        Community_String = 'public'
    if 'Port' in params:
        Snmp_Port = int(params['Port'])
    if 'Host' in params:
        Snmp_Host = params['Host']
    # Bug fix: use the configured community string instead of hard-coding
    # 'public' (which made the setting above pointless).
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)
    d0 = {'name': 'mrc_jvm_used_mem',
          'call_back': JvmUsedMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of memory the JVM uses currently.',
          'groups': 'mrc'}
    d1 = {'name': 'mrc_jvm_free_mem',
          'call_back': JvmFreeMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of free memory the JVM can still use.',
          'groups': 'mrc'}
    d2 = {'name': 'mrc_client_connections',
          'call_back': ClientConnections,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'clients',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of active client connection this MRC has currently to handle.',
          'groups': 'mrc'}
    d3 = {'name': 'mrc_pending_requests',
          'call_back': PendingRequests,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'pending requests',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of pending requests this MRC has enqueued.',
          'groups': 'mrc'}
    d4 = {'name': 'volumes_count',
          'call_back': VolumeCount,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'volumes',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of volumes on this MRC.',
          'groups': 'mrc'}
    d5 = {'name': 'mrc_status',
          'call_back': Status,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          # copy-paste fix: the description said "OSD" in the MRC plugin
          'description': 'ONLINE if this MRC is running correctly, OFFLINE otherwise',
          'groups': 'mrc'}
    d6 = {'name': 'mrc_uuid',
          'call_back': Uuid,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'UUID of the MRC running on this host',
          'groups': 'mrc'}
    descriptors = [d0, d1, d2, d3, d4, d5, d6]
    return descriptors
def metric_cleanup():
'''Clean up the metric module.'''
pass
#for debugging purpose
if __name__ == '__main__':
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9002}
metric_init(params)
for d in descriptors:
v = d['call_back'](d['name'])
print 'value for %s is ' % (d['name'])
print v

View File

@@ -0,0 +1,477 @@
'''
Created on May 25, 2011
@author: bzcseife
This is a python ganglia plugin which monitors the status of an OSD service of the XtreemFS
filesystem. It is intend to run on the same host as the OSD and gathers information of the OSD per
SNMP. Therefore you have to configure your OSD to provide a SNMP Agent on this host.
'''
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
#reported from the SNMP Agent to 32bit integers.
import random
from pysnmp.entity.rfc3413.oneliner import cmdgen
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
descriptors = list()
Random_Max = 50
Constant_Value = 50
#Get the used memory of the JVM
def JvmUsedMem(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the free memory of the JVM
def JvmFreeMem(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the number of client connections
def ClientConnections(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 7, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of pending requests
def PendingRequests(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of objects received
def ObjectsReceived(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 1, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of replicated objects received
def ReplObjectsReceived(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 2, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of replicated objects transmitted
def ObjectsTransmitted(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 3, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of replicated bytes received
def ReplBytesReceived(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 4, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the number of bytes received
def BytesReceived(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 5, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the number of bytes transmitted
def BytesTransmitted(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 6, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the length of the preprocessing stage queue
def PreprocQueueLength(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 7, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#Get the length of the storage stage queue
def StorageQueueLength(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 8, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the length of the deletion stage queue
def DeletionQueueLength(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 9, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of open files from the OSD per snmp
def OsdOpenFiles(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 10, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the number of deleted files from the OSD per snmp
def OsdDeletedFiles(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 11, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1])
else:
return 0
#Get the free space from the OSD per snmp
def OsdFreeSpace(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 4, 12, 0))
if (errorStatus == False and errorIndication == None):
return int(varBinds[0][1] / 1024 / 1024)
else:
return 0
#get the status of the OSD
def Status(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
if (errorStatus == False and errorIndication == None):
return str(varBinds[0][1])
else:
return "OFFLINE"
#get the UUID of the OSD
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
transportTarget,
(1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
if (errorStatus == False and errorIndication == None):
return str(varBinds[0][1])
else:
return "Service not available"
def metric_init(params):
    """Gmond module entry point: configure SNMP access, build descriptors.

    Recognised params: 'CommunityString' (default 'public'),
    'Host' (default 'localhost'), 'Port' (default 9003).
    Returns the list of metric descriptor dicts.
    """
    global descriptors
    global authData
    global transportTarget

    # Bug fix: the original tested the misspelled key 'ComummunityString'
    # and then hard-coded 'public' in CommunityData, so a configured
    # community string was silently ignored. It also left Snmp_Host /
    # Snmp_Port unset when the params were missing; defaults added.
    community_string = params.get('CommunityString', 'public')
    snmp_host = params.get('Host', 'localhost')
    snmp_port = int(params.get('Port', 9003))

    authData = cmdgen.CommunityData('xtreemfs-agent', community_string)
    transportTarget = cmdgen.UdpTransportTarget((snmp_host, snmp_port), 1, 0)

    def uint_metric(name, call_back, units, slope, description):
        # Common template for the numeric metrics.
        return {'name': name,
                'call_back': call_back,
                'time_max': 90,
                'value_type': 'uint',
                'units': units,
                'slope': slope,
                'format': '%u',
                'description': description,
                'groups': 'osd'}

    def string_metric(name, call_back, description):
        # Common template for the string metrics (status, UUID).
        return {'name': name,
                'call_back': call_back,
                'time_max': 90,
                'value_type': 'string',
                'units': '',
                'slope': 'zero',
                'format': '%s',
                'description': description,
                'groups': 'osd'}

    # Same 19 descriptors, in the same order, as the original d0..d18.
    descriptors = [
        uint_metric('osd_jvm_used_mem', JvmUsedMem, 'Megabytes', 'both',
                    'The amount of memory the JVM uses currently.'),
        uint_metric('osd_jvm_free_mem', JvmFreeMem, 'Megabytes', 'both',
                    'The amount of free memory the JVM can still use.'),
        uint_metric('osd_client_connections', ClientConnections, 'clients', 'both',
                    'The number of active client connection this OSD has currently to handle.'),
        uint_metric('osd_pending_requests', PendingRequests, 'pending requests', 'both',
                    'The number of pending requests this OSD has enqueued.'),
        uint_metric('objects_received', ObjectsReceived, 'objects', 'positive',
                    'The number of objects this OSD has received.'),
        uint_metric('repl_objects_received', ReplObjectsReceived, 'objects', 'positive',
                    'The number of replicated objects this OSD has received.'),
        uint_metric('objects_transmitted', ObjectsTransmitted, 'objects', 'positive',
                    'The number of objects this OSD has transmitted.'),
        uint_metric('repl_bytes_received', ReplBytesReceived, 'Megabytes', 'positive',
                    'The number of replicated bytes this OSD has received.'),
        uint_metric('bytes_received', BytesReceived, 'Megabytes', 'positive',
                    'The number of bytes this OSD has received.'),
        uint_metric('bytes_transmitted', BytesTransmitted, 'Megabytes', 'positive',
                    'The number of bytes this OSD has transmitted.'),
        uint_metric('preproc_queue_length', PreprocQueueLength, 'requests', 'both',
                    'The length of the preprocessing stage queue of this OSD.'),
        uint_metric('storage_queue_length', StorageQueueLength, 'requests', 'positive',
                    'The length of the storage stage queue of this OSD.'),
        uint_metric('deletion_queue_length', DeletionQueueLength, 'requests', 'both',
                    'The length of the deletion stage queue of this OSD.'),
        # NOTE(review): duplicate name 'storage_queue_length' (only the slope
        # differs from the entry above); kept to preserve the original
        # descriptor list -- confirm whether a different metric was intended.
        uint_metric('storage_queue_length', StorageQueueLength, 'requests', 'both',
                    'The length of the storage stage queue of this OSD.'),
        uint_metric('open_files', OsdOpenFiles, 'files', 'both',
                    'The number of file this OSD has currently opened.'),
        uint_metric('deleted_files', OsdDeletedFiles, 'files', 'positive',
                    'The number of deleted files on this OSD'),
        uint_metric('free_space', OsdFreeSpace, 'Megabytes', 'both',
                    'The free disc space on the partition this OSD stores the object files.'),
        string_metric('osd_status', Status,
                      'ONLINE if this OSD is running correctly, OFFLINE otherwise'),
        string_metric('osd_uuid', Uuid,
                      'UUID of the OSD running on this host'),
    ]
    return descriptors
def metric_cleanup():
    """Called by gmond at shutdown; this module holds no resources to free."""
    pass
# ---------------------------------------------------------------------------
# Stand-alone debugging: initialise the module against a local OSD, then
# poll every metric once and print its value.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9003}
    metric_init(params)
    for d in descriptors:
        v = d['call_back'](d['name'])
        # Single-argument print() behaves identically on Python 2 and 3;
        # the original used Python-2-only print statements.
        print('value for %s is' % (d['name']))
        print(v)

View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Check the S.M.A.R.T. health of the disk(s) backing an OSD object directory.
#
# Usage: $0 <object_dir>
# Exit codes (Nagios-style): 0 = all disks PASSED, 1 = a disk FAILED,
#                            3 = unknown/unsupported device or output.
OBJECT_DIR=$1
# Device holding OBJECT_DIR: first column of the matching df line.
IFS=' ' read -r DEVICE TMP <<< "$(df "$OBJECT_DIR" | grep dev)"
# Determine device type
if [[ $DEVICE == *md* ]]; then
# Software RAID: resolve the member disks from /proc/mdstat. A line looks
# like "md0 : active raid1 sdb1[1] sda1[0]": members start at the 5th
# field, carry a "[role]" suffix and lack the /dev/ prefix.
# (The original wrapped 'read' in a command substitution, which always
# produced an empty DEVICES, and grepped for the /dev/-prefixed name.)
read -r -a MD_FIELDS <<< "$(grep "${DEVICE##*/}" /proc/mdstat)"
DEVICES=""
for MEMBER in "${MD_FIELDS[@]:4}"; do
DEVICES="$DEVICES /dev/${MEMBER%%\[*}"
done
elif [[ $DEVICE == *sd* || $DEVICE == *hd* ]]; then
# DEVICE is a single disk
DEVICES=$DEVICE
else
# unsupported device type
echo "unsupported device type"
exit 3
fi
for DEVICE in $DEVICES; do
SMART_STATUS="$(sudo smartctl --health $DEVICE)"
# Intentionally unquoted: flattens smartctl's multi-line report onto one line.
echo $SMART_STATUS
if [[ $SMART_STATUS == *PASSED* ]]
then
continue;
elif [[ $SMART_STATUS == *FAILED* ]]
then
exit 1
else
# Output contained neither PASSED nor FAILED -> unknown.
exit 3
fi
done
# If no device's health test failed, return 0 (i.e. health test PASSED).
exit 0

View File

@@ -0,0 +1,32 @@
Copyright (c) 2008-2011, Jan Stender, Bjoern Kolbeck, Mikael Hoegqvist,
Felix Hupfeld, Felix Langner, Zuse Institute Berlin
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of the Zuse Institute Berlin nor the
names of its contributors may be used to endorse or promote
products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,3 @@
For further details on how to use the BabuDB replication plug-in, please refer to the BabuDB Wiki:
http://code.google.com/p/babudb/wiki/UsageReplicationForJava

View File

@@ -0,0 +1,82 @@
#####################################################################
# BabuDB replication plugin configuration (DIR replication) #
#####################################################################
#####################################################################
# List of replicas and replication configuration
#####################################################################
# participants of the replication including this replica
babudb.repl.participant.0 = first-DIR-replica
babudb.repl.participant.0.port = 35678
babudb.repl.participant.1 = second-DIR-replica
babudb.repl.participant.1.port = 35678
babudb.repl.participant.2 = third-DIR-replica
babudb.repl.participant.2.port = 35678
# number of servers that at least have to be up to date
# To have a fault-tolerant system, this value has to be set to the
# majority of nodes i.e., if you have three replicas, set this to 2
# Please note that a setup with two nodes provides no fault-tolerance.
babudb.repl.sync.n = 2
#####################################################################
# Advanced Options (usually you do NOT have to edit these)
#####################################################################
# It's possible to set the local address and port of this server explicitly.
# If not, it will be chosen from the list of participants.
#babudb.repl.localhost = localhost
#babudb.repl.localport = 35678
# Choose here one of the predefined policies for handling database requests:
#
# MasterOnly - Redirect any kind of request to the master.
# Provides strong consistency.
# WriteRestriction - Same as MasterOnly plus lookup operations are also permitted on the slaves.
# Consequently, clients may read stale values from a backup replica.
# NoRestriction - Allows any kind of request to be performed at the local BabuDB instance.
# May result in conflicts, which are not resolved.
#
# default setting is MasterOnly.
#babudb.repl.policy = MasterOnly
# DB backup directory - needed for the initial loading of the BabuDB from the
# master in replication context
babudb.repl.backupDir = /var/lib/xtreemfs/server-repl-dir
#####################################################################
# SSL options (disabled by default)
#####################################################################
# specify whether SSL is required
#babudb.ssl.enabled = false
# server credentials for SSL handshakes
#babudb.ssl.service_creds = /etc/xos/xtreemfs/truststore/certs/osd.p12
#babudb.ssl.service_creds.pw = xtreemfs
#babudb.ssl.service_creds.container = pkcs12
# trusted certificates for SSL handshakes
#babudb.ssl.trusted_certs = /etc/xos/xtreemfs/truststore/certs/xosrootca.jks
#babudb.ssl.trusted_certs.pw = xtreemfs
#babudb.ssl.trusted_certs.container = jks
#babudb.ssl.authenticationWithoutEncryption = false
#####################################################################
# Internal options (usually do not have to be touched)
#####################################################################
plugin.jar = /usr/share/java/BabuDB_replication_plugin.jar
# paths to libraries this plugin depends on
babudb.repl.dependency.0 = /usr/share/java/Flease.jar
# local time renew in milliseconds
#babudb.localTimeRenew = 0
# chunk size, for initial load of file chunks
#babudb.repl.chunkSize = 5242880
# decides whether redirects should be handled by the user-application or not
#babudb.repl.redirectIsVisible = false

View File

@@ -0,0 +1,82 @@
#####################################################################
# BabuDB replication plugin configuration (MRC replication) #
#####################################################################
#####################################################################
# List of replicas and replication configuration
#####################################################################
# participants of the replication including this replica
babudb.repl.participant.0 = first-MRC-replica
babudb.repl.participant.0.port = 35676
babudb.repl.participant.1 = second-MRC-replica
babudb.repl.participant.1.port = 35676
babudb.repl.participant.2 = third-MRC-replica
babudb.repl.participant.2.port = 35676
# number of servers that at least have to be up to date
# To have a fault-tolerant system, this value has to be set to the
# majority of nodes i.e., if you have three replicas, set this to 2
# Please note that a setup with two nodes provides no fault-tolerance.
babudb.repl.sync.n = 2
#####################################################################
# Advanced Options (usually you do NOT have to edit these)
#####################################################################
# It's possible to set the local address and port of this server explicitly.
# If not, it will be chosen from the list of participants.
#babudb.repl.localhost = localhost
#babudb.repl.localport = 35676
# Choose here one of the predefined policies for handling database requests:
#
# MasterOnly - Redirect any kind of request to the master.
# Provides strong consistency.
# WriteRestriction - Same as MasterOnly plus lookup operations are also permitted on the slaves.
# Consequently, clients may read stale values from a backup replica.
# NoRestriction - Allows any kind of request to be performed at the local BabuDB instance.
# May result into conflicts which are not resolved.
#
# default setting is MasterOnly.
#babudb.repl.policy = MasterOnly
# DB backup directory - needed for the initial loading of the BabuDB from the
# master in replication context
babudb.repl.backupDir = /var/lib/xtreemfs/server-repl-mrc
#####################################################################
# SSL options (disabled by default)
#####################################################################
# specify whether SSL is required
#babudb.ssl.enabled = false
# server credentials for SSL handshakes
#babudb.ssl.service_creds = /etc/xos/xtreemfs/truststore/certs/osd.p12
#babudb.ssl.service_creds.pw = xtreemfs
#babudb.ssl.service_creds.container = pkcs12
# trusted certificates for SSL handshakes
#babudb.ssl.trusted_certs = /etc/xos/xtreemfs/truststore/certs/xosrootca.jks
#babudb.ssl.trusted_certs.pw = xtreemfs
#babudb.ssl.trusted_certs.container = jks
#babudb.ssl.authenticationWithoutEncryption = false
#####################################################################
# Internal options (usually do not have to be touched)
#####################################################################
plugin.jar = /usr/share/java/BabuDB_replication_plugin.jar
# paths to libraries this plugin depends on
babudb.repl.dependency.0 = /usr/share/java/Flease.jar
# local time renew in milliseconds
#babudb.localTimeRenew = 0
# chunk size, for initial load of file chunks
#babudb.repl.chunkSize = 5242880
# decides whether redirects should be handled by the user-application or not
#babudb.repl.redirectIsVisible = false

View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Copyright (c) 2012 Michael Berlin, Zuse Institute Berlin
# Licensed under the BSD License, see LICENSE file for details.

# Rebuild BabuDB_replication_plugin.jar from a BabuDB trunk checkout
# (pointed to by $BABUDB) and copy it next to this script.
set -e
trap onexit 1 2 3 15 ERR

# Report the failing exit status and abort (invoked by the trap above).
function onexit() {
local exit_status=${1:-$?}
echo ERROR: Exiting $0 with $exit_status
exit $exit_status
}

replication_dir_in_babudb_trunk="java/replication"

cat <<EOF
This script updates the binary .jar file which contains the BabuDB replication plugin.
EOF

# Auto-detect a known checkout location when $BABUDB is not set.
if [ -z "$BABUDB" ]
then
known_babudb_dirs="../../../../googlecode-svn-babudb/trunk"
for dir in $known_babudb_dirs
do
if [ -d "$dir" ]
then
BABUDB="$dir"
fi
done
fi
if [ -z "$BABUDB" ]
then
echo "The environment variable BABUDB was not set. Please point it to a checkout directory of the SVN trunk of the BabuDB project (svn checkout http://babudb.googlecode.com/svn/trunk/ babudb)."
exit 1
fi
if [ ! -d "$BABUDB" ]
then
echo "The environment variable BABUDB does not point to an existing directory. BABUDB = ${BABUDB}"
exit 1
fi

echo "Updating the .jar files required by the BabuDB replication code first..."
"${BABUDB}/${replication_dir_in_babudb_trunk}/update_jar_dependencies.sh"
echo "Finished updating the .jar files required by the BabuDB replication code."

# Typo fix: status messages said "plugion" instead of "plugin".
echo "compiling BabuDB replication plugin (BabuDB_replication_plugin.jar)"
babudb_replication_buildfile="${BABUDB}/${replication_dir_in_babudb_trunk}/build.xml"
babudb_replication_jar_source="${BABUDB}/${replication_dir_in_babudb_trunk}/../dist/replication/BabuDB_replication_plugin.jar"
babudb_replication_jar_dest=$(dirname "$0")
# ant clean -f "$babudb_replication_buildfile" >/dev/null
ant jar -f "$babudb_replication_buildfile" >/dev/null
cp -a "$babudb_replication_jar_source" "$babudb_replication_jar_dest"
echo "finished compiling BabuDB replication plugin (BabuDB_replication_plugin.jar)"

34
contrib/travis/parse_results.py Executable file
View File

@@ -0,0 +1,34 @@
#!/usr/bin/env python
# Copyright (c) 2014 by Johannes Dillmann, Zuse Institute Berlin
# Licensed under the BSD License, see LICENSE file for details.
import sys
import argparse
import json
if __name__ == "__main__":
    # Report whether the named test passed in a JSON results file.
    # Prints "true"/"false" and exits 0/1 accordingly (consumed by Travis).
    parser = argparse.ArgumentParser()
    parser.add_argument("file", type=argparse.FileType('r'))
    parser.add_argument("test")
    args = parser.parse_args()

    results = json.load(args.file)
    result = results.get(args.test)

    # A test entry is either a plain boolean or a dict of sub-results that
    # must all be truthy. NOTE(review): an empty dict counts as success
    # because all() is vacuously true -- preserved from the original.
    passed = False
    if isinstance(result, bool):
        passed = result
    elif isinstance(result, dict):
        passed = all(result.values())

    if passed:
        print("true")
        sys.exit(0)

    print("false")
    sys.exit(1)

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Provisioning script: installs everything needed to build the XtreemFS
# servers (Java/ant) and the client (C++/cmake/fuse/boost) plus valgrind
# for the client tests. Must run as root (uses apt-get; Debian/Ubuntu only).
echo "Installing XtreemFS build dependencies"
apt-get -y update
apt-get -y install openjdk-7-jdk ant build-essential libssl-dev libfuse-dev libattr1-dev cmake libboost-regex-dev libboost-program-options-dev libboost-thread-dev libboost-system-dev valgrind
# Export the Java location and the client-test switch for all future shells.
echo "export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64" >> /etc/bash.bashrc
echo "export BUILD_CLIENT_TESTS=true" >> /etc/bash.bashrc

View File

@@ -0,0 +1,223 @@
#!/bin/bash
### BEGIN INIT INFO
# Provides:          xtreemfs-osd-farm
# Required-Start:    $network $remote_fs
# Required-Stop:     $network $remote_fs
# Should-Start:      xtreemfs-dir
# Should-Stop:       $null
# Default-Start:     3 5
# Default-Stop:      0 1 2 6
# Short-Description: XtreemFS OSD init.d script which can start multiple OSDs on the same machine in contrast to xtreemfs-osd
# Description:       XtreemFS Object Storage Device (OSD). http://www.xtreemfs.org/
### END INIT INFO

# Source function library.
if [ -e /lib/lsb/init-functions ]; then
. /lib/lsb/init-functions
else
. /etc/init.d/functions
fi

# Unix account all OSD processes run under (must exist, see pre_check).
XTREEMFS_USER=xtreemfs

# List of OSD instances which shall be started, separated by spaces.
# For every OSD there has to be a configuration file.
OSD_INSTANCES="osd1 osd2 osd3"

# OSD specific options. Use %OSDNAME% which will be substituted.
PID_OSD_GENERIC=/var/run/xtreemfs_%OSDNAME%.pid
CONFIG_OSD_GENERIC=/etc/xos/xtreemfs/%OSDNAME%.config.properties
LOG_OSD_GENERIC=/var/log/xtreemfs/%OSDNAME%.log

if [ -z $JAVA_HOME ]; then
export JAVA_HOME=/usr
fi
# Java command line shared by all instances; the per-instance config file
# is appended by start().
JAVA_CALL="$JAVA_HOME/bin/java -ea -cp /usr/share/java/XtreemFS.jar:/usr/share/java/BabuDB.jar:/usr/share/java/Flease.jar:/usr/share/java/protobuf-java-2.5.0.jar:/usr/share/java/Foundation.jar:/usr/share/java/jdmkrt.jar:/usr/share/java/jdmktk.jar:/usr/share/java/commons-codec-1.3.jar"

# For SELinux we need to use 'runuser' not 'su'
if [ -x "/sbin/runuser" ]; then
SU="/sbin/runuser"
else
SU="/bin/su"
fi
# Validate the runtime prerequisites of one OSD instance.
# Arguments: $1 - instance log file path, $2 - instance config file path.
# Exits the whole script with 1 when the xtreemfs user, the log directory
# or the config file is missing.
pre_check() {
LOG_OSD="$1"
CONFIG_OSD="$2"
# NOTE(review): grep -c matches any passwd line *containing* the user name,
# not an exact account-name match -- confirm this is strict enough.
exists=`grep -c $XTREEMFS_USER /etc/passwd`
if [ $exists -eq 0 ]; then
echo "User $XTREEMFS_USER does not exist. Create it first."
exit 1
fi
log_directory=`dirname $LOG_OSD`
if [ ! -e $log_directory ]; then
echo "Directory for logfiles $log_directory does not exist. Create it first."
exit 1
fi
if [ ! -f "$CONFIG_OSD" ]; then
echo -e "Config file not found: $CONFIG_OSD"
echo
exit 1
fi
}
# Resolve the set of OSD instances to operate on into $OSD_LIST.
# With an argument: accept it only if it appears in OSD_INSTANCES
# (otherwise print an error and exit 1). Without: select all instances.
get_osd_list() {
OSD_LIST=""
if [ -n "$1" ]; then
local candidate
for candidate in $OSD_INSTANCES; do
if [ "$candidate" = "$1" ]; then
OSD_LIST="$1"
return 0
fi
done
echo "OSD \"$1\" is not part of the list OSD_INSTANCES."
exit 1
fi
OSD_LIST=$OSD_INSTANCES
return 0
}
# Print $1 with every %OSDNAME% placeholder replaced by $2.
substitute_osdname() {
printf '%s\n' "$1" | sed -e "s/%OSDNAME%/$2/g"
}
# Ensure each generic path template contains the %OSDNAME% placeholder;
# without it every instance would share the same pid/log/config file.
# Exits 1 on the first template that lacks the placeholder.
pre_check_vars() {
local template
for template in $LOG_OSD_GENERIC $PID_OSD_GENERIC $CONFIG_OSD_GENERIC; do
case "$template" in
*%OSDNAME%*)
;;
*)
echo "%OSDNAME% parameter not found in variable: $template"
exit 1
;;
esac
done
}
# Start one named OSD instance ($1) or, without an argument, all instances
# from OSD_INSTANCES. For each instance: run prechecks, launch the Java OSD
# under $XTREEMFS_USER via $SU, record the wrapper's PID, and report
# success/failure after a 1s liveness probe. Returns 1 on the first
# instance that dies immediately, 0 otherwise.
start() {
get_osd_list "$1"
pre_check_vars
for osdname in $OSD_LIST; do
# Expand the per-instance log/pid/config paths from the templates.
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
pre_check "$LOG_OSD" "$CONFIG_OSD"
echo >> $LOG_OSD
date >> $LOG_OSD
echo -e "Starting XtreemFS Object Storage Device (OSD): $osdname ... \n\n" >> $LOG_OSD
echo -n "Starting XtreemFS Object Storage Device (OSD): $osdname ... "
# The recorded PID is that of the $SU wrapper process, not the JVM itself.
$SU -s /bin/bash $XTREEMFS_USER -c "$JAVA_CALL org.xtreemfs.osd.OSD $CONFIG_OSD" >> $LOG_OSD 2>&1 &
PROCPID=$!
echo $PROCPID > $PID_OSD
# Give the process a moment, then check it is still alive.
sleep 1s
if [ -e /proc/$PROCPID ]; then
echo "success"
else
echo "failed"
return 1
fi
done
return 0
}
# Stop one named OSD instance ($1) or, without an argument, all instances.
# Removes the pidfile on successful termination. Returns the result of the
# last instance processed.
# NOTE(review): 'result' is re-initialised inside the loop, so a failure in
# an earlier instance is masked by a later success -- confirm intended.
stop() {
get_osd_list "$1"
pre_check_vars
for osdname in $OSD_LIST; do
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
result=0
if [ -f $PID_OSD ]; then
echo -n "Stopping XtreemFS Object Storage Device (OSD): $osdname ... "
# NOTE(review): matches the $SU wrapper process whose PID start()
# recorded in the pidfile (the JVM runs as its child) -- confirm.
killproc -p $PID_OSD $SU
result=$?
if [ $result -eq 0 ]; then
rm -f $PID_OSD
echo "success"
else
echo "failed"
fi
else
echo "XtreemFS Object Storage Device (OSD) is not running"
fi
done
return $result
}
# Report the status of one named OSD instance ($1) or all instances.
# LSB-style return codes: 0 = running, 1 = pidfile exists but process is
# gone (crashed), 3 = not running (no pidfile). When several instances are
# checked, the worst finding wins implicitly via the last assignment order.
status() {
get_osd_list "$1"
pre_check_vars
rc=0
for osdname in $OSD_LIST; do
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
if [ -f $PID_OSD ]; then
PROCPID=`cat $PID_OSD`
# Pidfile exists: check whether the recorded process is still alive.
if [ ! -e /proc/$PROCPID ]; then
echo "XtreemFS Object Storage Device (OSD): $osdname has crashed"
rc=1
else
echo "XtreemFS Object Storage Device (OSD): $osdname is running"
fi
else
echo "XtreemFS Object Storage Device (OSD): $osdname is not running"
rc=3
fi
done
return $rc
}
# See how we were called.
# $1 is the action, optional $2 restricts it to a single OSD instance.
case "$1" in
start)
start "$2"
result=$?
;;
stop)
stop "$2"
result=$?
;;
status)
status "$2"
result=$?
;;
reload)
# Nothing to reload; report success for LSB compliance.
result=0
;;
restart)
stop "$2" && sleep 1 && start "$2"
result=$?
;;
try-restart)
## Stop the service and if this succeeds (i.e. the
## service was running before), start it again.
$0 status "$2" >/dev/null
if [ $? -eq 0 ]; then
$0 restart "$2"
result=$?
else
result=0
fi
;;
*)
echo -e "Usage: $0 {start|stop|restart|reload|status|try-restart}\n"
result=1
;;
esac
exit $result