Imported Upstream version 1.5.1
This commit is contained in:
403
contrib/benchmark/benchmark.sh
Executable file
403
contrib/benchmark/benchmark.sh
Executable file
@@ -0,0 +1,403 @@
|
||||
#!/bin/bash
#
# benchmark.sh — run a series of XtreemFS benchmarks (xtfs_benchmark) with an
# increasing number of threads. See printUsage below for the CLI.
# All settings here can be overridden via BENCH_* environment variables.

# the timeout (seconds) for one execution of xtfs_benchmark
TIMEOUT=1500

# the timeout (seconds) for one cleanup run (xtfs_cleanup)
TIMEOUT_CLEANUP=300

# the time (seconds) to sleep after a cleanup, to let the system settle
SLEEPTIME=600

# if false, the script will not sleep after a cleanup and after drop_caches
# (-v will set false)
SLEEP=true

# the size of the basefile for random benchmarks (shared read/write pool)
BASEFILE_SIZE="100g"

# the directories for the logfiles and the results
LOG_BASE=${BENCH_LOG:-$HOME}
LOG_DIR="$LOG_BASE/log"
RESULT_DIR="$LOG_BASE/result"

# Drops caches after each benchmark. Set BENCH_DROP_CACHES=false to disable.
# cp "drop_caches" to "/usr/local/bin" and add "ALL ALL=NOPASSWD: /usr/local/bin/drop_caches" to sudoers file
DROP_CACHES=${BENCH_DROP_CACHES:-"/usr/local/bin/drop_caches"}
if [[ $DROP_CACHES != "false" ]]; then
    # DROP_CACHES_CALL stays empty when disabled; drop_caches() checks that.
    DROP_CACHES_CALL="sudo ${DROP_CACHES}"
fi

# IP and Port of the DIR
DIR=${BENCH_DIR:-"localhost:32638"}

# IP and Port of the MRC
MRC=${BENCH_MRC:-"localhost:32636"}

# space-separated list of OSD UUIDs, e.g. "osd1 osd2 ..."
OSD_UUIDS=${BENCH_OSD_UUIDS:-"test-osd0"}

# stripe size for a volume (parsed into bytes by init_params)
STRIPE_SIZE="128K"

# request size for each I/O operation (parsed into bytes by init_params)
REQUEST_SIZE=$STRIPE_SIZE

# replication settings (empty policy means no replication)
REPLICATION_POLICY=""
REPLICATION_FACTOR=1
|
||||
|
||||
# check_env: locate the XtreemFS installation and a usable JVM.
# Sets XTREEMFS (if derivable from the current directory) and JAVA_HOME
# (defaulting to /usr); exits 1 if neither the XtreemFS jar nor a JVM is found.
check_env(){
    # Try to derive XTREEMFS from the current or parent directory layout.
    if [ -z "$XTREEMFS" ]; then
        if [ -d java ] && [ -d cpp ] && [ -d etc ]; then
            # Looks like an XtreemFS base directory.
            XTREEMFS=$(pwd)
        elif [ -d ../java ] && [ -d ../cpp ] && [ -d ../etc ]; then
            # XtreemFS base could be the parent directory.
            XTREEMFS=$(pwd)/..
        fi
    fi
    # Abort unless the XtreemFS jar can be found in one of the known places.
    if [ ! -e "$XTREEMFS/java/servers/dist/XtreemFS.jar" ] \
        && [ ! -d "$XTREEMFS/java/lib" ] \
        && [ ! -f "/usr/share/java/XtreemFS.jar" ]; then
        echo "XtreemFS jar could not be found!"
        exit 1
    fi

    # check JAVA_HOME: a JDK/JRE is required by the Java benchmark tool
    if [ -z "$JAVA_HOME" ] && [ ! -f "/usr/bin/java" ]; then
        echo "\$JAVA_HOME not set, JDK/JRE 1.6 required"
        exit 1
    fi

    if [ -z "$JAVA_HOME" ]; then
        JAVA_HOME=/usr
    fi
}
|
||||
|
||||
# printUsage: write the help text to stdout.
# Fixed: the synopsis used to advertise a nonexistent -x option and omitted
# the real -c and -i options; "beginn" typo corrected.
printUsage() {
    cat << EOF

Synopsis
    $(basename $0) -t TYPE -s SIZE [-c SIZE] [-i SIZE] [-p POLICY -f NUMBER] [-b NUMBER -e NUMBER] [-r NUMBER] [-v]
    Run a XtreemFS benchmark series, i.e. a series of benchmarks with increasing
    numbers of threads. Logs are placed in \$HOME/log/, results in \$HOME/results
    (can be changed at the head of the script).

    -t type
        Type of benchmarks to run. Type can be either of the following:
        sw  sequential write
        usw unaligned sequential write
        sr  sequential read
        rw  random write
        rr  random read

    -s size
        Size of one benchmark, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.

    -c size
        Size of each read/write request, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
        Defaults to 128K.

    -i size
        Stripe size for each volume, modifier K (for KiB), M (for MiB) or G (for GiB) is mandatory.
        Defaults to 128K.

    -p policy
        Replication policy to use. Defaults to none.

    -f factor
        Replication factor to use. Defaults to 1.

    -b number of threads to begin the benchmark series
        Minimum number of threads to be run as the benchmarks series.
        The series will run benchmarks between the 'begin' and the 'end' number of threads.

    -e number of threads to end the benchmark series
        Maximum number of threads to be run as the benchmarks series.
        The series will run benchmarks between the 'begin' and the 'end' number of threads.

    -r repetitions
        Number of times a benchmark is repeated.

    -v verbose
        If set, bash debugging is enabled ('set -x') and sleeping after the benchmarks
        is disabled.

EOF
}
|
||||
|
||||
# init_params: validate the environment, create log/result directories,
# derive the thread/repetition lists, and redirect stderr to a logfile and
# stdout to the CSV result file (both tee'd, so output is still visible).
# Finally converts all size settings from human-readable form to bytes.
init_params(){

    check_env

    if ! [ -d $LOG_DIR ]; then
        echo "$LOG_DIR doesn't existing. Creating $LOG_DIR..."
        mkdir -p $LOG_DIR
    fi
    if ! [ -d $RESULT_DIR ]; then
        echo "$RESULT_DIR doesn't existing. Creating $RESULT_DIR"
        mkdir -p $RESULT_DIR
    fi

    # expand scalar options into the word lists iterated over in main
    # (note REPETITIONS is deliberately overwritten: count -> list 1..count)
    THREADS="$(seq $BEGIN $END)"
    REPETITIONS="$(seq 1 $REPETITIONS)"

    # use second resolution in case multiple benchmarks are run per minute
    NOW=$(date +"%y-%m-%d_%H-%M-%S")
    # redirect stdout and stderr: stderr -> logfile, stdout -> result CSV
    exec 2> >(tee $LOG_DIR/$TYPE-$NOW.log)
    exec > >(tee $RESULT_DIR/$TYPE-$NOW.csv)

    # convert the human-readable sizes (e.g. "128K") to plain byte counts
    BASEFILE_SIZE=$(parse_size $BASEFILE_SIZE)
    REQUEST_SIZE=$(parse_size $REQUEST_SIZE)
    STRIPE_SIZE=$(parse_size $STRIPE_SIZE)

}
|
||||
|
||||
|
||||
# parse_size: convert a size with an optional binary modifier into bytes.
# $1 - size string, e.g. "512", "128K", "3M", "100g" (K/M/G, case-insensitive).
# Prints the byte count to stdout; errors go to stderr (previously they went
# to stdout and were silently captured by callers' command substitution).
# Also fixed: the error message omitted 'K' even though K was accepted, and
# the implementation needlessly spawned 'expr' and 'bc'.
parse_size(){
    local size_with_modifier=$1
    local size modifier

    # split leading digits from a trailing modifier
    if [[ $size_with_modifier =~ ^([0-9]+)([A-Za-z]*)$ ]]; then
        size=${BASH_REMATCH[1]}
        modifier=${BASH_REMATCH[2]}
    else
        echo "Invalid size: '$size_with_modifier'" >&2
        exit 1
    fi

    case $modifier in
        "") ;;  # plain byte count, nothing to do
        [Kk]) size=$(( size * 1024 )) ;;
        [Mm]) size=$(( size * 1024 * 1024 )) ;;
        [Gg]) size=$(( size * 1024 * 1024 * 1024 )) ;;
        *)
            echo "Wrong size modifier. Only 'K', 'M' and 'G' are allowed" >&2
            exit 1
            ;;
    esac

    echo $size
}
|
||||
|
||||
|
||||
|
||||
# prepare_seq_read: fill the benchmark volumes with data so that a subsequent
# sequential-read benchmark has something to read. Populates the global
# VOLUMES array (benchmark0 .. benchmarkN-1).
# $1 - data size written per thread (bytes), $2 - number of threads.
# Fixed: the body used "$1" although it had declared "local size=$1";
# trivial arithmetic no longer spawns bc; loop variables are now local.
prepare_seq_read(){

    local size=$1
    local threads=$2
    local i index

    # declare array of volume names, one volume per thread
    local volume_index=$(( threads - 1 ))
    for i in $(seq 0 $volume_index); do VOLUMES[$i]=benchmark$i; done

    echo -e "\nPreparing sequential read benchmarks\n" >&2
    for i in $(seq 1 $threads); do
        index=$(( i - 1 ))
        timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -sw -ssize $size --no-cleanup --user $USER \
            ${VOLUMES[$index]} --stripe-size $STRIPE_SIZE --chunk-size $REQUEST_SIZE
    done
}
|
||||
|
||||
# prepare_random: create the benchmark volumes and the shared basefile that
# random read/write benchmarks operate on. Populates the global VOLUMES array.
# $1 - number of threads.
prepare_random(){

    local threads=$1

    # declare array of volume names, one volume per thread
    local volume_index=$(echo "$threads-1" | bc)
    for i in $(seq 0 $volume_index); do VOLUMES[$i]=benchmark$i; done

    # calc basefile size and round to a number divideable through REQUEST_SIZE
    local basefile_size=$(echo "(($BASEFILE_SIZE/$threads)/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)

    # a tiny random-read run per volume forces xtfs_benchmark to create the
    # basefile; --no-cleanup-* keeps it around for the real benchmark
    echo -e "\nPreparing random benchmark: Creating a basefiles\n" >&2
    for i in $(seq 1 $threads); do
        local index=$(echo "$i-1"|bc)
        timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -rr -rsize $REQUEST_SIZE --no-cleanup-basefile --no-cleanup-volumes --user $USER \
            --basefile-size $basefile_size ${VOLUMES[$index]} --stripe-size $STRIPE_SIZE --chunk-size $REQUEST_SIZE
    done
}
|
||||
|
||||
|
||||
# run_benchmark: run one xtfs_benchmark invocation under a timeout.
# $1 - benchmark type (sw|usw|sr|rw|rr), $2 - size in bytes, $3 - threads.
# On timeout (exit 124) or any other failure it aborts the whole series via
# interrupted_exit. For "sr" it cleans up the OSDs after every run.
run_benchmark(){
    local benchType=$1
    local size=$2
    local threads=$3
    local replicationOpt=""
    if [[ $REPLICATION_POLICY != "" ]]; then
        replicationOpt="--replication-policy $REPLICATION_POLICY"
    fi

    # sr: volumes were pre-filled by prepare_seq_read, keep them afterwards;
    # sw/usw: let the tool clean up itself; rw/rr: reuse basefile and volumes
    if [ $benchType = "sr" ]; then
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -ssize $size -n $threads --no-cleanup-volumes --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    elif [ $benchType = "sw" ] || [ $benchType = "usw" ]; then
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -ssize $size -n $threads --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    elif [ $benchType = "rw" ] || [ $benchType = "rr" ]; then
        # calc basefile size and round to a number divideable through REQUEST_SIZE
        local basefile_size=$(echo "(($BASEFILE_SIZE/$threads)/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)
        XTREEMFS=$XTREEMFS timeout --foreground $TIMEOUT $XTREEMFS/bin/xtfs_benchmark -$benchType -rsize $size --basefile-size $basefile_size -n $threads \
            --no-cleanup-basefile --no-cleanup-volumes --user $USER \
            $replicationOpt --replication-factor $REPLICATION_FACTOR --chunk-size $REQUEST_SIZE --stripe-size $STRIPE_SIZE
    fi

    # capture the status of the branch above; 124 is timeout(1)'s own code
    # NOTE(review): if benchType matched no branch, this captures the status
    # of the last [ ] test instead — callers only pass valid types.
    local bench_exit_status=$?
    if [ $bench_exit_status -eq 124 ]; then
        echo "The benchmark timed out (Timeout: $TIMEOUT)" >&2
        interrupted_exit
    elif [ $bench_exit_status -ne 0 ]; then
        echo "The benchmark did not finish with exit status 0" >&2
        interrupted_exit
    fi

    # cleanup after *every* benchmark only for seq write benchmark
    if [ $benchType = "sr" ]; then
        cleanup_osd
    fi
}
|
||||
|
||||
# delete_volumes: remove the volumes benchmark0 .. benchmark(N-1) at the MRC.
# $1 - number of threads the series was run with (one volume per thread).
# Fixed: replaced the '[ $? -eq 0 ]' antipattern with a direct 'if cmd',
# replaced bc with shell arithmetic, and made the loop variable local.
delete_volumes(){
    local number_of_threads=$1
    local volume_index=$(( number_of_threads - 1 ))
    local i
    for i in $(seq 0 $volume_index); do
        # report only successful removals; rmfs prints its own errors
        if rmfs.xtreemfs -f $MRC/benchmark$i >/dev/null; then
            echo "Removed volume benchmark$i" >&2
        fi
    done
}
|
||||
|
||||
# cleanup_osd: run xtfs_cleanup on every configured OSD (deleting zombie
# objects and leftover benchmark volumes), optionally sleep SLEEPTIME seconds
# so the system settles, then drop the OS caches.
cleanup_osd(){
    for osd in $OSD_UUIDS; do
        # -wait blocks until the cleanup is done; bounded by TIMEOUT_CLEANUP
        timeout --foreground $TIMEOUT_CLEANUP $XTREEMFS/bin/xtfs_cleanup -dir pbrpc://$DIR -wait -e -delete_volumes uuid:$osd >&2
    done
    if $SLEEP; then
        echo "Start Sleeping for $(echo "$SLEEPTIME/60"|bc) minutes at $(date)" >&2
        sleep $SLEEPTIME
        echo "Finished Sleeping at $(date)" >&2
    fi
    drop_caches
}
|
||||
|
||||
# interrupted_exit: emergency exit path, triggered on interrupt, benchmark
# timeout or benchmark failure. Removes all volumes of the series, cleans up
# the OSDs (without the settle sleep) and terminates with a non-zero status.
interrupted_exit(){
    printf '%s\n' "Unexpected exit, cleaning up..." >&2
    # no point in settling the system — we are aborting anyway
    SLEEP=false
    delete_volumes $END
    cleanup_osd
    exit 1
}
|
||||
|
||||
# drop_caches: flush the OS caches between benchmark runs via the configured
# sudo helper (see DROP_CACHES at the top of the script). No-op when
# DROP_CACHES_CALL is empty; sleeps 10s afterwards unless SLEEP is false.
drop_caches(){
    [ -z "$DROP_CACHES_CALL" ] && return
    echo "Dropping caches" >&2
    $DROP_CACHES_CALL
    if $SLEEP; then
        sleep 10
    fi
}
|
||||
|
||||
##### main ###
# Entry point: parse options, set up logging/result files, then run the
# benchmark series with an increasing number of threads.

# on Ctrl-C, clean up volumes and OSDs before exiting
trap "echo; echo 'Interrupt received '; interrupted_exit" INT

# show usage if invoked without options/arguments
if [ $# -eq 0 ]; then
    printUsage
    exit 1
fi

# default values: a single one-thread benchmark, run once
BEGIN=1
END=1
REPETITIONS=1


# parse options (leading ':' enables silent mode: ':' and '?' cases below)
while getopts ":t:s:c:i:b:e:r:p:f:v" opt; do
    case $opt in
    t)
        # benchmark type must be one of the five supported kinds
        if [ $OPTARG = "sw" ] || [ $OPTARG = "usw" ] || [ $OPTARG = "sr" ] || [ $OPTARG = "rw" ] || [ $OPTARG = "rr" ]; then
            TYPE=$OPTARG
        else
            echo 'wrong argument to -t. Needs to be either "sw", "usw", "sr", "rw" or "rr"'
            exit 1
        fi
        ;;
    s)
        # total size of one benchmark (split across all threads)
        SIZE=$(parse_size $OPTARG)
        ;;
    c)
        REQUEST_SIZE=$(parse_size $OPTARG)
        ;;
    i)
        STRIPE_SIZE=$(parse_size $OPTARG)
        ;;
    b)
        BEGIN=$OPTARG
        ;;
    e)
        END=$OPTARG
        ;;
    r)
        REPETITIONS=$OPTARG
        ;;
    p)
        REPLICATION_POLICY=$OPTARG
        ;;
    f)
        REPLICATION_FACTOR=$OPTARG
        ;;
    v)
        # verbose: trace every command and skip the settle sleeps
        SLEEP=false
        set -x
        ;;
    \?)
        echo "Invalid option: -$OPTARG" >&2
        exit 1
        ;;
    :)
        echo "Option -$OPTARG requires an argument." >&2
        exit 1
        ;;
    esac
done

# resolve sizes, create directories, redirect stdout (csv) / stderr (log)
init_params
drop_caches

# NOTE(review): $0 $@ are left unquoted; this line only logs the invocation
echo "Running:" $0 $@ >&2

# one series iteration per thread count; the per-thread size shrinks with i
# so the total amount of data stays constant across the series
for i in $THREADS; do
    size="$(echo "$SIZE/$i"|bc)"
    if [ $TYPE != "usw" ]; then
        size="$(echo "($size/$REQUEST_SIZE)*$REQUEST_SIZE" | bc)" # round down to a size divideable through the REQUEST_SIZE
    fi

    # read benchmarks need pre-written data; random ones need the basefile
    if [ $TYPE = "sr" ]; then
        prepare_seq_read $size $i
        cleanup_osd
    elif [ $TYPE = "rw" ] || [ $TYPE = "rr" ]; then
        prepare_random $i
    fi

    for j in $REPETITIONS; do
        echo "Start $i-Thread-Benchmark Nr. $j" >&2

        run_benchmark $TYPE $size $i

        echo "Finished $i-Thread-Benchmark Nr. $j" >&2

    done

    # seq write benchmarks run cleanup after every benchmark, so this would be redundant
    if [ $TYPE != "sw" ] && [ $TYPE != "usw" ]; then
        volume_index=$(echo "$i-1" | bc)
        # NOTE(review): this inner loop reuses 'i' and clobbers the outer loop
        # variable; it only works because 'for i in $THREADS' reassigns i at
        # the top of each iteration — fragile, a different name would be safer.
        for i in $(seq 0 $volume_index); do
            rmfs.xtreemfs -f $MRC/benchmark$i >&2
            echo "Remove volume benchmark$i" >&2
        done
        cleanup_osd
    fi

done
|
||||
3
contrib/benchmark/drop_caches
Executable file
3
contrib/benchmark/drop_caches
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/bash
# Helper for benchmark.sh: flushes the kernel page cache, dentries and inodes
# (drop_caches=3). Must run as root — install to /usr/local/bin and allow it
# passwordless via sudoers (see the header of benchmark.sh).
# The inner shell is needed so the redirection itself runs with root rights.

/bin/bash -c "echo 3 > /proc/sys/vm/drop_caches"
|
||||
69
contrib/filter-MRC-dump-with-XSLT/filter_files.xslt
Normal file
69
contrib/filter-MRC-dump-with-XSLT/filter_files.xslt
Normal file
@@ -0,0 +1,69 @@
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:date="http://exslt.org/dates-and-times"
|
||||
extension-element-prefixes="date">
|
||||
|
||||
<!--
|
||||
Copyright (c) 2013 by Michael Berlin, Zuse Institute Berlin
|
||||
|
||||
Licensed under the BSD License, see LICENSE file for details.
|
||||
|
||||
This file transforms a MRC database dump (in XML format) into
|
||||
a list of files. The output format is as follows:
|
||||
|
||||
volume name/path on volume|creation time|file size|file's owner name
|
||||
|
||||
The current version lists only files which are placed on an OSD
|
||||
with the UUID 'zib.mosgrid.osd15' (see line 34).
|
||||
|
||||
You can use the 'xsltproc' tool to apply this transformation to a XML dump.
|
||||
|
||||
Example: xsltproc -o filtered_files_output.txt filter_files.xslt /tmp/dump.xml
|
||||
-->
|
||||
|
||||
<xsl:output omit-xml-declaration="yes"/>
|
||||
|
||||
<!--Strip off white space from all elements. We take care of the format on our own.-->
|
||||
<xsl:strip-space elements="*"/>
|
||||
|
||||
<!--For each volume, process its "file" elements.-->
|
||||
<xsl:template match="volume">
|
||||
<xsl:apply-templates select="//file"/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="file[xlocList/xloc/osd/@location='zib.mosgrid.osd15']">
|
||||
<!--Traverse the path of the <file> element and output the 'name' attribute of
|
||||
each element to display the file system path.
|
||||
The first entry is the name of the volume.-->
|
||||
<xsl:for-each select="ancestor-or-self::*/@name">
|
||||
<!--We ignore the <volume> element because its name is repeated as <dir> element below.-->
|
||||
<xsl:if test="local-name(..) != 'volume'">
|
||||
|
||||
<!--Output path element.-->
|
||||
<xsl:value-of select="."/>
|
||||
|
||||
<xsl:if test="position() != last()">
|
||||
<!--Display separator.-->
|
||||
<xsl:text>/</xsl:text>
|
||||
</xsl:if>
|
||||
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<!--Creation time.-->
|
||||
<xsl:text>|</xsl:text>
|
||||
<xsl:value-of select="date:add('1970-01-01T00:00:00Z', date:duration(@ctime))"/>
|
||||
|
||||
<!--File size.-->
|
||||
<xsl:text>|</xsl:text>
|
||||
<xsl:value-of select="@size"/>
|
||||
|
||||
<!--Owner.-->
|
||||
<xsl:text>|</xsl:text>
|
||||
<xsl:value-of select="@uid"/>
|
||||
|
||||
<!--New line.-->
|
||||
<xsl:text>
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
17
contrib/ganglia-plugin/.project
Normal file
17
contrib/ganglia-plugin/.project
Normal file
@@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ganglia-plugin</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.python.pydev.PyDevBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.python.pydev.pythonNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
||||
10
contrib/ganglia-plugin/.pydevproject
Normal file
10
contrib/ganglia-plugin/.pydevproject
Normal file
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?eclipse-pydev version="1.0"?>
|
||||
|
||||
<pydev_project>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
|
||||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
||||
<path>/ganglia-plugin/src</path>
|
||||
</pydev_pathproperty>
|
||||
</pydev_project>
|
||||
6
contrib/ganglia-plugin/README.txt
Normal file
6
contrib/ganglia-plugin/README.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
How to use this plugin?
|
||||
|
||||
- Make sure your ganglia installation supports python plugins.
|
||||
- Copy plugin files from src directory to /usr/lib/ganglia/python_modules/
|
||||
- Copy the configuration files from the configuration-files directory to /etc/ganglia/conf.d
|
||||
- Alter the configuration files to suit your needs.
|
||||
61
contrib/ganglia-plugin/config-files/xtfs-dir.pyconf
Normal file
61
contrib/ganglia-plugin/config-files/xtfs-dir.pyconf
Normal file
@@ -0,0 +1,61 @@
|
||||
modules {
|
||||
module {
|
||||
name = "xtfs-dir-plugin"
|
||||
language = "python"
|
||||
# The following params are examples only
|
||||
param Host {
|
||||
value = localhost
|
||||
}
|
||||
param Port {
|
||||
value = 9001
|
||||
}
|
||||
param CommunityString {
|
||||
value = public
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection_group {
|
||||
collect_every = 60
|
||||
time_threshold = 10
|
||||
metric {
|
||||
name = "dir_jvm_used_mem"
|
||||
title = "used memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_jvm_free_mem"
|
||||
title = "free memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_client_connections"
|
||||
title = "number of Clients"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_pending_requests"
|
||||
title = "number of pending requests"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "addr_mapping_count"
|
||||
title = "number of address mappings"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "service_count"
|
||||
title = "number of services"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "dir_status"
|
||||
title = "Status DIR"
|
||||
}
|
||||
metric {
|
||||
name = "dir_uuid"
|
||||
title = "DIR UUID"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
56
contrib/ganglia-plugin/config-files/xtfs-mrc.pyconf
Normal file
56
contrib/ganglia-plugin/config-files/xtfs-mrc.pyconf
Normal file
@@ -0,0 +1,56 @@
|
||||
modules {
|
||||
module {
|
||||
name = "xtfs-mrc-plugin"
|
||||
language = "python"
|
||||
# The following params are examples only
|
||||
param Host {
|
||||
value = localhost
|
||||
}
|
||||
param Port {
|
||||
value = 9002
|
||||
}
|
||||
param CommunityString {
|
||||
value = public
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection_group {
|
||||
collect_every = 60
|
||||
time_threshold = 10
|
||||
metric {
|
||||
name = "mrc_jvm_used_mem"
|
||||
title = "used memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_jvm_free_mem"
|
||||
title = "free memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_client_connections"
|
||||
title = "number of Clients"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_pending_requests"
|
||||
title = "number of pending requests"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "volumes_count"
|
||||
title = "number of volumes"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "mrc_status"
|
||||
title = "Status MRC"
|
||||
}
|
||||
metric {
|
||||
name = "mrc_uuid"
|
||||
title = "MRC UUID"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
107
contrib/ganglia-plugin/config-files/xtfs-osd.pyconf
Normal file
107
contrib/ganglia-plugin/config-files/xtfs-osd.pyconf
Normal file
@@ -0,0 +1,107 @@
|
||||
modules {
|
||||
module {
|
||||
name = "xtfs-osd-plugin"
|
||||
language = "python"
|
||||
# The following params are examples only
|
||||
param Host {
|
||||
value = localhost
|
||||
}
|
||||
param Port {
|
||||
value = 9003
|
||||
}
|
||||
param CommunityString {
|
||||
value = public
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collection_group {
|
||||
collect_every = 60
|
||||
time_threshold = 10
|
||||
metric {
|
||||
name = "osd_jvm_used_mem"
|
||||
title = "used memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "osd_jvm_free_mem"
|
||||
title = "free memory of the jvm"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "osd_client_connections"
|
||||
title = "number of Clients"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "objects_received"
|
||||
title = "objects received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "repl_objects_received"
|
||||
title = "replicated objects received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "objects_transmitted"
|
||||
title = "objects transmitted"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "repl_bytes_received"
|
||||
title = "replicated bytes received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "bytes_received"
|
||||
title = "bytes received"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "bytes_transmitted"
|
||||
title = "bytes transmitted"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "preproc_queue_length"
|
||||
title = "preprocessing stage queue length"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "storage_queue_length"
|
||||
title = "storage stage queue length"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "deletion_queue_length"
|
||||
title = "deletion stage queue length"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "open_files"
|
||||
title = "open files"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "deleted_files"
|
||||
title = "deleted files"
|
||||
value_threshold = 1
|
||||
}
|
||||
metric {
|
||||
name = "free_space"
|
||||
title = "free space"
|
||||
value_threshold = 100
|
||||
}
|
||||
metric {
|
||||
name = "osd_status"
|
||||
title = "Status OSD"
|
||||
}
|
||||
metric {
|
||||
name = "osd_uuid"
|
||||
title = "OSD UUID"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
250
contrib/ganglia-plugin/src/xtfs-dir-plugin.py
Normal file
250
contrib/ganglia-plugin/src/xtfs-dir-plugin.py
Normal file
@@ -0,0 +1,250 @@
|
||||
'''
|
||||
Created on May 25, 2011
|
||||
|
||||
@author: bzcseife
|
||||
|
||||
This is a python ganglia plugin which monitors the status of a DIR service of the XtreemFS
filesystem. It is intended to run on the same host as the DIR and gathers information about the DIR via
SNMP. Therefore you have to configure your DIR to provide an SNMP agent on this host.
|
||||
|
||||
'''
|
||||
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
|
||||
#reported from the SNMP Agent to 32bit integers.
|
||||
|
||||
|
||||
import random
|
||||
from pysnmp.entity.rfc3413.oneliner import cmdgen
|
||||
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
|
||||
|
||||
|
||||
descriptors = list()
|
||||
Random_Max = 50
|
||||
Constant_Value = 50
|
||||
|
||||
|
||||
|
||||
#Get the used memory of the JVM
def JvmUsedMem(name):
    # 'name' is the metric name supplied by ganglia's callback interface
    # (unused here). Queries OID 1.3.6.1.4.1.38350.1.1.0 via SNMP using the
    # module-level authData/transportTarget initialized in metric_init().
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
                                                                                          transportTarget,
                                                                                          (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    if (errorStatus == False and errorIndication == None):
        # convert bytes to MiB — the descriptor declares 'Megabytes' as unit
        return int(varBinds[0][1]/1024/1024)
    else:
        # on any SNMP error report 0 instead of failing the collector
        return 0
|
||||
#Get the free memory of the JVM
|
||||
def JvmFreeMem(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1] / 1024 / 1024)
|
||||
else:
|
||||
return 0
|
||||
|
||||
#Get the number of client connections
|
||||
def ClientConnections(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
#Get the number of pending requests
|
||||
def PendingRequests(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 9, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
#Get the number of address mappings registered
|
||||
def AddressMappingCount(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 2, 1, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
#Get the number of services registered
|
||||
def ServiceCount(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 2, 2, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return int(varBinds[0][1])
|
||||
else:
|
||||
return 0
|
||||
|
||||
#get the status of the DIR
|
||||
#OID: 1.3.6.1.4.1.38350.1.11.0
|
||||
def Status(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return str(varBinds[0][1])
|
||||
else:
|
||||
return "OFFLINE"
|
||||
|
||||
#get the UUID of the DIR
|
||||
#OID: 1.3.6.1.4.1.38350.1.13.0
|
||||
def Uuid(name):
|
||||
errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(authData,
|
||||
transportTarget,
|
||||
(1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
|
||||
|
||||
if (errorStatus == False and errorIndication == None):
|
||||
return str(varBinds[0][1])
|
||||
else:
|
||||
return "Service not available"
|
||||
|
||||
def metric_init(params):
    '''Initialize the DIR ganglia metrics.

    params: dict of plugin parameters from the .pyconf file; recognized keys
    are 'CommunityString', 'Host' and 'Port'. Builds the module-level SNMP
    auth/transport objects used by all metric callbacks and returns the list
    of metric descriptor dicts.
    '''
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget

    # BUG FIX: the lookup key used to be misspelled 'ComummunityString', so a
    # configured community string was silently ignored; the global was also
    # misspelled 'Commmunity_String', so the assignment only created a local.
    if 'CommunityString' in params:
        Community_String = params['CommunityString']
    else:
        Community_String = 'public'

    # NOTE(review): Snmp_Host/Snmp_Port stay unbound if 'Host'/'Port' are
    # missing from params (pre-existing behavior; the shipped .pyconf files
    # always provide both) — confirm before relying on defaults.
    if 'Port' in params:
        Snmp_Port = int(params['Port'])
    if 'Host' in params:
        Snmp_Host = params['Host']

    # BUG FIX: pass the configured community string instead of the
    # hardcoded 'public'.
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)

    d0 = {'name': 'dir_jvm_used_mem',
          'call_back': JvmUsedMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of memory the JVM uses currently.',
          'groups': 'dir'}

    d1 = {'name': 'dir_jvm_free_mem',
          'call_back': JvmFreeMem,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'Megabytes',
          'slope': 'both',
          'format': '%u',
          'description': 'The amount of free memory the JVM can still use.',
          'groups': 'dir'}

    d2 = {'name': 'dir_client_connections',
          'call_back': ClientConnections,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'clients',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of active client connection this DIR has currently to handle.',
          'groups': 'dir'}

    d3 = {'name': 'dir_pending_requests',
          'call_back': PendingRequests,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'pending requests',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of pending requests this DIR has enqueued.',
          'groups': 'dir'}

    d4 = {'name': 'addr_mapping_count',
          'call_back': AddressMappingCount,
          'time_max': 90,
          # value_type: string | uint | float | double
          'value_type': 'uint',
          # units: unit of your metric
          'units': 'mappings',
          # slope: zero | positive | negative | both
          # 'both' maps to an RRDTool GAUGE (the value is graphed as-is);
          # 'positive' maps to COUNTER (rate of change); 'zero' puts the
          # metric into the "Time and String" / "Constant" groups.
          'slope': 'both',
          # format: printf-style format string; must match value_type
          'format': '%u',
          'description': 'The number of address mapping registered at the DIR.',
          # groups (optional): groups your metric belongs to
          'groups': 'dir'}

    d5 = {'name': 'service_count',
          'call_back': ServiceCount,
          'time_max': 90,
          'value_type': 'uint',
          'units': 'services',
          'slope': 'both',
          'format': '%u',
          'description': 'The number of services registered at the DIR.',
          'groups': 'dir'}

    d6 = {'name': 'dir_status',
          'call_back': Status,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'ONLINE if this DIR is running correctly, OFFLINE otherwise',
          'groups': 'dir'}

    d7 = {'name': 'dir_uuid',
          'call_back': Uuid,
          'time_max': 90,
          'value_type': 'string',
          'units': '',
          'slope': 'zero',
          'format': '%s',
          'description': 'UUID of the DIR running on this host',
          'groups': 'dir'}

    descriptors = [d0, d1, d2, d3, d4, d5, d6, d7]

    return descriptors
|
||||
|
||||
def metric_cleanup():
|
||||
'''Clean up the metric module.'''
|
||||
pass
|
||||
|
||||
|
||||
#for debugging purpose
|
||||
if __name__ == '__main__':
|
||||
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9001}
|
||||
metric_init(params)
|
||||
for d in descriptors:
|
||||
v = d['call_back'](d['name'])
|
||||
print 'value for %s is' % (d['name'])
|
||||
print v
|
||||
|
||||
|
||||
|
||||
221
contrib/ganglia-plugin/src/xtfs-mrc-plugin.py
Normal file
221
contrib/ganglia-plugin/src/xtfs-mrc-plugin.py
Normal file
@@ -0,0 +1,221 @@
|
||||
'''
|
||||
Created on May 25, 2011
|
||||
|
||||
@author: bzcseife
|
||||
|
||||
This is a python ganglia plugin which monitors the status of an MRC service of the XtreemFS
|
||||
filesystem. It is intended to run on the same host as the MRC and gathers information about the MRC via
|
||||
SNMP. Therefore you have to configure your MRC to provide a SNMP Agent on this host.
|
||||
|
||||
'''
|
||||
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
|
||||
#reported from the SNMP Agent to 32bit integers.
|
||||
|
||||
|
||||
import random
|
||||
from pysnmp.entity.rfc3413.oneliner import cmdgen
|
||||
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
|
||||
|
||||
|
||||
# Module-level state shared with the metric callbacks; filled in by metric_init().
descriptors = []
Random_Max = 50
Constant_Value = 50
|
||||
|
||||
|
||||
|
||||
#Get the used memory of the JVM
def JvmUsedMem(name):
    """Return the JVM heap currently used by the MRC, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    # SNMP agent unreachable or returned an error: report 0 rather than failing.
    return 0
|
||||
|
||||
#Get the free memory of the JVM
def JvmFreeMem(name):
    """Return the free JVM heap of the MRC, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
|
||||
#Get the number of client connections
def ClientConnections(name):
    """Return the number of active client connections on the MRC (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 7, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the number of pending requests
def PendingRequests(name):
    """Return the number of requests queued at the MRC (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of volumes
def VolumeCount(name):
    """Return the number of volumes registered at the MRC (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 3, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#get the status of the MRC
def Status(name):
    """Return the status string reported by the MRC, or "OFFLINE" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "OFFLINE"
|
||||
|
||||
|
||||
#get the UUID of the MRC
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
    """Return the UUID of the MRC, or "Service not available" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "Service not available"
|
||||
|
||||
|
||||
def metric_init(params):
    """Build and return the ganglia metric descriptors for the MRC plugin.

    Recognized params: 'CommunityString' (default 'public'), 'Host'
    (default 'localhost') and 'Port' of the MRC's SNMP agent.
    Also initializes the module-global SNMP auth/transport objects used by
    the metric callbacks.
    """
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget

    # Bug fix: the key was tested under the misspelled name 'ComummunityString'
    # (and the global was declared as 'Commmunity_String'), so a user-supplied
    # community string was silently ignored.
    Community_String = params.get('CommunityString', 'public')
    # Bug fix: Snmp_Port/Snmp_Host stayed undefined (NameError below) when the
    # params were missing; fall back to sensible defaults instead.
    Snmp_Port = int(params.get('Port', 161))  # 161 = standard SNMP port
    Snmp_Host = params.get('Host', 'localhost')

    # Bug fix: the community string is now actually used instead of a
    # hardcoded 'public'.
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)

    def metric(name, call_back, value_type, units, slope, fmt, description):
        # All MRC metrics share the same time_max and metric group.
        return {'name': name,
                'call_back': call_back,
                'time_max': 90,
                'value_type': value_type,
                'units': units,
                'slope': slope,
                'format': fmt,
                'description': description,
                'groups': 'mrc'}

    descriptors = [
        metric('mrc_jvm_used_mem', JvmUsedMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of memory the JVM uses currently.'),
        metric('mrc_jvm_free_mem', JvmFreeMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of free memory the JVM can still use.'),
        metric('mrc_client_connections', ClientConnections, 'uint', 'clients', 'both', '%u',
               'The number of active client connection this MRC has currently to handle.'),
        metric('mrc_pending_requests', PendingRequests, 'uint', 'pending requests', 'both', '%u',
               'The number of pending requests this MRC has enqueued.'),
        metric('volumes_count', VolumeCount, 'uint', 'volumes', 'both', '%u',
               'The number of volumes on this MRC.'),
        # Bug fix: the description wrongly said 'OSD' in this MRC plugin.
        metric('mrc_status', Status, 'string', '', 'zero', '%s',
               'ONLINE if this MRC is running correctly, OFFLINE otherwise'),
        metric('mrc_uuid', Uuid, 'string', '', 'zero', '%s',
               'UUID of the MRC running on this host'),
    ]
    return descriptors
|
||||
|
||||
def metric_cleanup():
    """Clean up the metric module (this plugin holds no resources to release)."""
    pass
|
||||
|
||||
|
||||
#for debugging purpose
|
||||
if __name__ == '__main__':
|
||||
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9002}
|
||||
metric_init(params)
|
||||
for d in descriptors:
|
||||
v = d['call_back'](d['name'])
|
||||
print 'value for %s is ' % (d['name'])
|
||||
print v
|
||||
|
||||
|
||||
|
||||
477
contrib/ganglia-plugin/src/xtfs-osd-plugin.py
Normal file
477
contrib/ganglia-plugin/src/xtfs-osd-plugin.py
Normal file
@@ -0,0 +1,477 @@
|
||||
'''
|
||||
Created on May 25, 2011
|
||||
|
||||
@author: bzcseife
|
||||
|
||||
This is a python ganglia plugin which monitors the status of an OSD service of the XtreemFS
|
||||
filesystem. It is intended to run on the same host as the OSD and gathers information about the OSD via
|
||||
SNMP. Therefore you have to configure your OSD to provide a SNMP Agent on this host.
|
||||
|
||||
'''
|
||||
#TODO: If ganglia supports 64bit values uses 64bit integers instead of converting all 64 bit integers
|
||||
#reported from the SNMP Agent to 32bit integers.
|
||||
|
||||
|
||||
import random
|
||||
from pysnmp.entity.rfc3413.oneliner import cmdgen
|
||||
from pysnmp.entity.rfc3413.oneliner.cmdgen import UdpTransportTarget
|
||||
|
||||
|
||||
|
||||
|
||||
# Module-level state shared with the metric callbacks; filled in by metric_init().
descriptors = []
Random_Max = 50
Constant_Value = 50
|
||||
|
||||
|
||||
|
||||
#Get the used memory of the JVM
def JvmUsedMem(name):
    """Return the JVM heap currently used by the OSD, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 1, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
|
||||
|
||||
#Get the free memory of the JVM
def JvmFreeMem(name):
    """Return the free JVM heap of the OSD, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
|
||||
#Get the number of client connections
def ClientConnections(name):
    """Return the number of active client connections on the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 7, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the number of pending requests
def PendingRequests(name):
    """Return the number of requests queued at the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 8, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of objects received
def ObjectsReceived(name):
    """Return the number of objects this OSD has received (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 1, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of replicated objects received
def ReplObjectsReceived(name):
    """Return the number of replicated objects this OSD has received (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 2, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of objects transmitted
# (comment fix: this is the plain, not the replicated, counter - OID ...4.3.0)
def ObjectsTransmitted(name):
    """Return the number of objects this OSD has transmitted (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 3, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the number of replicated bytes received
def ReplBytesReceived(name):
    """Return the replicated data this OSD has received, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 4, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#Get the number of bytes received
def BytesReceived(name):
    """Return the data this OSD has received, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 5, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#Get the number of bytes transmitted
def BytesTransmitted(name):
    """Return the data this OSD has transmitted, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 6, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#Get the length of the preprocessing stage queue
def PreprocQueueLength(name):
    """Return the preprocessing stage queue length of the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 7, 0))
    if not errorStatus and errorIndication is None:
        # Bug fix: the original divided the queue length by 1024/1024 (a
        # copy-paste from the byte counters), which reported 0 for any
        # realistic queue size. A queue length is a plain count.
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the length of the storage stage queue
def StorageQueueLength(name):
    """Return the storage stage queue length of the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 8, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
#Get the length of the deletion stage queue
def DeletionQueueLength(name):
    """Return the deletion stage queue length of the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 9, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the number of open files from the OSD per snmp
def OsdOpenFiles(name):
    """Return the number of files currently open on the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 10, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
|
||||
#Get the number of deleted files from the OSD per snmp
def OsdDeletedFiles(name):
    """Return the number of files deleted on the OSD (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 11, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1])
    return 0
|
||||
|
||||
|
||||
#Get the free space from the OSD per snmp
def OsdFreeSpace(name):
    """Return the free space on the OSD's object partition, in megabytes (0 on SNMP error)."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 4, 12, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return int(varBinds[0][1] / 1024 / 1024)
    return 0
|
||||
|
||||
#get the status of the OSD
def Status(name):
    """Return the status string reported by the OSD, or "OFFLINE" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 11, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "OFFLINE"
|
||||
|
||||
#get the UUID of the OSD
#OID: 1.3.6.1.4.1.38350.1.13.0
def Uuid(name):
    """Return the UUID of the OSD, or "Service not available" when unreachable."""
    errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd(
        authData, transportTarget, (1, 3, 6, 1, 4, 1, 38350, 1, 13, 0))
    # Idiom fix (PEP 8): use the boolean directly and compare to None with 'is'.
    if not errorStatus and errorIndication is None:
        return str(varBinds[0][1])
    return "Service not available"
|
||||
|
||||
def metric_init(params):
    """Build and return the ganglia metric descriptors for the OSD plugin.

    Recognized params: 'CommunityString' (default 'public'), 'Host'
    (default 'localhost') and 'Port' of the OSD's SNMP agent.
    Also initializes the module-global SNMP auth/transport objects used by
    the metric callbacks.
    """
    global descriptors
    global Community_String
    global Snmp_Port
    global authData
    global transportTarget

    # Bug fix: the key was tested under the misspelled name 'ComummunityString'
    # (and the global was declared as 'Commmunity_String'), so a user-supplied
    # community string was silently ignored.
    Community_String = params.get('CommunityString', 'public')
    # Bug fix: Snmp_Port/Snmp_Host stayed undefined (NameError below) when the
    # params were missing; fall back to sensible defaults instead.
    Snmp_Port = int(params.get('Port', 161))  # 161 = standard SNMP port
    Snmp_Host = params.get('Host', 'localhost')

    # Bug fix: the community string is now actually used instead of a
    # hardcoded 'public'.
    authData = cmdgen.CommunityData('xtreemfs-agent', Community_String)
    transportTarget = cmdgen.UdpTransportTarget((Snmp_Host, Snmp_Port), 1, 0)

    def metric(name, call_back, value_type, units, slope, fmt, description):
        # All OSD metrics share the same time_max and metric group.
        return {'name': name,
                'call_back': call_back,
                'time_max': 90,
                'value_type': value_type,
                'units': units,
                'slope': slope,
                'format': fmt,
                'description': description,
                'groups': 'osd'}

    descriptors = [
        metric('osd_jvm_used_mem', JvmUsedMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of memory the JVM uses currently.'),
        metric('osd_jvm_free_mem', JvmFreeMem, 'uint', 'Megabytes', 'both', '%u',
               'The amount of free memory the JVM can still use.'),
        metric('osd_client_connections', ClientConnections, 'uint', 'clients', 'both', '%u',
               'The number of active client connection this OSD has currently to handle.'),
        metric('osd_pending_requests', PendingRequests, 'uint', 'pending requests', 'both', '%u',
               'The number of pending requests this OSD has enqueued.'),
        metric('objects_received', ObjectsReceived, 'uint', 'objects', 'positive', '%u',
               'The number of objects this OSD has received.'),
        metric('repl_objects_received', ReplObjectsReceived, 'uint', 'objects', 'positive', '%u',
               'The number of replicated objects this OSD has received.'),
        metric('objects_transmitted', ObjectsTransmitted, 'uint', 'objects', 'positive', '%u',
               'The number of objects this OSD has transmitted.'),
        metric('repl_bytes_received', ReplBytesReceived, 'uint', 'Megabytes', 'positive', '%u',
               'The number of replicated bytes this OSD has received.'),
        metric('bytes_received', BytesReceived, 'uint', 'Megabytes', 'positive', '%u',
               'The number of bytes this OSD has received.'),
        metric('bytes_transmitted', BytesTransmitted, 'uint', 'Megabytes', 'positive', '%u',
               'The number of bytes this OSD has transmitted.'),
        metric('preproc_queue_length', PreprocQueueLength, 'uint', 'requests', 'both', '%u',
               'The length of the preprocessing stage queue of this OSD.'),
        # Bug fix: 'storage_queue_length' was registered twice (d11 with slope
        # 'positive' and a copy-paste d13 with slope 'both'); ganglia metric
        # names must be unique. Kept a single descriptor with slope 'both',
        # since a queue length is a gauge, not a monotonic counter.
        metric('storage_queue_length', StorageQueueLength, 'uint', 'requests', 'both', '%u',
               'The length of the storage stage queue of this OSD.'),
        metric('deletion_queue_length', DeletionQueueLength, 'uint', 'requests', 'both', '%u',
               'The length of the deletion stage queue of this OSD.'),
        metric('open_files', OsdOpenFiles, 'uint', 'files', 'both', '%u',
               'The number of file this OSD has currently opened.'),
        metric('deleted_files', OsdDeletedFiles, 'uint', 'files', 'positive', '%u',
               'The number of deleted files on this OSD'),
        metric('free_space', OsdFreeSpace, 'uint', 'Megabytes', 'both', '%u',
               'The free disc space on the partition this OSD stores the object files.'),
        metric('osd_status', Status, 'string', '', 'zero', '%s',
               'ONLINE if this OSD is running correctly, OFFLINE otherwise'),
        metric('osd_uuid', Uuid, 'string', '', 'zero', '%s',
               'UUID of the OSD running on this host'),
    ]
    return descriptors
|
||||
|
||||
def metric_cleanup():
    """Clean up the metric module (this plugin holds no resources to release)."""
    pass
|
||||
|
||||
|
||||
#for debugging purpose
|
||||
if __name__ == '__main__':
|
||||
params = {'CommunityString': 'public', 'Host': 'localhost', 'Port': 9003}
|
||||
metric_init(params)
|
||||
for d in descriptors:
|
||||
v = d['call_back'](d['name'])
|
||||
print 'value for %s is' % (d['name'])
|
||||
print v
|
||||
|
||||
|
||||
37
contrib/osd-health/osd_health_check.sh
Executable file
37
contrib/osd-health/osd_health_check.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Check the SMART health of the disk(s) backing an OSD object directory.
# Usage:  osd_health_check.sh OBJECT_DIR
# Exit codes: 0 = all devices PASSED, 1 = a device FAILED, 3 = unsupported
#             device type or unknown smartctl output.

OBJECT_DIR=$1

# Device holding the object dir: first field of df's '/dev/...' output line.
read -r DEVICE _ <<< "$(df "$OBJECT_DIR" | grep dev)"

# Determine device type
if [[ $DEVICE == *md* ]]; then
    # DEVICE is a software RAID. Bug fix: the original captured the output of
    # 'read' via $(...) — 'read' runs in a subshell and prints nothing, so
    # DEVICES was always empty. Read the matching /proc/mdstat line into an
    # array in the current shell instead. Note /proc/mdstat lists the bare
    # device name (e.g. 'md0'), not the '/dev/...' path, hence ${DEVICE##*/}.
    read -r -a MD_FIELDS <<< "$(grep "${DEVICE##*/}" /proc/mdstat)"
    # Member devices start at field 5 ("mdX : active raidN member1 member2 ...").
    DEVICES=("${MD_FIELDS[@]:4}")
elif [[ $DEVICE == *sd* || $DEVICE == *hd* ]]; then
    # DEVICE is a single disk
    DEVICES=("$DEVICE")
else
    # unsupported device type
    echo "unsupported device type"
    exit 3
fi

for DEVICE in "${DEVICES[@]}"; do
    SMART_STATUS=$(sudo smartctl --health "$DEVICE")
    echo "$SMART_STATUS"
    if [[ $SMART_STATUS == *PASSED* ]]; then
        continue
    elif [[ $SMART_STATUS == *FAILED* ]]; then
        exit 1
    else
        # Output we do not recognize: report "unknown" rather than healthy.
        exit 3
    fi
done

# If no device's health test failed, return 0 (i.e. health test PASSED).
exit 0
|
||||
BIN
contrib/server-repl-plugin/BabuDB_replication_plugin.jar
Normal file
BIN
contrib/server-repl-plugin/BabuDB_replication_plugin.jar
Normal file
Binary file not shown.
32
contrib/server-repl-plugin/LICENSE
Normal file
32
contrib/server-repl-plugin/LICENSE
Normal file
@@ -0,0 +1,32 @@
|
||||
Copyright (c) 2008-2011, Jan Stender, Bjoern Kolbeck, Mikael Hoegqvist,
|
||||
Felix Hupfeld, Felix Langner, Zuse Institute Berlin
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
* Neither the name of the Zuse Institute Berlin nor the
|
||||
names of its contributors may be used to endorse or promote
|
||||
products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
3
contrib/server-repl-plugin/README
Normal file
3
contrib/server-repl-plugin/README
Normal file
@@ -0,0 +1,3 @@
|
||||
For further details on how to use the BabuDB replication plug-in, please refer to the BabuDB Wiki:
|
||||
|
||||
http://code.google.com/p/babudb/wiki/UsageReplicationForJava
|
||||
82
contrib/server-repl-plugin/config/dir.properties
Normal file
82
contrib/server-repl-plugin/config/dir.properties
Normal file
@@ -0,0 +1,82 @@
|
||||
#####################################################################
|
||||
# BabuDB replication plugin configuration (DIR replication) #
|
||||
#####################################################################
|
||||
|
||||
#####################################################################
|
||||
# List of replicas and replication configuration
|
||||
#####################################################################
|
||||
|
||||
# participants of the replication including this replica
|
||||
babudb.repl.participant.0 = first-DIR-replica
|
||||
babudb.repl.participant.0.port = 35678
|
||||
babudb.repl.participant.1 = second-DIR-replica
|
||||
babudb.repl.participant.1.port = 35678
|
||||
babudb.repl.participant.2 = third-DIR-replica
|
||||
babudb.repl.participant.2.port = 35678
|
||||
|
||||
# number of servers that at least have to be up to date
|
||||
# To have a fault-tolerant system, this value has to be set to the
|
||||
# majority of nodes i.e., if you have three replicas, set this to 2
|
||||
# Please note that a setup with two nodes provides no fault-tolerance.
|
||||
babudb.repl.sync.n = 2
|
||||
|
||||
#####################################################################
|
||||
# Advanced Options (usually you do NOT have to edit these)
|
||||
#####################################################################
|
||||
|
||||
# It's possible to set the local address and port of this server explicitly.
|
||||
# If not, it will be chosen from the list of participants.
|
||||
#babudb.repl.localhost = localhost
|
||||
#babudb.repl.localport = 35678
|
||||
|
||||
# Choose here one of the predefined policies for handling database requests:
|
||||
#
|
||||
# MasterOnly - Redirect any kind of request to the master.
|
||||
# Provides strong consistency.
|
||||
# WriteRestriction - Same as MasterOnly plus lookup operations are also permitted on the slaves.
|
||||
# Consequently, clients may read stale values from a backup replica.
|
||||
# NoRestriction - Allows any kind of request to be performed at the local BabuDB instance.
|
||||
#                    May result in conflicts which are not resolved.
|
||||
#
|
||||
# default setting is MasterOnly.
|
||||
#babudb.repl.policy = MasterOnly
|
||||
|
||||
# DB backup directory - needed for the initial loading of the BabuDB from the
|
||||
# master in replication context
|
||||
babudb.repl.backupDir = /var/lib/xtreemfs/server-repl-dir
|
||||
|
||||
#####################################################################
|
||||
# SSL options (disabled by default)
|
||||
#####################################################################
|
||||
|
||||
# specify whether SSL is required
|
||||
#babudb.ssl.enabled = false
|
||||
|
||||
# server credentials for SSL handshakes
|
||||
#babudb.ssl.service_creds = /etc/xos/xtreemfs/truststore/certs/osd.p12
|
||||
#babudb.ssl.service_creds.pw = xtreemfs
|
||||
#babudb.ssl.service_creds.container = pkcs12
|
||||
|
||||
# trusted certificates for SSL handshakes
|
||||
#babudb.ssl.trusted_certs = /etc/xos/xtreemfs/truststore/certs/xosrootca.jks
|
||||
#babudb.ssl.trusted_certs.pw = xtreemfs
|
||||
#babudb.ssl.trusted_certs.container = jks
|
||||
|
||||
#babudb.ssl.authenticationWithoutEncryption = false
|
||||
|
||||
#####################################################################
|
||||
# Internal options (usually do not have to be touched)
|
||||
#####################################################################
|
||||
plugin.jar = /usr/share/java/BabuDB_replication_plugin.jar
|
||||
|
||||
# paths to libraries this plugin depends on
|
||||
babudb.repl.dependency.0 = /usr/share/java/Flease.jar
|
||||
|
||||
# local time renew in milliseconds
|
||||
#babudb.localTimeRenew = 0
|
||||
|
||||
# chunk size, for initial load of file chunks
|
||||
#babudb.repl.chunkSize = 5242880
|
||||
|
||||
# decides whether redirects should be handled by the user-application or not
|
||||
#babudb.repl.redirectIsVisible = false
|
||||
82
contrib/server-repl-plugin/config/mrc.properties
Normal file
82
contrib/server-repl-plugin/config/mrc.properties
Normal file
@@ -0,0 +1,82 @@
|
||||
#####################################################################
|
||||
# BabuDB replication plugin configuration (MRC replication) #
|
||||
#####################################################################
|
||||
|
||||
#####################################################################
|
||||
# List of replicas and replication configuration
|
||||
#####################################################################
|
||||
|
||||
# participants of the replication including this replica
|
||||
babudb.repl.participant.0 = first-MRC-replica
|
||||
babudb.repl.participant.0.port = 35676
|
||||
babudb.repl.participant.1 = second-MRC-replica
|
||||
babudb.repl.participant.1.port = 35676
|
||||
babudb.repl.participant.2 = third-MRC-replica
|
||||
babudb.repl.participant.2.port = 35676
|
||||
|
||||
# number of servers that at least have to be up to date
|
||||
# To have a fault-tolerant system, this value has to be set to the
|
||||
# majority of nodes i.e., if you have three replicas, set this to 2
|
||||
# Please note that a setup with two nodes provides no fault-tolerance.
|
||||
babudb.repl.sync.n = 2
|
||||
|
||||
#####################################################################
|
||||
# Advanced Options (usually you do NOT have to edit these)
|
||||
#####################################################################
|
||||
|
||||
# It's possible to set the local address and port of this server explicitly.
|
||||
# If not, it will be chosen from the list of participants.
|
||||
#babudb.repl.localhost = localhost
|
||||
#babudb.repl.localport = 35676
|
||||
|
||||
# Choose here one of the predefined policies for handling database requests:
|
||||
#
|
||||
# MasterOnly - Redirect any kind of request to the master.
|
||||
# Provides strong consistency.
|
||||
# WriteRestriction - Same as MasterOnly plus lookup operations are also permitted on the slaves.
|
||||
# Consequently, clients may read stale values from a backup replica.
|
||||
# NoRestriction - Allows any kind of request to be performed at the local BabuDB instance.
|
||||
# May result into conflicts which are not resolved.
|
||||
#
|
||||
# default setting is MasterOnly.
|
||||
#babudb.repl.policy = MasterOnly
|
||||
|
||||
# DB backup directory - needed for the initial loading of the BabuDB from the
|
||||
# master in replication context
|
||||
babudb.repl.backupDir = /var/lib/xtreemfs/server-repl-mrc
|
||||
|
||||
#####################################################################
|
||||
# SSL options (disabled by default)
|
||||
#####################################################################
|
||||
|
||||
# specify whether SSL is required
|
||||
#babudb.ssl.enabled = false
|
||||
|
||||
# server credentials for SSL handshakes
|
||||
#babudb.ssl.service_creds = /etc/xos/xtreemfs/truststore/certs/osd.p12
|
||||
#babudb.ssl.service_creds.pw = xtreemfs
|
||||
#babudb.ssl.service_creds.container = pkcs12
|
||||
|
||||
# trusted certificates for SSL handshakes
|
||||
#babudb.ssl.trusted_certs = /etc/xos/xtreemfs/truststore/certs/xosrootca.jks
|
||||
#babudb.ssl.trusted_certs.pw = xtreemfs
|
||||
#babudb.ssl.trusted_certs.container = jks
|
||||
|
||||
#babudb.ssl.authenticationWithoutEncryption = false
|
||||
|
||||
#####################################################################
|
||||
# Internal options (usually do not have to be touched)
|
||||
#####################################################################
|
||||
plugin.jar = /usr/share/java/BabuDB_replication_plugin.jar
|
||||
|
||||
# paths to libraries this plugin depends on
|
||||
babudb.repl.dependency.0 = /usr/share/java/Flease.jar
|
||||
|
||||
# local time renew in milliseconds
|
||||
#babudb.localTimeRenew = 0
|
||||
|
||||
# chunk size, for initial load of file chunks
|
||||
#babudb.repl.chunkSize = 5242880
|
||||
|
||||
# decides whether redirects should be handled by the user-application or not
|
||||
#babudb.repl.redirectIsVisible = false
|
||||
60
contrib/server-repl-plugin/update_BabuDB_replication_plugin_jar.sh
Executable file
60
contrib/server-repl-plugin/update_BabuDB_replication_plugin_jar.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (c) 2012 Michael Berlin, Zuse Institute Berlin
|
||||
# Licensed under the BSD License, see LICENSE file for details.
|
||||
|
||||
set -e
|
||||
|
||||
trap onexit 1 2 3 15 ERR
|
||||
|
||||
function onexit() {
|
||||
local exit_status=${1:-$?}
|
||||
echo ERROR: Exiting $0 with $exit_status
|
||||
exit $exit_status
|
||||
}
|
||||
|
||||
replication_dir_in_babudb_trunk="java/replication"
|
||||
|
||||
cat <<EOF
|
||||
This script updates the binary .jar file which contains the BabuDB replication plugin.
|
||||
|
||||
EOF
|
||||
|
||||
if [ -z "$BABUDB" ]
|
||||
then
|
||||
known_babudb_dirs="../../../../googlecode-svn-babudb/trunk"
|
||||
for dir in $known_babudb_dirs
|
||||
do
|
||||
if [ -d "$dir" ]
|
||||
then
|
||||
BABUDB="$dir"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -z "$BABUDB" ]
|
||||
then
|
||||
echo "The environment variable BABUDB was not set. Please point it to a checkout directory of the SVN trunk of the BabuDB project (svn checkout http://babudb.googlecode.com/svn/trunk/ babudb)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$BABUDB" ]
|
||||
then
|
||||
echo "The environment variable BABUDB does not point to an existing directory. BABUDB = ${BABUDB}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Updating the .jar files required by the BabuDB replication code first..."
|
||||
${BABUDB}/${replication_dir_in_babudb_trunk}/update_jar_dependencies.sh
|
||||
echo "Finished updating the .jar files required by the BabuDB replication code."
|
||||
|
||||
echo "compiling BabuDB replication plugion (BabuDB_replication_plugin.jar)"
|
||||
babudb_replication_buildfile="${BABUDB}/${replication_dir_in_babudb_trunk}/build.xml"
|
||||
babudb_replication_jar_source="${BABUDB}/${replication_dir_in_babudb_trunk}/../dist/replication/BabuDB_replication_plugin.jar"
|
||||
babudb_replication_jar_dest=$(dirname "$0")
|
||||
|
||||
# ant clean -f "$babudb_replication_buildfile" >/dev/null
|
||||
ant jar -f "$babudb_replication_buildfile" >/dev/null
|
||||
cp -a "$babudb_replication_jar_source" "$babudb_replication_jar_dest"
|
||||
|
||||
echo "finished compiling BabuDB replication plugion (BabuDB_replication_plugin.jar)"
|
||||
34
contrib/travis/parse_results.py
Executable file
34
contrib/travis/parse_results.py
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2014 by Johannes Dillmann, Zuse Institute Berlin
|
||||
# Licensed under the BSD License, see LICENSE file for details.
|
||||
|
||||
import sys
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("file", type=argparse.FileType('r'))
|
||||
parser.add_argument("test")
|
||||
args = parser.parse_args()
|
||||
|
||||
results = json.load(args.file)
|
||||
result = None
|
||||
|
||||
if args.test in results:
|
||||
result = results[args.test]
|
||||
|
||||
if type(result) == bool and result:
|
||||
print "true"
|
||||
sys.exit(0)
|
||||
|
||||
if type(result) == dict and all(result.values()):
|
||||
print "true"
|
||||
sys.exit(0)
|
||||
|
||||
print "false"
|
||||
sys.exit(1)
|
||||
|
||||
7
contrib/vagrant/provision.sh
Normal file
7
contrib/vagrant/provision.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
echo "Installing XtreemFS build dependencies"
|
||||
apt-get -y update
|
||||
apt-get -y install openjdk-7-jdk ant build-essential libssl-dev libfuse-dev libattr1-dev cmake libboost-regex-dev libboost-program-options-dev libboost-thread-dev libboost-system-dev valgrind
|
||||
echo "export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64" >> /etc/bash.bashrc
|
||||
echo "export BUILD_CLIENT_TESTS=true" >> /etc/bash.bashrc
|
||||
223
contrib/xtreemfs-osd-farm/xtreemfs-osd-farm
Executable file
223
contrib/xtreemfs-osd-farm/xtreemfs-osd-farm
Executable file
@@ -0,0 +1,223 @@
|
||||
#!/bin/bash
|
||||
|
||||
### BEGIN INIT INFO
|
||||
# Provides: xtreemfs-osd-farm
|
||||
# Required-Start: $network $remote_fs
|
||||
# Required-Stop: $network $remote_fs
|
||||
# Should-Start: xtreemfs-dir
|
||||
# Should-Stop: $null
|
||||
# Default-Start: 3 5
|
||||
# Default-Stop: 0 1 2 6
|
||||
# Short-Description: XtreemFS OSD init.d script which can start multiple OSDs on the same machine in contrast to xtreemfs-osd
|
||||
# Description: XtreemFS Object Storage Device (OSD). http://www.xtreemfs.org/
|
||||
### END INIT INFO
|
||||
|
||||
# Source function library.
|
||||
if [ -e /lib/lsb/init-functions ]; then
|
||||
. /lib/lsb/init-functions
|
||||
else
|
||||
. /etc/init.d/functions
|
||||
fi
|
||||
|
||||
XTREEMFS_USER=xtreemfs
|
||||
|
||||
# List of OSD instances which shall be started, seperated by spaces.
|
||||
# For every OSD there has to be a configuration file.
|
||||
OSD_INSTANCES="osd1 osd2 osd3"
|
||||
|
||||
# OSD specific options. Use %OSDNAME% which will be substituted.
|
||||
PID_OSD_GENERIC=/var/run/xtreemfs_%OSDNAME%.pid
|
||||
|
||||
CONFIG_OSD_GENERIC=/etc/xos/xtreemfs/%OSDNAME%.config.properties
|
||||
|
||||
LOG_OSD_GENERIC=/var/log/xtreemfs/%OSDNAME%.log
|
||||
|
||||
if [ -z $JAVA_HOME ]; then
|
||||
export JAVA_HOME=/usr
|
||||
fi
|
||||
JAVA_CALL="$JAVA_HOME/bin/java -ea -cp /usr/share/java/XtreemFS.jar:/usr/share/java/BabuDB.jar:/usr/share/java/Flease.jar:/usr/share/java/protobuf-java-2.5.0.jar:/usr/share/java/Foundation.jar:/usr/share/java/jdmkrt.jar:/usr/share/java/jdmktk.jar:/usr/share/java/commons-codec-1.3.jar"
|
||||
|
||||
# For SELinux we need to use 'runuser' not 'su'
|
||||
if [ -x "/sbin/runuser" ]; then
|
||||
SU="/sbin/runuser"
|
||||
else
|
||||
SU="/bin/su"
|
||||
fi
|
||||
|
||||
pre_check() {
|
||||
LOG_OSD="$1"
|
||||
CONFIG_OSD="$2"
|
||||
exists=`grep -c $XTREEMFS_USER /etc/passwd`
|
||||
if [ $exists -eq 0 ]; then
|
||||
echo "User $XTREEMFS_USER does not exist. Create it first."
|
||||
exit 1
|
||||
fi
|
||||
log_directory=`dirname $LOG_OSD`
|
||||
if [ ! -e $log_directory ]; then
|
||||
echo "Directory for logfiles $log_directory does not exist. Create it first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$CONFIG_OSD" ]; then
|
||||
echo -e "Config file not found: $CONFIG_OSD"
|
||||
echo
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
get_osd_list() {
|
||||
OSD_LIST=""
|
||||
if [ -n "$1" ]; then
|
||||
# Check if given OSD name in list of allowed OSDs.
|
||||
for osd in $OSD_INSTANCES; do
|
||||
[ "$osd" = "$1" ] && OSD_LIST="$1" && return 0
|
||||
done
|
||||
|
||||
echo "OSD \"$1\" is not part of the list OSD_INSTANCES."
|
||||
exit 1
|
||||
else
|
||||
OSD_LIST=$OSD_INSTANCES
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
substitute_osdname() {
|
||||
echo "$1" | sed -e "s/%OSDNAME%/$2/g"
|
||||
}
|
||||
|
||||
pre_check_vars() {
|
||||
for var in $LOG_OSD_GENERIC $PID_OSD_GENERIC $CONFIG_OSD_GENERIC; do
|
||||
echo "$var" | grep %OSDNAME% >/dev/null || {
|
||||
echo "%OSDNAME% parameter not found in variable: $var"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
}
|
||||
|
||||
start() {
|
||||
get_osd_list "$1"
|
||||
pre_check_vars
|
||||
|
||||
for osdname in $OSD_LIST; do
|
||||
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
|
||||
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
|
||||
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
|
||||
|
||||
pre_check "$LOG_OSD" "$CONFIG_OSD"
|
||||
|
||||
echo >> $LOG_OSD
|
||||
date >> $LOG_OSD
|
||||
echo -e "Starting XtreemFS Object Storage Device (OSD): $osdname ... \n\n" >> $LOG_OSD
|
||||
|
||||
echo -n "Starting XtreemFS Object Storage Device (OSD): $osdname ... "
|
||||
$SU -s /bin/bash $XTREEMFS_USER -c "$JAVA_CALL org.xtreemfs.osd.OSD $CONFIG_OSD" >> $LOG_OSD 2>&1 &
|
||||
PROCPID=$!
|
||||
echo $PROCPID > $PID_OSD
|
||||
sleep 1s
|
||||
|
||||
if [ -e /proc/$PROCPID ]; then
|
||||
echo "success"
|
||||
else
|
||||
echo "failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
done
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
stop() {
|
||||
get_osd_list "$1"
|
||||
pre_check_vars
|
||||
|
||||
for osdname in $OSD_LIST; do
|
||||
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
|
||||
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
|
||||
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
|
||||
|
||||
result=0
|
||||
if [ -f $PID_OSD ]; then
|
||||
echo -n "Stopping XtreemFS Object Storage Device (OSD): $osdname ... "
|
||||
killproc -p $PID_OSD $SU
|
||||
result=$?
|
||||
if [ $result -eq 0 ]; then
|
||||
rm -f $PID_OSD
|
||||
echo "success"
|
||||
else
|
||||
echo "failed"
|
||||
fi
|
||||
else
|
||||
echo "XtreemFS Object Storage Device (OSD) is not running"
|
||||
fi
|
||||
|
||||
done
|
||||
return $result
|
||||
}
|
||||
|
||||
status() {
|
||||
get_osd_list "$1"
|
||||
pre_check_vars
|
||||
|
||||
rc=0
|
||||
for osdname in $OSD_LIST; do
|
||||
LOG_OSD=$(substitute_osdname "$LOG_OSD_GENERIC" "$osdname")
|
||||
PID_OSD=$(substitute_osdname "$PID_OSD_GENERIC" "$osdname")
|
||||
CONFIG_OSD=$(substitute_osdname "$CONFIG_OSD_GENERIC" "$osdname")
|
||||
|
||||
if [ -f $PID_OSD ]; then
|
||||
PROCPID=`cat $PID_OSD`
|
||||
if [ ! -e /proc/$PROCPID ]; then
|
||||
echo "XtreemFS Object Storage Device (OSD): $osdname has crashed"
|
||||
rc=1
|
||||
else
|
||||
echo "XtreemFS Object Storage Device (OSD): $osdname is running"
|
||||
fi
|
||||
else
|
||||
echo "XtreemFS Object Storage Device (OSD): $osdname is not running"
|
||||
rc=3
|
||||
fi
|
||||
done
|
||||
|
||||
return $rc
|
||||
}
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
start "$2"
|
||||
result=$?
|
||||
;;
|
||||
stop)
|
||||
stop "$2"
|
||||
result=$?
|
||||
;;
|
||||
status)
|
||||
status "$2"
|
||||
result=$?
|
||||
;;
|
||||
reload)
|
||||
result=0
|
||||
;;
|
||||
restart)
|
||||
stop "$2" && sleep 1 && start "$2"
|
||||
result=$?
|
||||
;;
|
||||
try-restart)
|
||||
## Stop the service and if this succeeds (i.e. the
|
||||
## service was running before), start it again.
|
||||
$0 status "$2" >/dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
$0 restart "$2"
|
||||
result=$?
|
||||
else
|
||||
result=0
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
echo -e "Usage: $0 {start|stop|restart|reload|status|try-restart}\n"
|
||||
result=1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $result
|
||||
Reference in New Issue
Block a user