#!/bin/sh

# Helper script to flag done LoadLeveler jobs.
# The script is called periodically by the grid-manager.
#
# Anders Waananen <waananen@nbi.dk>
# contrib:
# Christian Søttrup <soettrup@nbi.dk>

# The control directory is passed as the first argument; without an
# existing directory there is nothing to scan.
control_dir="$1"
if test ! -d "$control_dir"
then
  echo "Control directory '$control_dir' does not exist." 1>&2
  exit 1
fi

# Fall back to the default installation prefix when ARC_LOCATION is unset
# or empty.
: "${ARC_LOCATION:=/opt/nordugrid}"

# Load the LoadLeveler environment settings shipped with the installation.
# NOTE(review): presumably this is where LL_BIN_PATH (used further down to
# locate llq) gets defined - confirm against configure-ll-env.sh.
ll_env="${ARC_LOCATION}/libexec/configure-ll-env.sh"
if test ! -f "$ll_env"
then
  echo "${ll_env} not found." 1>&2
  exit 1
fi
# Use the POSIX "." command: "source" is a bash/ksh extension and is not
# available in every /bin/sh (dash, for instance).
. "$ll_env"

# Print the value of the first "<key>=<value>" line of a file.
# Arguments: $1 - key name (inserted verbatim into a sed pattern, so it
#                 must not contain regex or "/" characters)
#            $2 - file to read
# Outputs:   the value on stdout; nothing if the key is absent or the
#            file is not readable.
ll_first_value() {
  test -r "$2" || return 0
  sed -n "s/^$1=//p" "$2" | head -n 1
}

# Print the line that is stored in a job's .lrms_done file.
# Arguments: $1 - exit code taken from the job's .diag file; may be empty
# Outputs:   "<code> <message>" on stdout
ll_done_line() {
  case "$1" in
    '')
      printf '%s\n' "-1 Job finished with unknown exit code"
      ;;
    152|24)
      # Both values are reported as 24 with a time-limit message.
      # NOTE(review): 152 = 128 + 24, presumably the shell status of a
      # process killed by signal 24 (SIGXCPU on Linux) - confirm.
      printf '%s\n' "24 Job exceeded time limit."
      ;;
    *)
      printf '%s\n' "$1 Executable finished with exit code $1"
      ;;
  esac
}

# Flag every finished LoadLeveler job found in a control directory.
#
# For each job.<id>.local file that has a job.<id>.grami file but no
# job.<id>.lrms_done file yet, ask llq about the job's local id. When llq
# prints no "Status" line, or the status is "Completed", the exit code is
# looked up in <sessiondir>.diag, the result is written to
# job.<id>.lrms_done and gm-kick is run on job.<id>.status.
#
# Globals:   LL_BIN_PATH (read) - directory containing llq
#            NORDUGRID_LOCATION, ARC_LOCATION (read) - prefix of
#            libexec/gm-kick; ARC_LOCATION is used when
#            NORDUGRID_LOCATION is unset or empty
# Arguments: $1 - control directory
# Returns:   1 if $1 is not a directory, otherwise the status of the scan
#            pipeline
ll_scan_jobs() {
  test -d "$1" || return 1
  scan_dir=$1

  find "$scan_dir" -maxdepth 1 -type f -name 'job.*.local' |
  while IFS= read -r i
  do
    # The file may have vanished since find listed it.
    test -f "$i" || continue

    # job.<id>.local -> <id>
    jobid=${i##*/}
    jobid=${jobid#job.}
    jobid=${jobid%.local}
    donefile="${scan_dir}/job.${jobid}.lrms_done"
    statusfile="${scan_dir}/job.${jobid}.status"

    # Skip jobs already flagged as done and jobs without a .grami file.
    test -f "$donefile" && continue
    test -f "${scan_dir}/job.${jobid}.grami" || continue

    # Local LRMS id of the job. The value is taken textually rather than
    # by eval-ing the line, so nothing stored in the .local file is ever
    # executed by this script.
    localid=$(ll_first_value localid "$i")
    test -n "$localid" || continue

    # A job that llq still lists is only handled once its status is
    # "Completed"; any other status (including "Canceled") is left for a
    # later run. A job for which llq prints no Status line is treated as
    # finished.
    if status=$("${LL_BIN_PATH}/llq" -l "$localid" | grep '^ *Status')
    then
      status=$(printf '%s\n' "$status" |
        sed -e 's/^[[:space:]]*Status:[[:space:]]*//' -e 's/[[:space:]]*$//')
      test "$status" = "Completed" || continue
    fi

    # Without a session directory there is no .diag file to consult.
    session=$(ll_first_value sessiondir "$i")
    test -n "$session" || continue

    # Empty when the .diag file is missing or has no exitcode line.
    exitcode=$(ll_first_value exitcode "${session}.diag")

    ll_done_line "$exitcode" > "$donefile"
    # This script only guarantees ARC_LOCATION is set, so fall back to it
    # instead of running /libexec/gm-kick when NORDUGRID_LOCATION is
    # missing.
    "${NORDUGRID_LOCATION:-${ARC_LOCATION}}/libexec/gm-kick" "$statusfile"
  done
}

ll_scan_jobs "$control_dir"

# Wait a minute before reporting success.
# NOTE(review): presumably this throttles how often the periodically
# invoked scan is re-run - confirm against the caller.
sleep 60
exit 0
