#!/bin/bash
################################################################################
# ujssubmit 0.2
# Unified Job Scheduler
#-------------------------------------------------------------------
# created by Martin Rupp 05.03.2012
# mail: martin.rupp@gcsc.uni-frankfurt.de
# authors: Martin Rupp, Andreas Vogel, Arne Naegel.
#
# ugsubmit is a job submitting script to unify the job schedulers
# on different clusters. Supported Clusters are:
# - cekon , G-CSC Frankfurt (SLURM, salloc)
# - cesari , G-CSC Frankfurt (SLURM, salloc)
# - Jugene, Forschungszentrum Juelich (ll_submit)
# - Juqueen, Forschungszentrum Juelich (ll_submit)
# - Jureca, Forschungszentrum Juelich (SLURM)
# - Juwels, Forschungszentrum Juelich (SLURM)
# - SDCluster, Ruhr University Bochum (SLURM)
# - Hermit, HLRS Stuttgart (qsub, aprun)
# - NecNehalem, HLRS Stuttgart (qsub, mpirun)
# - MOAB Workload Manager (msub, srun)
# - Rosa, CSCS Lugano (SLURM, aprun)
#
# feel free to add more! see the files "schedulers/new_template" and "clusters"
#
# usage: see function usage(). quick manual:
# use environment variables e.g. 
# export UGSUBMIT_TYPE=cekon 
# export UGSUBMIT_EMAIL=martin.rupp@gcsc.uni-frankfurt.de
# to start a script running 64 processes:
# ugsubmit 64 --- ugshell -ex laplace.lua -numPreRefs 5 -numRefs 9
# output is then written to ./job.64.(current date)/job.out
# the current job dir is linked to by lastRun
################################################################################

# todo: error on file not found etc.


################################################################################
# set default values
#
# All variables below are plain globals. Most of them are overwritten by the
# command line parsing further down in this file.
test=false               # -test: only write the scripts, do not execute
nppn="max"               # processes per node; "max" is resolved once nppnmax is known
walltime="unlimited"     # walltime
jobname="job"
scriptname=$0            # name of script
execpath=$(pwd)          # execution path of script
# This used to read the undefined variable "execPath" (wrong case), which left
# runDir empty. It now uses the variable defined one line above.
runDir=$execpath
execdate=$(date)         # date of execution
verbose=false
exclusive=false
nosuffix=false
# directory this script file lives in; used to locate the files sourced later
scriptpath="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# mail notification flags (set by the -mail-* options)
mailStart=false
mailEnd=false
mailError=false
mail=false
jobid="unknown"

profilePrefix=""

tail=false
interactive=false
workDir=$(pwd)           # -dir: directory the job directory is created in

# Jugene Options
JugeneMode="VN"
JugeneMapfile="TXYZ"
JugeneVerbose=2

# Hermit Options (max 31)
HermitWorkspace=0

nosubdir=false

# general PBS Options
pbsMailtype=""

# all arguments joined into one string; only used for logging into info.txt
completeCommandline=$*

################################################################################
# todo: option -nodir
# todo: interactive mode

echo "ugsubmit 0.2. (c) Goethe-Center for Scientific Computing 2012-2014"


# complete -W "-test -nppn -cluster -walltime -mail-start -mail-end -mail-error -mail-arr -verbose -tail -i -dir -scan -Jugene-mode -Jugene-mapfile -Jugene-verbose -Hermit-workspace" ugsubmit

################################################################################
# usage: print the help text to stdout and terminate the script (exit status 0).
#
# Reads the globals scriptname, nppn, jobname, walltime, JugeneMode,
# JugeneMapfile, JugeneVerbose and scriptpath, and the environment variables
# UGSUBMIT_TYPE, UGSUBMIT_MORE and UGSUBMIT_EMAIL.
#
# If UGSUBMIT_TYPE is non-empty, only the parameter section whose name equals
# it is printed. If it is empty, all sections are printed and
# "$scriptpath/clusterdetect" is sourced before UGSUBMIT_TYPE is shown as
# "(auto-detected)" (NOTE(review): presumably that file sets UGSUBMIT_TYPE).
#
# All expansions are quoted: values with whitespace or glob characters are
# printed verbatim and cannot break the [ ] tests.
function usage
{
	echo ""
	echo " usage:  ${scriptname} <npe> [optional Parameters] --- <binary> <arg1> <arg2> ..."
	echo " "
	echo "        <npe>                  : number of processes"
	echo "        <binary>               : the binary"
	echo "        <args>                 : arguments for execution of binary"
	echo ""
	echo " optional Parameters are:"
	echo "        -test                  : do not actually execute, only write scripts (also sets -verbose)"
	echo "        -nppn <nppn>           : number of processes per node (optional, default ${nppn})"
	echo "        -cluster <type>        : cluster to use. see below (or use env. variable UGSUBMIT_TYPE)"
	echo "        -name <jobname>        : a jobname to be used (optional, default \"${jobname}\")"
	echo "        -nosuffix              : do not add .npe.date suffix to jobname"
	echo "        -walltime <hh:mm:ss>   : max walltime for job to run (optional; default is ${walltime})"
	echo "        -partition <partition> : the partition to be used"
	echo "        -mem <mem>             : max (overall) main memory for the job"
	echo "        -email <emailadress>   : emailadress. you can also use the environment variable UGSUBMIT_EMAIL."
	echo "        -mail-start            : mail when job starts"
	echo "        -mail-end              : mail when job ends"
	echo "        -mail-error            : mail on error"
	echo "        -mail-all              : mail when job starts, ends, and on error"
	echo "        -exclusive             : exclusive allocation of nodes (no sharing)"
	echo "        -verbose, -v           : enable verbose mode"
	echo "        -tail                  : display the output (as tail -f. job runs even if you hit Ctrl-C)"
	echo "        -i                     : interactive (not fully supported)"
	echo "        -dir                   : directory to use"
	echo "        -mkdirs <dir1;dir2;..> : sub-directories to create within working directory"
	echo "        -copyScript            : copy executed lua script to job directory and execute it there"
	echo "        -queue                 : the queue to use"
	echo "        -nosubdir              : don't create job in a subdir, but in dir (default .) directly. (warning: might be dangerous)"
	echo " "
	echo " Supported Cluster Types: NecNehalem, Hermit, Jugene, Juqueen, Jureca, Juwels, SDCluster, mpi (mpi-foreground, mpi-background), cekon, cesari, moab."
	if [ -n "$UGSUBMIT_TYPE" ]; then
		echo " Only UGSUBMIT_TYPE=$UGSUBMIT_TYPE specific parameters are shown ! Set UGSUBMIT_TYPE=\"\" to see all."
	fi
	echo " "

	# cluster specific sections: shown for an empty type or the matching type
	if [ -z "$UGSUBMIT_TYPE" ] || [ "$UGSUBMIT_TYPE" == "Jugene" ]; then
		echo " Jugene Parameters"
		echo "        -Jugene-mode            : VN, DUAL or SMP. Default ${JugeneMode}"
		echo "        -Jugene-mapfile         : Mapfile. Default ${JugeneMapfile}"
		echo "        -Jugene-verbose         : Verbose Level. Default ${JugeneVerbose}"
	fi
	if [ -z "$UGSUBMIT_TYPE" ] || [ "$UGSUBMIT_TYPE" == "Hermit" ]; then
		echo ""
		echo " Hermit Parameters"
		echo "        -Hermit-workspace <days>: Use the Hermit Workspace mechanism for I/O. default 31"
		echo "                                  https://wickie.hlrs.de/platforms/index.php/Workspace_mechanism"
	fi
	if [ -z "$UGSUBMIT_TYPE" ] || [ "$UGSUBMIT_TYPE" == "Juqueen" ]; then
		echo ""
		echo " Juqueen Parameters"
		echo ""
	fi
	if [ -z "$UGSUBMIT_TYPE" ] || [ "$UGSUBMIT_TYPE" == "Jureca" ]; then
		echo ""
		echo " Jureca Parameters"
		echo "        -Jureca-partition <part>: Partition to use; default is batch:"
		echo "                                  devel (max. 2h walltime, 8 nodes, 128G memory/node)"
		echo "                                  batch (max. 24h walltime, 128 nodes, 128/256 memory/node)"
		echo "                                  mem512 (512G memory/node, needs to be explicitly requested)"
		echo "        -Jureca-memory <mem>:     Amount of memory to use (default is 128): 128, 256, 512."
		echo ""
	fi
	if [ -z "$UGSUBMIT_TYPE" ] || [ "$UGSUBMIT_TYPE" == "Juwels" ]; then
		echo ""
		echo " Juwels Parameters"
		echo "        -Juwels-partition <part>: Partition to use; default is batch:"
		echo "                                  batch (max. 24h walltime, 256 nodes, 96G memory/node)"
		echo "                                  mem192 (192G memory/node)"
		echo "        -Juwels-project <name>:   Project name under which the job is to be carried out,"
		echo "                                  e.g., HFUxxx; needed for accounting."
		echo ""
	fi
	if [ -z "$UGSUBMIT_TYPE" ] || [ "$UGSUBMIT_TYPE" == "DoraCSCS" ]; then
		echo ""
		echo " Dora Parameters"
		echo ""
	fi

	echo ""
	echo " descr: The script creates a new folder relative to the current path with the name"
	echo "        '<jobname>.<npe>' (where <npe> is the number of processes. In this new directory"
	echo "        a file named 'info.txt' is created, to identify the job. Then, depending"
	echo "        on the selected cluster type, a shell script is created and submitted to the"
	echo "        job scheduling system. ugsubmit also sets a symbolic link lastRun to the newly"
	echo "        created directory. the file 'jobid' contains the job id."
	echo "        A list of submitted jobs is created in the file 'ugjobs'."
	echo " "
	echo " exported environment variables to the script/program:"
	echo "        UGS_JOBNAME            : job name"
	echo "        UGS_OUTDIR             : output directory"
	echo "        UGS_NP                 : see <npe>"
	echo "        UGS_NPPN               : see -nppn"
	echo "        UGS_MYDIR              : the directory ugsubmit is started from"
	echo ""
	echo " Environment Variables to control ugsubmit:"
	if [ -z "$UGSUBMIT_TYPE" ]; then
		source "$scriptpath/clusterdetect"
		echo "        UGSUBMIT_TYPE=\"$UGSUBMIT_TYPE\" (auto-detected)"
	else
		echo "        UGSUBMIT_TYPE=\"$UGSUBMIT_TYPE\""
	fi
	if [ -n "$UGSUBMIT_MORE" ]; then
		echo "        UGSUBMIT_MORE=\"$UGSUBMIT_MORE\""
	fi
	if [ -z "$UGSUBMIT_EMAIL" ]; then
		echo "        UGSUBMIT_EMAIL is not set!"
	else
		echo "        UGSUBMIT_EMAIL=\"$UGSUBMIT_EMAIL\""
	fi

	exit 0
}

################################################################################
# read args
#
# Expected command line:  <npe> [options] --- <binary> <arg1> <arg2> ...
# Options are consumed until the separator "---"; the first word after it is the
# binary, everything after that is joined into the string $args.
#
# Fixes compared to the previous if/elif chain:
#  * -mail-all now sets mailEnd (it used to set the otherwise unused "mailStop")
#  * an option that needs a value but is the last argument is reported as an
#    error; before, "shift 2" failed silently and the loop never terminated
#  * "$1" is quoted, so empty arguments or arguments with blanks are handled
#  * error exits return a non-zero status
#
# NOTE(review): the help text says -test "also sets -verbose"; that is not done
# here (and was not done before) -- confirm whether a scheduler script does it.

# Abort with an error unless the option in $1 is followed by a value.
# Call as: ujs_require_value "$@"
function ujs_require_value
{
	if [ $# -lt 2 ]; then
		echo "missing value for option $1"
		exit 1
	fi
}

# check that at least 3 args given, else print usage
if [ $# -lt 3 ]; then
   usage
fi

npe=$1             # number of processes
shift

while true
do
	if [ $# -eq 0 ]; then
		# ran out of arguments before seeing the "---" separator
		echo "missing -ex <binary>!"
		exit 1
	fi

	case "$1" in
		-nppn)             ujs_require_value "$@"; nppn=$2;             shift 2 ;;
		-walltime)         ujs_require_value "$@"; walltime=$2;         shift 2 ;;
		-name)             ujs_require_value "$@"; jobname=$2;          shift 2 ;;
		-nosuffix)         nosuffix=true;                               shift 1 ;;
		-so)               ujs_require_value "$@"; scheduleroptions=$2; shift 2 ;;
		-test)             test=true;                                   shift 1 ;;
		-email)            ujs_require_value "$@"; UGSUBMIT_EMAIL=$2;   shift 2 ;;
		-cluster)          ujs_require_value "$@"; UGSUBMIT_TYPE=$2;    shift 2 ;;
		-Jugene-mode)      ujs_require_value "$@"; JugeneMode=$2;       shift 2 ;;
		-Jugene-mapfile)   ujs_require_value "$@"; JugeneMapfile=$2;    shift 2 ;;
		-Jugene-verbose)   ujs_require_value "$@"; JugeneVerbose=$2;    shift 2 ;;
		-mail-error)
			mailError=true
			mail=true
			pbsMailtype=${pbsMailtype}a
			shift 1
			;;
		-mail-end)
			mailEnd=true
			mail=true
			pbsMailtype=${pbsMailtype}e
			shift 1
			;;
		-mail-start)
			mailStart=true
			mail=true
			pbsMailtype=${pbsMailtype}b
			shift 1
			;;
		-mail-all)
			mailStart=true
			mailEnd=true
			mailError=true
			mail=true
			pbsMailtype="abe"
			shift 1
			;;
		-exclusive)        exclusive=true;                              shift 1 ;;
		-i)                interactive=true;                            shift 1 ;;
		-verbose|-v)       verbose=true;                                shift 1 ;;
		-tail)             tail=true;                                   shift 1 ;;
		-Hermit-workspace) ujs_require_value "$@"; HermitWorkspace=$2;  shift 2 ;;
		-dir)              ujs_require_value "$@"; workDir=$2;          shift 2 ;;
		-mkdirs)           ujs_require_value "$@"; mkdirs=$2;           shift 2 ;;
		-copyScript)       copyScript=true;                             shift 1 ;;
		-queue)            ujs_require_value "$@"; queue=$2;            shift 2 ;;
		-scan)
			profilePrefix="scalasca -analyze \\"
			shift 1
			;;
		-scant)
			profilePrefix="scalasca -analyze -t \\"
			shift 1
			;;
		-profilePrefix)    ujs_require_value "$@"; profilePrefix=$2;    shift 2 ;;
		-runjobarg)        ujs_require_value "$@"; runjobarg=$2;        shift 2 ;;
		-scorep)
			runjobarg="--envs SCOREP_EXPERIMENT_DIRECTORY=ScoreP SCOREP_METRIC_PAPI=PAPI_FP_OPS,PAPI_LD_INS,PAPI_SR_INS,PAPI_INT_INS SCOREP_MPI_MAX_GROUPS=100 SCOREP_MPI_MAX_COMMUNICATORS=100 SCOREP_PROFILING_MAX_CALLPATH_DEPTH=60 "
			shift 1
			;;
		-nosubdir)         nosubdir=true;                               shift 1 ;;
		-Jureca-partition) ujs_require_value "$@"; jureca_part=$2;      shift 2 ;;
		-Jureca-memory)    ujs_require_value "$@"; jureca_mem=$2;       shift 2 ;;
		-Juwels-partition) ujs_require_value "$@"; juwels_part=$2;      shift 2 ;;
		-Juwels-project)   ujs_require_value "$@"; juwels_project=$2;   shift 2 ;;
		-Shaheen-project)  ujs_require_value "$@"; shaheen_project=$2;  shift 2 ;;
		-partition)        ujs_require_value "$@"; partition=$2;        shift 2 ;;
		-mem)              ujs_require_value "$@"; mem=$2;              shift 2 ;;
		---)
			# separator: everything that follows is the binary and its arguments
			shift
			break
			;;
		*)
			echo "undefined option $1"
			exit 1
			;;
	esac
done

# a binary is mandatory unless running interactively
if [ "$interactive" == false ] && [ $# -eq 0 ]; then
	echo "missing binary!"
	exit 1
fi

binary=$1
if [ $# -gt 0 ]; then
	shift
fi
# remaining arguments as one blank-separated string (quoting is not preserved)
args=$*

#### Get Options from actual cluster implementation ####
#### (default clusters also set there)
source "$scriptpath/clusters"

# UJS_GetOptions is not defined in this file.
# NOTE(review): presumably it comes from the file sourced above and sets
# nppnmax and pemax for the selected UGSUBMIT_TYPE -- confirm.
UJS_GetOptions

# npe is used in integer comparisons and in the nnodes arithmetic below, so
# reject anything that is not a positive decimal integer up front.
if ! [[ "$npe" =~ ^[0-9]+$ ]] || [ "$((10#$npe))" -le 0 ]; then
	echo "ERROR: number of processes must be a positive integer (got \"$npe\")"
	exit 1
fi
npe=$((10#$npe))     # normalise, so leading zeros are not read as octal

# "max": use as many processes per node as allowed, but not more than npe
if [ "$nppn" == "max" ]; then
	if [ "$npe" -gt "$nppnmax"  ]; then
		nppn=$nppnmax
	else
		nppn=$npe
	fi
fi

# nppn is the divisor below; 0 or a non-number would break the arithmetic
if ! [[ "$nppn" =~ ^[0-9]+$ ]] || [ "$((10#$nppn))" -le 0 ]; then
	echo "ERROR: nppn must be a positive integer or \"max\" (got \"$nppn\")"
	exit 1
fi
nppn=$((10#$nppn))

if [ "$nppn" -gt "$nppnmax" ]; then
	echo "$UGSUBMIT_TYPE does not support nppn=$nppn (max is $nppnmax)"
	exit 1
fi

if [ "$npe" -gt "$pemax" ]; then
	# the previous message claimed pemax was *greater* than the request
	echo "ERROR: requested PEs ($npe) exceed pemax for $UGSUBMIT_TYPE ($pemax)"
	exit 1
fi

#nnodes=$((npe/nppn))
# round up (note that bash can't do floating point math)
nnodes=$(( (npe + (nppn - 1)) / nppn ))

################################################################################

# get path to executable
# First look the binary up as given (PATH search, or the path itself if it
# contains a slash). If that fails, retry relative to the current directory.
# The retry was already present before, but unreachable: the script exited
# right after it regardless of the result.
executable=$(which "$binary")
if [ -z "$executable" ] && [[ -n "$binary" && "$binary" != /* ]]; then
	executable=$(which "./$binary")
fi
if [ -z "$executable" ]; then
	echo "The binary \"$binary\" could not be found."
	exit 1
fi
# a result relative to the current directory is made absolute
if [ "${executable:0:1}" = "." ]; then
	executable=$execpath/$binary
fi

dirdate=$(date "+%Yy%mm%dd-%Hh%Mm%Ss")

if [ "$nosuffix" == false ]; then
	jobname=$jobname.$npe.$dirdate
fi

#create output directory
if [ "$nosubdir" == true ]; then
	# job files go directly into the working directory, which must exist
	outdir=$workDir
	if [ ! -d "$outdir" ]; then
		echo "ERROR: directory \"$outdir\" does not exist"
		exit 1
	fi
else
	if [ "$UGSUBMIT_TYPE" == "Hermit" ] && [ "$HermitWorkspace" -gt 0 ] ; then
		if [ "$test" == true ]; then
			HermitWorkspace=1
		fi
		# ws_allocate is an external command; its output is taken as the
		# directory name
		outdir=$(ws_allocate "$jobname" "$HermitWorkspace")
		if [ -z "$outdir" ]; then
			echo "ERROR: could not allocate a Hermit workspace for $jobname"
			exit 1
		fi
		echo "Hermit Workspace dir for $jobname is $outdir, for $HermitWorkspace days"
		ln -s "$outdir" "$jobname"
	else
		HermitWorkspace=0
		outdir=$workDir/$jobname

		# check if directly exist
		if [ -d "$outdir" ]; then
			# add milliseconds to disambigue
			millisec=$(date "+%3Nms")
			if [ "$millisec" != "3Nms" ]; then # fix for maxosx: %N not supported
				outdir=$outdir$millisec
			fi
		fi

		# if still exist: add random string, else create directory
		if [ -d "$outdir" ]; then
			outdir=$(mktemp -d "$outdir.XXXXXX")
			if [ -z "$outdir" ]; then
				echo "ERROR: could not create a unique job directory"
				exit 1
			fi
		elif ! mkdir "$outdir"; then
			# without this check the job would later be set up in the wrong
			# directory
			echo "ERROR: could not create directory \"$outdir\""
			exit 1
		fi
	fi
fi

echo "Created directory: $outdir"
# lastRun always points to the most recent job directory; a stale link is
# removed even when no new one is created (-nosubdir)
if [ -L lastRun ]; then
	rm lastRun
fi
if [ "$nosubdir" == false ]; then
	ln -s "$outdir" lastRun
fi

#create info file in output directory
# Everything below uses paths relative to the output directory (info.txt, the
# -mkdirs sub-directories, the copied script). If the cd failed, those files
# would silently end up elsewhere, so stop instead.
if [ -z "$outdir" ] || ! cd "$outdir"; then
	echo "ERROR: cannot change into output directory \"$outdir\""
	exit 1
fi
# ">>" creates info.txt if needed and appends to an existing one (-nosubdir)
{
	echo " -------------------------------------- "
	echo "      ugsubmit submitted Job"
	echo " -------------------------------------- "
	echo " complete ugsubmit command line:"
	echo "               ugsubmit $completeCommandline"
	echo ""
	echo " executed command for cluster:"
} >> info.txt

export UGS_JOBNAME=$jobname
export UGS_OUTDIR=$outdir
export UGS_NP=$npe
export UGS_NPPN=$nppn
export UGS_MYDIR=$execpath

## make subdirs
# $mkdirs is a ';'-separated list. It is split on ';' only, so a directory name
# may contain blanks; "read" strips blanks around each entry. Empty entries are
# skipped. -p allows nested entries and re-use of existing directories.
subdirs=${mkdirs//;/$'\n'}
while read -r sdir; do
	if [ -n "$sdir" ]; then
		mkdir -p -- "$sdir"
	fi
done <<< "$subdirs"

## copy executable script to output directory
# The script is the word following "-ex" in $args. It is looked up as given
# (relative to the output directory, which is the current directory here) and
# then below ${UG4_ROOT}/apps. On success every occurrence of the original path
# in $args is replaced by "./<basename>".
scriptCopied=false
if [ "$copyScript" = true ]; then
	nextArgIsScript=false
	# split $args on whitespace without expanding glob characters in it
	read -r -d '' -a argWords <<< "$args"
	for curArg in "${argWords[@]}"; do
		if [ "$nextArgIsScript" = true ]; then
			scriptPath="$curArg"
			origScriptPath="$curArg"
			if [ ! -e "$scriptPath" ]; then
				# test with prefix $UG4_ROOT
				if [ -e "${UG4_ROOT}/apps/${scriptPath}" ]; then
					scriptPath="${UG4_ROOT}/apps/${scriptPath}"
				else
					echo "Warning: Did not find execution script '$scriptPath' nor '${UG4_ROOT}/apps/${scriptPath}'."
					break
				fi
			fi
			filename="${scriptPath##*/}"
			# nothing to copy if the script already is the file in the job
			# directory; otherwise a failed copy must not rewrite $args
			if [ ! "$scriptPath" -ef "./$filename" ] && ! cp -- "$scriptPath" "$filename"; then
				echo "Warning: Could not copy execution script '$scriptPath'."
				break
			fi
			# literal replacement; the path is no longer used as a sed regex
			args=${args//"$origScriptPath"/"./${filename}"}
			scriptCopied=true
			break
		fi
		if [ "$curArg" = "-ex" ]; then
			nextArgIsScript=true
		fi
	done
fi
if [[ "$copyScript" = true && "$scriptCopied" = false ]]; then
	echo "Warning: Could not find execution script. Not copied."
fi


#### now call job schedulers ####
# UJS_Submit is not defined in this file.
# NOTE(review): presumably it is provided by the sourced cluster scripts and
# sets $jobid ("unknown" by default, "?" is treated as "no id") -- confirm.
UJS_Submit

################################################################################
# writing info: info.txt
# The values are quoted: before, glob characters in e.g. $args were expanded
# against the files in the job directory and whitespace was collapsed.
{
	echo ""
	echo " cluster:      $UGSUBMIT_TYPE"
	echo " submit by:    $scriptname"
	echo " exec path:    $workDir"
	echo ""
	echo " jobname:      $jobname"
	echo " nnodes:       $nnodes"
	echo " nppn:         $nppn"
	echo " npe:          $npe"
	echo " walltime:     $walltime"
	echo " executable:   $executable"
	echo " arguments:    $args"
	echo ""
	echo " date:         $execdate"
	echo " email:        $UGSUBMIT_EMAIL"
	echo " output:       $outdir"
	echo ""
} >> info.txt
################################################################################


if [ "$interactive" == false ]; then

	# keep a list of all submitted jobs in the start directory
	if [ "$nosubdir" == false ]; then
		if [ ! -e "$execpath/ugjobs" ]; then
			echo "JOBID     EXECDATE      DIRECTORY" > "$execpath/ugjobs"
		fi
		echo "$jobid    $execdate    $outdir" >> "$execpath/ugjobs"
	fi
	echo "Received job id: $jobid"
	echo " job id:       $jobid" >> info.txt
fi

if ! cd "$execpath"; then
	echo "ERROR: cannot change back to \"$execpath\""
	exit 1
fi

# link jobid.<id> -> job directory, created in the start directory
if [ "$interactive" == false ] && [ "$jobid" != "?" ] && [ "$nosubdir" = false ]; then
	ln -s "$outdir" "jobid.$jobid"
fi

if [ "$verbose" == true ]; then
	cat "${outdir}/info.txt"
fi

# Following the output blocks until the user hits Ctrl-C, which also ends this
# script. It is therefore done last; before, the jobid link and the verbose
# output above were skipped whenever -tail was used.
if [ "$interactive" == false ] && [ "$tail" == true ]; then
	touch "$outdir/job.output"
	sleep 1
	tail -c +0 -f "$outdir/job.output"
fi
