#!/bin/sh
#########################################################################
#									#
#  chkrflg 								#
#									#
#  This script is used to decide whether to process a job or the next   #
#  segment of a job.  The script reads and modifies files having the	#
#  name job@hostname_hostid, where job is a unique name for this	#
#  function, hostname is the short name of the host on which it is	#
#  running, and hostid is the hexadecimal host identification or a fake #
#  hostid from a file, which is updated at reboot.		 	#
#  These files are located in $HOME/runflags.				#
#									#
#  This script has four required command line inputs and a fifth	#
#  optional input.							#
#									#
#  Log:									#
#  K. Brill/HPC      20070316						#
#  K. Brill/HPC      20070919	Evaluate xchksum to check for valid	#
#                               runflag file entries			#
#  K. Brill/HPC      20071011   Modify documentation			#
#  C. Bailey/HPC     20090630   Added Hold option                       #
#  K. Brill/HPC      20091204   Add hostid to file name			#
#  K. Brill/HPC      20100106   Documentation				#
#  K. Brill/HPC      20100225   Look for hostname_fakehostid file	#
#  K. Brill/HPC      20100302   Add debug echo output into a log file	#
#########################################################################

#  Fewer than four arguments were given: page the complete usage text to
#  standard output and exit with status 2.  The here-document delimiter
#  is quoted so that the backticks and $variables shown in the examples
#  are printed literally instead of being expanded by the shell.

if [ $# -lt 4 ]; then
    cat << "EOF" | more

set nextpoint = `chkrflg jobname YYYYMMDDHH nextpoint maxchecks [R, D, or H]`

    The following arguments are required on the command line:

    1 - A character string expressing a job name
    2 - The YYYYMMDDHH cycle date-time integer for this job
        Note: This may be any reference integer that is increased each
              time the job is run.
    3 - The current integer index of the processing point in this job
        Note:  This number is incremented by chkrflg, but must be 0
               on the first call in the driver script.
    4 - The maximum number of checks before a file with a particular
        cycle date-time (reference integer) is updated or removed to
        allow resumption of processing or initiation of new processing
        in the event of a suspended or defunct process
    5 - (OPTIONAL) The letter "d" or "D" indicating that this job is
        finished, the letter "r" or "R" indicating that a finished
        job is to be restarted, OR, the letter "h" or "H" indicating that
        this job is to be placed on hold.

    Note that the last call to this script made in the driver script
    should have "D" as the fifth entry so as to avoid delays that may
    be caused by the maximum number check.

    The chkrflg script is designed to handle an event when the job
    switches to another host.

    This script acts as a function, returning the incremented value
    of the processing point integer corresponding to the block of code
    about to be processed.  If the return status of chkrflg is 0,
    then the block should be processed.  If the job is done or is
    already running on this or another host, the return status is 10.
    If the job has already been completed, the return status is 11.
    If the job is being resumed from processing on another host, the
    return status is 1 or 0.  Regardless of the return status, the value
    returned is always one plus the current integer processing point
    index entered on the command line.

    If chkrflg exits with a status of 2, then the script has detected a
    fatal error and prints an error message to standard output, which
    will be assigned to the variable receiving the returned value in
    the calling script.  This is the only case for which the returned
    function value is not an integer.

    Return status action summary:

         Return Status               Decision
               0         Execute the next block of code
               1         Skip the next block, but continue checking
               2         echo error string in return value, then exit
              10         Exit immediately, job is already running
              11         Job is already done

    The chkrflg script should never be called within a conditional block
    of scripting.  The only exception to this is a conditional check
    that decides whether or not sufficient data is available to initiate
    a job, in which case all calls to chkrflg are within that conditional
    block.  The chkrflg script can be called by sub-scripts as long as
    the jobname, job reference integer, and the processing point integer
    value are passed to the sub-script and, in the case of the processing
    point integer, back to the calling job.

    Note that jobs only use the status returned from chkrflg.  They do
    not use the processing point index value returned by the function
    call, but this value must be protected and passed intact to each
    succeeding call to chkrflg.

    The value for the maximum number of checks entered as the fourth
    command line parameter is determined by the interval between
    successive initiations of the job in CRON and the longest execution
    time for any single segment of the job.  It is the latter divided
    by the former.  For example if the job is triggered every 5 minutes
    and the longest execution time for any single block of code between
    calls to chkrflg is 20 minutes, then set the maximum number of checks
    to 4.  When chkrflg detects no change in the process point index, it
    first sets the current and previous process point indexes to the same
    value.  In subsequent calls, chkrflg increments the check counter
    from zero.  When the incremented value of the number of checks
    exceeds the maximum, the associated job is assumed to be defunct.

    A job can be placed on hold by using "h" or "H" as the fifth command line
    argument. When a job is on hold, the check counter will not increment
    when chkrflg is called. This is used when a job needs to wait for a
    long period of time for an outside action to occur. To remove the hold,
    call chkrflg without a fifth command line argument at the same code
    block as where the hold was initiated.

    Use examples:

    Example 1.  The first call from my_big_job, a /bin/sh script:

        nextppt=`chkrflg my_big_job 2007031512 0 2`
	cstat=$?
        if [ $cstat -eq 0 ]; then
	    process this block
	elif [ $cstat -eq 10 ]; then
	    exit
	elif [ $cstat -eq 11 -a $my_rerun_flag = "yes" ]; then
            nextppt=`chkrflg my_big_job 2007031512 0 2 R`
	    if [ $? -eq 0 ]; then
		process this block
	    else
		exit
	    fi
        fi

    If a previous "my_big_job" file from any host has been checked twice
    before, it will now be deleted and a new one created.

    The value of nextppt will be 1 after chkrflg returns.  Example 1
    also shows how to force a rerun of a job that has already finished.

    Example 2.  A call from somewhere within my_big_job, a /bin/csh script:

        set nextppt = `chkrflg my_big_job 2007031512 $nextppt 2`
        set cstat = $status
        if ( $cstat == 0 ) then
            execute this block
        else if ( $cstat == 10 ) then
            exit
        endif

    Example 3.  A call from the end of my_big_job, a /bin/sh script:

        nextppt=`chkrflg my_big_job 2007031512 $nextppt 2 D`

    This indicates the script is done.  The next time chkrflg is called
    for my_big_job for 2007031512, chkrflg will return with status = 11.

    Example 4. Call a hold in my_big_job, a /bin/csh script:

        if (! -e $file ) then
            set nextppt = `chkrflg my_big_job 2007031512 $nextppt 2 h`
        else
            set nextppt = `chkrflg my_big_job 2007031512 $nextppt 2`
        endif
        set cstat = $status
        if ( $cstat == 0 ) then
            execute this block
        else if ( $cstat == 10 ) then
            exit
        endif

    This indicates that if $file does not exist, the job will be placed on
    hold. If $file does exist, the hold will be removed and the block can be
    executed.

    Some technical details:

    This script manages the job@hostname_hostid files in a directory called
    runflags under $HOME, deleting them when appropriate and creating them as
    needed.  The $HOME/runflags directory will be created if it does not
    exist.  Files following an old naming convention, job@hostname, are
    automatically converted to the current naming convention.

    The job@hostname_hostid file contains a single line of text:

    YYYYMMDDHH cpp check#_cpplast status

    where YYYYMMDDHH is the cycle date-time for the job, cpp is the
    current processing point integer index, check# is the integer
    number of times chkrflg has checked this file and detected no
    change in the process point index, cpplast is the value of cpp
    completed as of the last check, and status is either "done",
    "resumed", "running", or "hold".  Whenever chkrflg returns with a
    status of 10, cpp==cpplast in the file except when the job is
    running and updating the runflags file.

    This script, chkrflg, executes a script named chk_host_up.  The
    chk_host_up script assumes the host name (hostname -s) is entered
    on the command line, followed by "_" and the host identifier (hostid).
    If the host is up and the hostid matches, chk_host_up returns with
    $status == 0 ($? -eq 0); otherwise, $status == 1 ($? -eq 1) if the host
    is down, or $status == 2 ($? -eq 2) if the host id has changed.  The
    chkrflg user may provide a customized chk_host_up script in $PATH or use
    the one in ~hpcops/bin.  The ~hpcops/bin/chk_host_up uses ssh to determine
    if a host name is up.  The action of chkrflg is the same for either
    non-zero return status value (1 or 2).

    If the directory $HOME/chkrflg_debug exists, a detailed log file is
    created for each call to chkrflg.  The log files for a particular job
    are written in a separate sub-directory having the same name as the
    name of the job.

Use:

set nextpoint = `chkrflg jobname YYYYMMDDHH nextpoint maxchecks [R, D, or H]`

EOF
    exit 2
fi

#  Capture the four required positional arguments and reset the three
#  flags that the optional fifth argument may later switch on.

jobname=$1; Ymdh=$2; curpt=$3; maxchk=$4
done=no; redo=no; hold=no

#  Debug logging is opt-in.  By default every "$echo ... >> $logfile"
#  line is a no-op (":" appended to /dev/null).  When the directory
#  $HOME/chkrflg_debug exists, a per-job sub-directory is created and
#  each call writes its own log file, made unique by the process id.

logfile=/dev/null
echo=":"
dbgdir=$HOME/chkrflg_debug
if test -d $dbgdir; then
    mkdir -p $dbgdir/$jobname
    logfile=$dbgdir/$jobname/${jobname}_${Ymdh}_xrlfg_$$.log
    echo=echo
fi
$echo CHECK RUN FLAGS $* >> $logfile
#  Decode the optional fifth argument:
#      d / D  -> done=yes  (the job has finished)
#      r / R  -> redo=yes  (restart a finished job; only legal when the
#                           processing index is 0, otherwise exit 2)
#      h / H  -> hold=yes  (place the job on hold)
#  Any other value leaves all three flags at "no".  The value is matched
#  with a quoted case statement so that an empty or multi-word argument
#  cannot break the comparison.

if [ $# -eq 5 ]; then
    input5=$5
    done=no
    redo=no
    hold=no
    case "$input5" in
        d | D)
            done=yes
            ;;
        r | R)
#           A non-integer index makes this test fail quietly here; it is
#           rejected by the numeric validation that follows this block.
            if [ "$curpt" -ne 0 ]; then
                echo The current processing index must be 0 to start a rerun.
                exit 2
            fi
            redo=yes
            ;;
        h | H)
            hold=yes
            ;;
    esac
fi
#  Reject a cycle date-time or processing index that is not an integer.
#  After dropping one optional leading minus sign, the value must be
#  non-empty and consist of decimal digits only.  This catches letters,
#  punctuation, decimal points and empty strings.

case "${Ymdh#-}" in
    "" | *[!0-9]*)
        echo "Invalid 2nd argument: YYYYMMDDHH must be numeric."
        exit 2
        ;;
esac

case "${curpt#-}" in
    "" | *[!0-9]*)
        echo "Invalid 3rd argument: Processing index must be numeric."
        exit 2
        ;;
esac

#  Identify this host.  A non-empty $HOME/.<hostname>_fakehostid file
#  overrides the value reported by the hostid command.

hostname=`hostname -s`
if [ -s "$HOME/.${hostname}_fakehostid" ]; then
    hostid=`cat "$HOME/.${hostname}_fakehostid"`
else
    hostid=`hostid`
fi

#  Flag files live in $HOME/runflags: job@hostname is the old naming
#  convention, job@hostname_hostid the current one.

mkdir -p "$HOME/runflags"
rflgfile_old=$HOME/runflags/${jobname}@${hostname}
rflgfile=$HOME/runflags/${jobname}@${hostname}_${hostid}

#  The value echoed back to the caller is always the current index + 1.

nxtpt=`expr $curpt + 1`
if [ $? -ne 0 ]; then
    echo Invalid 3rd argument:  Processing index must be numeric.
    exit 2
fi

#  Convert a flag file that still uses the old naming convention.

if [ -s ${rflgfile_old} ]; then
    $echo EXECUTE mv ${rflgfile_old} $rflgfile >> $logfile
    mv ${rflgfile_old} $rflgfile
fi

#  Collect the flag files that belong to THIS job on other hosts.  The
#  name must begin with the literal string "<jobname>@"; a plain
#  substring match would also pick up jobs whose name merely ends in
#  <jobname> (for example post_gfs@host when the job is gfs).  Names
#  containing this host's hostname_hostid are skipped.

otherhostfiles=
for rfname in `/bin/ls -1 "$HOME/runflags"`; do
    case $rfname in
        *"${hostname}_${hostid}"*)
            ;;
        "${jobname}@"*)
            otherhostfiles="$otherhostfiles $rfname"
            ;;
    esac
done
numohf=`echo $otherhostfiles | wc -w`
hostfile=no
xstatus=null

#  Look at the flag file for this host.  Three cases:
#    1. The file exists and is non-empty: parse its single record
#       "Ymdh cpp check#_cpplast status" and set hostfile=yes.
#    2. No file here, no files for other hosts, and the job is not
#       being flagged as done: create the first flag file, echo the
#       next processing point and exit 0.
#    3. Otherwise: use zero placeholders and continue.

if [ -s $rflgfile ]; then
    record=`cat $rflgfile`
    $echo EXECUTE record=cat $rflgfile >> $logfile
    $echo RECORD = $record >> $logfile
    xYmdh=`echo $record | cut -d" " -f1`
    xccp=`echo $record | cut -d" " -f2`
    xchk=`echo $record | cut -d" " -f3`
    xstatus=`echo $record | cut -d" " -f4`
#   Split the third field "check#_cpplast" into its two integers.
    xc1=`echo $xchk | cut -d"_" -f1`
    xc2=`echo $xchk | cut -d"_" -f2`
#   The sum itself is never used; expr serves as an integer validator
#   for the four numeric fields.  An exit status above 1 is treated as
#   an invalid record (status 1 is accepted, so a zero sum passes).
    xchksum=`expr $xYmdh + $xccp + $xc1 + $xc2 2> /dev/null`
    if [ $? -gt 1 ]; then
        echo $rflgfile has invalid entries.
        $echo EXECUTE "/bin/rm -f $rflgfile" >> $logfile
        /bin/rm -f $rflgfile
        exit 2
    fi
    hostfile=yes

elif [ $numohf -eq 0 -a $done = "no" ]; then

#   No flagfiles exist.  Create one and initiate the job.

    $echo EXECUTE echo "$Ymdh $nxtpt 0_$curpt running > $rflgfile" >> $logfile
    echo $Ymdh $nxtpt 0_$curpt running > $rflgfile
    echo $nxtpt
    $echo EXECUTE exit 0 >> $logfile
    exit 0
else

#   Assign zero values in case job is flagged as done and
#   the flagfile does not exist.
#   (xc1, xc2 and xstatus are not assigned in this branch; xstatus
#   keeps its initial value.)

    xYmdh=0
    xccp=0
    xchk=0_0
fi

# If job is done, write the "done" status flagfile.
#
# This applies only when the caller passed D/d and the cycle recorded in
# this host's file is not newer than the caller's cycle.  The recorded
# processing point and check field are kept; only the cycle and status
# are rewritten.  The next processing point is echoed and the script
# exits with status 0.

if [ $done = "yes" -a $xYmdh -le $Ymdh ]; then
    $echo EXECUTE echo "$Ymdh $xccp $xchk done > $rflgfile" >> $logfile
    echo $Ymdh $xccp $xchk done > $rflgfile

#   Remove all flag files for other hosts having this runID.
#   (The test below is -le, so files for older cycles are removed too.)

    for ohf in $otherhostfiles; do
	ofile=$HOME/runflags/$ohf
	if [ -s $ofile ]; then
            record=`cat $ofile`
            $echo EXECUTE record=cat $ofile >> $logfile
	    $echo OLD FILE RECORD = $record >> $logfile
#           Only the cycle field of the other host's record is needed.
            qYmdh=`echo $record | cut -d" " -f1`
	    if [ $qYmdh -le $Ymdh ]; then
	        $echo EXECUTE "/bin/rm -f $ofile" >> $logfile
	        /bin/rm -f $ofile
	    fi
	fi
    done
    echo $nxtpt
    $echo EXECUTE exit 0 >> $logfile
    exit 0
fi

# Handle Hold processing flag
#
# When the caller passed H/h and the recorded cycle is not newer than
# the caller's cycle, rewrite this host's flag file with status "hold",
# the next processing point, and a check field of 0_<recorded point>
# (the check counter is reset to zero).  The next processing point is
# echoed and the script exits with status 10.

if [ $hold = "yes" -a $xYmdh -le $Ymdh ]; then
    $echo EXECUTE echo "$Ymdh $nxtpt 0_$xccp hold > $rflgfile" >> $logfile
    echo $Ymdh $nxtpt 0_$xccp hold > $rflgfile
    echo $nxtpt
    $echo EXECUTE exit 10 >> $logfile
    exit 10
fi

# Handle special case for resumed processing.
#
# This host's flag file has status "resumed", meaning the job was taken
# over at processing point xccp and the caller is skipping forward to
# that point.  For a matching cycle:
#   - caller's next point equals the resume point: mark the file
#     "running" and exit 0 (execute the block);
#   - caller is on its first call (index 0) or already past the resume
#     point: fall through to the stale-file check with status 10;
#   - otherwise: keep the file "resumed" and exit 1 (skip the block).

if [ "$hostfile" = "yes" ] && [ "$xstatus" = "resumed" ]; then

#   Default exit status for every path that reaches the end of this
#   block.  Without it, a flag file for a different cycle left ostat
#   unset and "exit $ostat" returned the status of the preceding log
#   command, i.e. 0.

    ostat=10

    if [ "$xYmdh" -eq "$Ymdh" ]; then
	if [ "$nxtpt" -eq "$xccp" ]; then
	    $echo EXECUTE echo "$Ymdh $xccp 0_$xccp running > $rflgfile" >> $logfile
	    echo $Ymdh $xccp 0_$xccp running > $rflgfile
	    echo $nxtpt
	    $echo EXECUTE exit 0 >> $logfile
	    exit 0
	elif [ "$curpt" -eq 0 ] || [ "$nxtpt" -gt "$xccp" ]; then
	    ostat=10
	else
	    $echo EXECUTE echo "$Ymdh $xccp 0_$xccp resumed > $rflgfile" >> $logfile
	    echo $Ymdh $xccp 0_$xccp resumed > $rflgfile
	    echo $nxtpt
	    $echo EXECUTE exit 1 >> $logfile
	    exit 1
	fi
    fi

#   Refresh the data and check for stale file.

    record=`cat $rflgfile`
    $echo EXECUTE record = cat $rflgfile >> $logfile
    $echo REFRESH record = $record >> $logfile
    xYmdh=`echo $record | cut -d" " -f1`
    xccp=`echo $record | cut -d" " -f2`
    xchk=`echo $record | cut -d" " -f3`
    xstatus=`echo $record | cut -d" " -f4`
    xc1=`echo $xchk | cut -d"_" -f1`
    xc2=`echo $xchk | cut -d"_" -f2`

#   expr is used only to validate that the four fields are integers; an
#   exit status above 1 is treated as an invalid record.

    xchksum=`expr $xYmdh + $xccp + $xc1 + $xc2 2> /dev/null`
    if [ $? -gt 1 ]; then
        echo $rflgfile has invalid entries.
        $echo EXECUTE "/bin/rm -f $rflgfile" >> $logfile
        /bin/rm -f $rflgfile
        exit 2
    fi

#   No progress since the last check: count this check, and drop the
#   file once the count exceeds the caller's maximum.

    if [ "$xc2" -eq "$xccp" ]; then
        xc1=`expr $xc1 + 1`
        if [ "$xc1" -gt "$maxchk" ]; then
            $echo EXECUTE "/bin/rm -rf $rflgfile" >> $logfile
            /bin/rm -rf $rflgfile
        else
            $echo EXECUTE echo "$xYmdh $xccp ${xc1}_$xccp $xstatus > $rflgfile" >> $logfile
            echo $xYmdh $xccp ${xc1}_$xccp $xstatus > $rflgfile
        fi
        echo $nxtpt
        $echo EXECUTE exit 10 >> $logfile
        exit 10
    fi
    echo $nxtpt
    $echo EXECUTE exit $ostat >> $logfile
    exit $ostat
fi

#  Deal with scripts that are on hold
#
#  This host's flag file has status "hold".  Only a record for the same
#  cycle is handled here; a different cycle falls through to the code
#  below.  For the same cycle:
#    - caller is at the held point and still asks for a hold: exit 10;
#    - caller is at the held point without a hold request: set the
#      status back to "running" and exit 0;
#    - caller is at any other point: exit 1 (skip the block).
#  In every case the next processing point is echoed first.

if [ $hostfile = "yes" -a $xstatus = "hold" ]; then
    if [ $xYmdh -eq $Ymdh ]; then
        if [ $nxtpt -eq $xccp  -a $hold = "yes" ]; then
	    echo $nxtpt
    	    $echo EXECUTE exit 10 >> $logfile
            exit 10
        elif [ $nxtpt -eq $xccp  -a $hold = "no" ]; then
#           resume running script
#           (the check counter xc1 read from the file is preserved)
            $echo EXECUTE echo "$xYmdh $xccp ${xc1}_$xccp running > $rflgfile" >> $logfile
            echo $xYmdh $xccp ${xc1}_$xccp running > $rflgfile
	    echo $nxtpt
    	    $echo EXECUTE exit 0 >> $logfile
	    exit 0
        else
#           skip to next block
            echo $nxtpt
    	    $echo EXECUTE exit 1 >> $logfile
	    exit 1
        fi
    fi
fi
#  Deal with all the other host files next.
#
#  Each non-empty flag file written for this job by another host is
#  examined in turn:
#    - status "done", same cycle: the file is adopted as this host's
#      flag file (mv) and hostfile is set to yes;
#    - status "done", other cycle: the file is removed;
#    - any other status: chk_host_up is run on the host part of the
#      file name.  A non-zero status means the host is not usable: the
#      file is removed and, for a matching cycle, processing is resumed
#      here.  A zero status means the host is up: the record is re-read
#      and the stall counter is updated; the script exits 10 unless the
#      counter has exceeded maxchk.
#  numohf is decremented for most files that are removed or adopted.
#  resumeproc/occp record that processing is to be resumed here and at
#  which processing point.

resumeproc=no

for ohf in $otherhostfiles; do
    ofile=$HOME/runflags/$ohf
    if [ -s $ofile ]; then
        record=`cat $ofile`
        $echo EXECUTE "record=cat $ofile" >> $logfile
        $echo EXAMINE record = $record >> $logfile
        qYmdh=`echo $record | cut -d" " -f1`
        qccp=`echo $record | cut -d" " -f2`
        qchk=`echo $record | cut -d" " -f3`
        qstatus=`echo $record | cut -d" " -f4`
	if [ $qstatus = "done" ]; then
	    if [ $qYmdh -eq $Ymdh ]; then
#		Finished on another host for this cycle: take over its file.
		$echo EXECUTE mv $ofile $rflgfile >> $logfile
		mv $ofile $rflgfile
		numohf=`expr $numohf - 1`
		xYmdh=$qYmdh
	        xstatus=$qstatus
		hostfile=yes
	    else
#		Finished for a different cycle: discard.
		$echo EXECUTE "/bin/rm -f $ofile" >> $logfile
		/bin/rm -f $ofile
		numohf=`expr $numohf - 1`
	    fi
	else
#	    Everything after the "@" in the file name is passed to
#	    chk_host_up as the host to check.
	    hstnam=`echo $ohf | cut -d"@" -f2`
#db 	    echo checking host = $hstnam
#	    Both output streams of chk_host_up are discarded (2<&1 makes
#	    descriptor 2 a duplicate of descriptor 1); only its exit
#	    status is used.
	    chk_host_up $hstnam > /dev/null 2<&1
	    chkstat=$?
	    $echo chk_host_up $hstnam RETURNS status = $chkstat >> $logfile
#db	    echo chk_host_up status = $chkstat
	    if [ $chkstat -ne 0 ]; then

#		This host is down.  Resume processing if
#               the runIDs match.

		if [ $qYmdh -eq $Ymdh ]; then
		    $echo EXECUTE echo "$Ymdh $qccp 0_$qccp running > $rflgfile" >> $logfile
		    echo $Ymdh $qccp 0_$qccp running > $rflgfile
		    $echo EXECUTE "/bin/rm -f $ofile" for resume >> $logfile
		    /bin/rm -f $ofile
		    resumeproc=yes
#		    Remember the resume point; qccp is overwritten by
#		    later iterations of this loop.
		    occp=$qccp
	        else
		    $echo EXECUTE "/bin/rm -f $ofile" NOT for resume >> $logfile
		    /bin/rm -f $ofile
		    numohf=`expr $numohf - 1`
		fi
	    else

#               This host is up and may be finishing this job.
# 		Refresh the information from the flag file.

        	record=`cat $ofile`
                $echo EXECUTE record = cat $ofile >> $logfile
                $echo REFRESHED old file record = $record >> $logfile
        	qYmdh=`echo $record | cut -d" " -f1`
        	qccp=`echo $record | cut -d" " -f2`
        	qchk=`echo $record | cut -d" " -f3`
        	qstatus=`echo $record | cut -d" " -f4`
#		Note: xc1/xc2 are reused here for the OTHER host's
#		counters, overwriting the values read from this host's
#		own flag file.
		xc1=`echo $qchk | cut -d"_" -f1`
		xc2=`echo $qchk | cut -d"_" -f2`
		if [ $xc2 -eq $qccp ]; then
#		    No progress since the last check: count this check.
	    	    xc1=`expr $xc1 + 1`
	    	    if [ $xc1 -gt $maxchk ]; then
#			Too many checks without progress: drop the file
#			and, for a matching cycle, resume here.
	        	$echo EXECUTE "/bin/rm -f $ofile" >> $logfile
	        	/bin/rm -f $ofile
		        numohf=`expr $numohf - 1`
			if [ $qYmdh -eq $Ymdh ]; then
		            resumeproc=yes
			    occp=$qccp
			fi
	    	    else
	                $echo EXECUTE echo "$qYmdh $qccp ${xc1}_$qccp $qstatus > $ofile" >> $logfile
	                echo $qYmdh $qccp ${xc1}_$qccp $qstatus > $ofile
			echo $nxtpt
    	    	 	$echo EXECUTE exit 10 >> $logfile
			exit 10
		    fi
		else
#		    Progress was made: reset the counter and record the
#		    current point as the last one seen.
		    $echo EXECUTE echo "$qYmdh $qccp 0_$qccp $qstatus > $ofile" >> $logfile
		    echo $qYmdh $qccp 0_$qccp $qstatus > $ofile
		    echo $nxtpt
    	    	    $echo EXECUTE exit 10 >> $logfile
		    exit 10
		fi
	    fi
	fi
    fi
done

#  Processing is being taken over from another host.  occp is the
#  processing point saved when the takeover was decided; it is used for
#  both the comparison and the record written here.  (qccp must not be
#  used: it holds the point of whichever file the loop examined last.)
#
#  The next processing point is echoed first.  If the caller is exactly
#  at the resume point the file is marked "running" and the status is 0
#  (execute the block); otherwise the file is marked "resumed" and the
#  status is 1 (skip the block, keep checking).

if [ "$resumeproc" = "yes" ]; then
    echo $nxtpt
    if [ "$nxtpt" -eq "$occp" ]; then
	$echo EXECUTE echo "$Ymdh $occp 0_$occp running > $rflgfile" >> $logfile
	echo $Ymdh $occp 0_$occp running > $rflgfile
	$echo EXECUTE exit 0 >> $logfile
	exit 0
    else
	$echo EXECUTE echo "$Ymdh $occp 0_$occp resumed > $rflgfile" >> $logfile
	echo $Ymdh $occp 0_$occp resumed > $rflgfile
	$echo EXECUTE exit 1 >> $logfile
	exit 1
    fi
fi

#  Final decision, based on this host's flag file.
#
#  No flag file for this host (hostfile=no): create one with status
#  "running" and exit 0.
#
#  Flag file present, same cycle:
#    - status "done": exit 11, or restart the job (exit 0) when a rerun
#      was requested;
#    - otherwise the job is running here: exit 0 when the caller is at
#      the recorded processing point, else run the stall check on a
#      first call (index 0), else exit 10.
#
#  Flag file present, different cycle: run the stall check on an
#  unfinished job (exit 10), or start the new cycle when the old one is
#  done and this is a first call (exit 0).

if [ $hostfile = "yes" ]; then
#   Note that this is a string comparison, unlike the -eq tests used
#   elsewhere for the cycle.
    if [ $Ymdh = $xYmdh ];  then

#	The runIDs match.
	if [ $xstatus = "done" ]; then
	    if [ $redo = "yes" ]; then
#		Rerun requested: start over with a fresh "running" record.
		$echo EXECUTE echo "$Ymdh $nxtpt 0_0 running > $rflgfile" >> $logfile
		echo $Ymdh $nxtpt 0_0 running > $rflgfile
		echo $nxtpt
    		$echo EXECUTE exit 0 >> $logfile
		exit 0
	    else
	        echo $nxtpt
    		$echo EXECUTE exit 11 >> $logfile
	        exit 11
	    fi
	else

#	    The job is running here.

#	    The caller is in step with the file when the recorded point
#	    plus one equals the caller's next point.  Advance the record
#	    and let the caller execute the block.
	    xccpp1=`expr $xccp + 1`
	    if [ $xccpp1 -eq $nxtpt ]; then
	        $echo EXECUTE echo "$Ymdh $nxtpt 0_$curpt running > $rflgfile" >> $logfile
	        echo $Ymdh $nxtpt 0_$curpt running > $rflgfile
		ostat=0
	    elif [ $curpt -eq 0 ]; then
#               The host is up and may be finishing this job.
# 		Refresh the information from the flag file.

        	record=`cat $rflgfile`
    	        $echo EXECUTE record = cat $rflgfile >> $logfile
    	        $echo REFRESH record = $record >> $logfile
        	qYmdh=`echo $record | cut -d" " -f1`
        	qccp=`echo $record | cut -d" " -f2`
        	qchk=`echo $record | cut -d" " -f3`
        	qstatus=`echo $record | cut -d" " -f4`
		xc1=`echo $qchk | cut -d"_" -f1`
		xc2=`echo $qchk | cut -d"_" -f2`
#               expr is used only to validate the numeric fields; an
#               exit status above 1 is treated as an invalid record.
                xchksum=`expr $qYmdh + $qccp + $xc1 + $xc2 2> /dev/null`
                if [ $? -gt 1 ]; then
                    echo $rflgfile has invalid entries.
                    $echo EXECUTE "/bin/rm -f $rflgfile" >> $logfile
                    /bin/rm -f $rflgfile
                    exit 2
                fi
		if [ $xc2 -eq $qccp ]; then
#		    No progress since the last check: count this check.
	    	    xc1=`expr $xc1 + 1`
	    	    if [ $xc1 -gt $maxchk ]; then

#			The job must be defunct, resume processing.

    			echo $nxtpt
    			if [ $nxtpt -eq $qccp ]; then
				$echo EXECUTE echo "$qYmdh $qccp 0_$qccp running > $rflgfile" >> $logfile
				echo $qYmdh $qccp 0_$qccp running > $rflgfile
    				$echo EXECUTE exit 0 >> $logfile
        			exit 0
    			else
				$echo EXECUTE echo "$qYmdh $qccp 0_$qccp resumed > $rflgfile" >> $logfile
				echo $qYmdh $qccp 0_$qccp resumed > $rflgfile
    				$echo EXECUTE exit 1 >> $logfile
				exit 1
    			fi
	    	    else
	                $echo EXECUTE echo "$qYmdh $qccp ${xc1}_$qccp $qstatus > $rflgfile" >> $logfile
	                echo $qYmdh $qccp ${xc1}_$qccp $qstatus > $rflgfile
		    fi
		    ostat=10
		else
#		    Progress was made: reset the counter and record the
#		    current point as the last one seen.
		    $echo EXECUTE echo "$qYmdh $qccp 0_$qccp $qstatus > $rflgfile" >> $logfile
		    echo $qYmdh $qccp 0_$qccp $qstatus > $rflgfile
		    ostat=10
		fi
	    else
#		Caller is neither in step with the file nor on a first call.
		ostat=10
	    fi
	    echo $nxtpt
    	    $echo EXECUTE exit $ostat >> $logfile
	    exit $ostat
	fi
    else

#	The runIDs do not match.  Increment the check count.
#       Refresh the information from the file first.

        record=`cat $rflgfile`
    	$echo EXECUTE record = cat $rflgfile >> $logfile
    	$echo REFRESH record = $record >> $logfile
        xYmdh=`echo $record | cut -d" " -f1`
        xccp=`echo $record | cut -d" " -f2`
        xchk=`echo $record | cut -d" " -f3`
        xstatus=`echo $record | cut -d" " -f4`
	xc1=`echo $xchk | cut -d"_" -f1`
	xc2=`echo $xchk | cut -d"_" -f2`
        xchksum=`expr $xYmdh + $xccp + $xc1 + $xc2 2> /dev/null`
        if [ $? -gt 1 ]; then
            echo $rflgfile has invalid entries.
            $echo EXECUTE "/bin/rm -f $rflgfile" >> $logfile
            /bin/rm -f $rflgfile
            exit 2
        fi
	if [ $xc2 -eq $xccp -a $xstatus != "done" ]; then
#	    Unfinished job of another cycle with no progress: count this
#	    check and remove the file once the count exceeds maxchk.
	    xc1=`expr $xc1 + 1`
	    if [ $xc1 -gt $maxchk ]; then
	        $echo EXECUTE "/bin/rm -rf $rflgfile" >> $logfile
	        /bin/rm -rf $rflgfile
	    else
	        $echo EXECUTE echo "$xYmdh $xccp ${xc1}_$xccp $xstatus > $rflgfile" >> $logfile
	        echo $xYmdh $xccp ${xc1}_$xccp $xstatus > $rflgfile
	    fi
	    echo $nxtpt
    	    $echo EXECUTE exit 10 >> $logfile
	    exit 10
	elif [ $xstatus != "done" ]; then
#	    Unfinished job of another cycle that made progress: reset the
#	    counter.
	    $echo EXECUTE echo "$xYmdh $xccp 0_$xccp $xstatus > $rflgfile" >> $logfile
	    echo $xYmdh $xccp 0_$xccp $xstatus > $rflgfile
	    echo $nxtpt
    	    $echo EXECUTE exit 10 >> $logfile
	    exit 10
	elif [ $curpt -eq 0 -a $xstatus = "done" ]; then

#	    Kick off a new job.

#	    NOTE(review): there is no else branch.  When numohf is not 0
#	    control leaves this if/else chain without exiting and reaches
#	    the statements after it - confirm this is intended.
	    if [ $numohf -eq 0 ]; then
                $echo EXECUTE echo "$Ymdh $nxtpt 0_$curpt running > $rflgfile" >> $logfile
                echo $Ymdh $nxtpt 0_$curpt running > $rflgfile
		echo $nxtpt
    	        $echo EXECUTE exit 0 >> $logfile
	        exit 0
	    fi
	else

#	    The old process is still running.

	    echo $nxtpt
    	    $echo EXECUTE exit 10 >> $logfile
	    exit 10
	fi
    fi
else

#   Create a new file.

    $echo EXECUTE echo "$Ymdh $nxtpt 0_$curpt running > $rflgfile" >> $logfile
    echo $Ymdh $nxtpt 0_$curpt running > $rflgfile
    echo $nxtpt
    $echo EXECUTE exit 0 >> $logfile
    exit 0
fi

#  Fallback: the branches above are expected to exit on their own.  If
#  control arrives here no decision was made, so report a fatal error
#  on standard output (and in the debug log) and exit with status 2.
#  The message names this script, chkrflg.

$echo "Contact developer:  chkrflg logic error." >> $logfile
echo "Contact developer:  chkrflg logic error."
exit 2
