#!/bin/sh
#########################################################################
#  This script is a generalized cleanup script.  The input is the	#
#  directory to clean, the file template, and the number of most	#
#  recent files to keep.  The fourth optional input is "r" or "l", to	#
#  remove or list, respectively, the names of files or directories	#
#  satisfying the template.  To remove files, the "r" option must be	#
#  given as the fourth input.						#
#									#
#  Log:									#
#  K. Brill/HPC      20070612						#
#  K. Brill/HPC      20090820	Use /bin/rm -rf to avoid prompts	#
#  K. Brill/HPC      20090917   Changes to allow 2-digit year (YY)	#
#  K. Brill/HPC      20091119   Put gc: in output text			#
#  K. Brill/HPC      20091120   Always search for the date-time (YMDH)  #
#				to avoid including other matching files # 
#  K. Brill/HPC      20091207   Documentation; search for date-time	#
#                               (YMDH) for case when one string remains #
#                               after removing template			#
#  K. Brill/HPC      20101028   Remove [ ] from tr string to translate  #
#  K. Brill/HPC      20110809   Allow "=" to stand for any character in #
#				the template.				#
#  K. Brill/HPC      20110810   Documentation update			#
#  K. Brill/HPC      20120306   Do a bulk remove for efficiency		#
#########################################################################
#  Login name of the invoking user.
wid=`whoami`

#  With fewer than three arguments, print the usage text and stop.
#  The here-document delimiter is quoted so that the help text is printed
#  verbatim: the $MYMODEL and $MODEL placeholders in the examples must be
#  shown literally, not replaced by whatever the caller's environment holds.
if [ $# -lt 3 ]; then
    cat << 'EOF'

    gc is a general cleanup script.

    Use> gc path template number [r/l]

	Enter the following on the command line:

	1.  The complete path to the directory to be cleaned.

	2.  The file or directory name template with cycle year represented
            by YYYY or YY, month by MM, day by DD, hour by HH.  All forecast
            hours for a given YYYYMMDDHH cycle date-time are kept.  In a
            template, replace the forecast hour digits with F.

            A series of "=" characters may be used anywhere in the template
            to replace a varying string of characters.  For example, the
            member name in an ensemble template is replace with "="
            characters.

        3.  The number of cycles to keep.

        4.  Optionally enter either "r" or "l":
              r = remove the files or directories matching the template.
              l = ONLY list the files or directories matching the
                  template that would otherwise be removed.  This is the
                  default if no 4th argument is given.

        Example:  gc $MYMODEL/mdl my_mdl_YYYYMMDDHHfFFF.grd 5 r

        Assumptions:

        A.  Date-time groups are contiguous.  In other words, YYYY/YY, MM,
            DD, and HH cannot be separated by intevening characters.

        B.  Upper case "Y", "M", "D", "H", or "F" cannot appear anywhere in
            the actual file names.

        C.  The "=" character does not appear in the file name.

        D.  Any varying string replaced by "=" characters is the same length
            in each file name, and each character in the varying string is
            replaced by an "=" character in the template.  For example,
            here is a cleanup for the GEFS ensemble:

            gc $MODEL/gefs gep==_YYYYMMDDHHfFFF 3 r

            Note that "==" replaces the two-digit member number in the
            template in this case.  More than one string of "=" characters
	    is allowed.

    Use> gc path template number [r/l]

EOF
    exit
fi

#  Command line arguments: directory to clean, file name template, number
#  of cycles to keep, and the optional action ("r" = remove, anything else
#  = list only).
qpath=$1
template=$2
nkeep=$3
#  A missing or empty fourth argument means "list".  The parameter
#  expansion avoids an unquoted test, which fails on an argument that
#  contains white space or file name pattern characters.
action=${4:-l}

#  Length of the template.  "wc -c" also counts the newline written by
#  echo, so nctmp is one more than the number of characters in the
#  template and ncm1 is the number of characters itself.
nctmp=`echo $template | wc -c`
ncm1=`expr $nctmp - 1`

#  gc_tpl_pos CHAR
#  Print the 1-based position of the first CHAR in $template, or 0 when
#  CHAR does not occur.  "cut -f1" yields the text in front of the first
#  CHAR (the whole template when CHAR is absent); counting its characters
#  plus the newline gives the position of CHAR directly.
gc_tpl_pos() {
    gc_pos=`echo $template | cut -d"$1" -f1 | wc -c`
    if [ $gc_pos -eq $nctmp ]; then
        echo 0
    else
        echo $gc_pos
    fi
}

#  gc_tpl_has PATTERN
#  Succeed when $template contains a match for the basic regular
#  expression PATTERN.
gc_tpl_has() {
    echo $template | grep "$1" > /dev/null 2>&1
}

#  gc_tpl_bad LABEL
#  Report that the LABEL (Year, Month, ...) element of the template is not
#  valid and stop the script with status 1.
gc_tpl_bad() {
    echo gc:  $1 template is not valid.
    exit 1
}

#  gc_chk_pair LETTER LABEL
#  Validate a two-character element (MM, DD, HH): the template must hold
#  LETTER twice in a row but not three times in a row.  On success two
#  digits are added to sumnum; otherwise the script stops via gc_tpl_bad.
gc_chk_pair() {
    if gc_tpl_has "$1$1$1"; then
        gc_tpl_bad $2
    fi
    if gc_tpl_has "$1$1"; then
        sumnum=`expr $sumnum + 2`
    else
        gc_tpl_bad $2
    fi
}

#  Find the cut points for the templated elements.  sumnum accumulates the
#  total number of date-time digits described by the template.

sumnum=0

#  Variable parts of the template may be replaced by a series
#  of "=" characters.

modtpl=no
ept=`gc_tpl_pos "="`
if [ $ept -gt 0 ]; then
    modtpl=yes
fi

#  Year:  exactly YYYY (Yadd=3) or exactly YY (Yadd=1).  Yadd is the offset
#  from the first to the last year character.
Ypt=`gc_tpl_pos Y`
if [ $Ypt -gt 0 ]; then
    if gc_tpl_has YYYYY; then
        gc_tpl_bad Year
    elif gc_tpl_has YYYY; then
        Yadd=3
        sumnum=`expr $sumnum + 4`
    elif gc_tpl_has YYY; then
        gc_tpl_bad Year
    elif gc_tpl_has YY; then
        Yadd=1
        sumnum=`expr $sumnum + 2`
    else
        gc_tpl_bad Year
    fi
fi

#  Month, day and hour are all two-character elements.
Mpt=`gc_tpl_pos M`
if [ $Mpt -gt 0 ]; then
    gc_chk_pair M Month
fi

Dpt=`gc_tpl_pos D`
if [ $Dpt -gt 0 ]; then
    gc_chk_pair D Day
fi

Hpt=`gc_tpl_pos H`
if [ $Hpt -gt 0 ]; then
    gc_chk_pair H Hour
fi

#  A template without any date-time element cannot be used.
if test $sumnum -eq 0
then
    echo " "
    echo gc:  No valid date-time template was given.
    exit 1
fi

#  Forecast hour.  Fpt equals nctmp when the template holds no "F".  In
#  that case vgrep becomes an extra pipeline stage that drops every name
#  containing "f" followed by two or three digits; otherwise the template
#  must contain at least two "F" characters in a row.
#  NOTE(review): the "f[0-9]" filter is applied to the whole name, so a
#  name such as pgbf2012030600 is dropped as well -- confirm intended.
Fpt=`echo $template | cut -d"F" -f1 | wc -c`
vgrep=
if test $Fpt -eq $nctmp
then
    vgrep=' | grep -v "f[0-9]\{2,3\}"'
elif echo $template | grep 'F\{2,3\}' > /dev/null 2>&1
then
    :
else
    echo gc:  Forecast hour template is not valid.
    exit 1
fi

#  gc_build_grepit
#  Build the pipeline text in $grepit that selects the names matching the
#  literal parts of the template.  The text starts with " | " because it is
#  later appended to a listing command and run through eval.
#
#  Reads:   template, modtpl, sumnum
#  Writes:  template ("=" replaced by blanks), grepstrings, nwrd, grepit
#
#  Each literal segment is passed to grep as a quoted, fixed string after
#  "-e".  That way a segment starting with "-" is not taken for a grep
#  option, a "." in the template only matches a ".", and the segment is
#  not re-interpreted by the shell when the pipeline is evaluated.
gc_build_grepit() {
    if [ "$modtpl" = "yes" ]; then
        template=`echo $template | tr '=' ' '`
    fi

    #  Blank out the date-time and forecast hour letters; what remains are
    #  the literal segments of the template separated by blanks.
    grepstrings=`echo $template | tr 'YMDHF' '     '`
    nwrd=`echo $grepstrings | wc -w`

    if [ $nwrd -eq 0 ]; then
        #  The name consists of the date-time digits only.  $sumnum is left
        #  unexpanded here on purpose; it is expanded by the later eval.
        grepit=' | grep "^[0-9]\{$sumnum\}\$"'
    elif [ $nwrd -eq 1 ] && [ $grepstrings = "f" ]; then
        #  Only the forecast hour marker remains, so the name has to start
        #  with the date-time digits.
        grepit=' | grep f | grep "^[0-9]\{$sumnum\}"'
    else
        grepit=
        for strng in $grepstrings; do
            grepit="$grepit | grep -F -e '$strng'"
        done
        #  Always require the run of date-time digits as well.
        grepit="$grepit | grep '[0-9]\{$sumnum\}'"
    fi
}

gc_build_grepit
#echo Modified template = $template

#  Move into the directory to be cleaned.  Everything after this point
#  (including the final remove) works on names relative to the current
#  directory, so the script must stop if the directory cannot be entered.
#  The path is quoted: an empty path must not pass the test, and a bare
#  "cd" must never silently switch to the home directory.
if [ ! -d "$qpath" ]; then
    echo gc:  Directory $qpath does not exist.
    exit 1
fi
cd "$qpath" || {
    echo gc:  Cannot change to directory $qpath.
    exit 1
}

#  Only names with exactly as many characters as the template qualify.
numchk=" | grep '^.\{$ncm1\}$'"

#echo GREP string:  $numchk $grepit $vgrep
#echo Ypt Mpt Dpt Hpt = $Ypt $Mpt $Dpt $Hpt

#  Candidate list:  directory listing filtered by name length, by the
#  literal parts of the template, and by the forecast hour filter.  The
#  filter stages are stored as text, hence the eval.
fdlist=`eval /bin/ls -1 $numchk $grepit $vgrep`

# Construct list of YYYYMMDDHH cycles.
#
#  Results:
#    cyclelist  cycle of every entry of $fdlist, in the same order,
#               separated by blanks
#    nymdh      number of distinct cycles
#    dcycles    the oldest cycles, i.e. the ones to be deleted, one per line
#
#  The distinct cycles are collected in a shell variable; no scratch file
#  in /tmp is needed.

#  Last character position of each date-time element.  These depend on the
#  template only, so they are computed once in front of the loop.
Yend=0
Mend=0
Dend=0
Hend=0
if [ $Ypt -gt 0 ]; then
    Yend=`expr $Ypt + $Yadd`
fi
if [ $Mpt -gt 0 ]; then
    Mend=`expr $Mpt + 1`
fi
if [ $Dpt -gt 0 ]; then
    Dend=`expr $Dpt + 1`
fi
if [ $Hpt -gt 0 ]; then
    Hend=`expr $Hpt + 1`
fi

cyclelist=
ucycles=
nymdh=0
for fd in $fdlist; do
    #  The cycle is always assembled in year, month, day, hour order, no
    #  matter where the elements are located in the name.
    cyc=
    if [ $Ypt -gt 0 ]; then
        cyc=`echo $fd | cut -c${Ypt}-${Yend}`
    fi
    if [ $Mpt -gt 0 ]; then
        cyc="$cyc`echo $fd | cut -c${Mpt}-${Mend}`"
    fi
    if [ $Dpt -gt 0 ]; then
        cyc="$cyc`echo $fd | cut -c${Dpt}-${Dend}`"
    fi
    if [ $Hpt -gt 0 ]; then
        cyc="$cyc`echo $fd | cut -c${Hpt}-${Hend}`"
    fi
    #  Remember each cycle once.
    case " $ucycles " in
        *" $cyc "*)
            ;;
        *)
            ucycles="$ucycles $cyc"
            nymdh=`expr $nymdh + 1`
            ;;
    esac
    cyclelist="$cyclelist ${cyc}"
done

if [ $nymdh -eq 0 ]; then
    echo gc:  Template may not be valid or appropriate.
    exit 1
fi

#  The keep number has to be a non-negative integer.  In particular a
#  negative number must be refused: it would make the number of cycles to
#  delete larger than the number of cycles found and so delete everything.
case $nkeep in
    ''|*[!0-9]*)
        echo gc:  The keep number is not valid.
        exit 1
        ;;
esac

ndel=`expr $nymdh - $nkeep`

if [ $ndel -le 0 ]; then
    echo gc:  There are no cycles to remove.
    exit
fi

#  Sorted newest first, the last ndel lines are the oldest cycles.
dcycles=`for cyc in $ucycles; do echo $cyc; done | sort -ru | tail -n $ndel`

#  gc_pick_removals
#  Walk $fdlist and $cyclelist in step (the n-th cycle belongs to the n-th
#  name) and handle every name whose cycle is listed in $dcycles:
#    action "r"       print "gc: Removing <name>" and append the name to
#                     $rmlist
#    anything else    print "<name> would be removed."
#
#  Reads:   fdlist, cyclelist, dcycles, action
#  Writes:  rmlist
gc_pick_removals() {
    rmlist=
    #  Cycles to delete as one blank-delimited string for whole-word lookup.
    gc_old=" `echo $dcycles` "
    #  Inside a function "set --" only replaces the function's own
    #  positional parameters; the script arguments are not affected.
    set -- $cyclelist
    for fd in $fdlist; do
        cyc=$1
        if [ $# -gt 0 ]; then
            shift
        fi
        #  Never act on a name for which no cycle is known.
        if [ -z "$cyc" ]; then
            continue
        fi
        case $gc_old in
            *" $cyc "*)
                if [ "$action" = r ]; then
                    echo gc:  Removing $fd
                    rmlist="$rmlist $fd"
                else
                    echo $fd would be removed.
                fi
                ;;
        esac
    done
}

gc_pick_removals

if [ "$action" = r ] && [ -n "$rmlist" ]; then
#   Execute bulk remove.  "--" keeps a name that starts with "-" from
#   being taken for an option.
    /bin/rm -rf -- $rmlist
fi
