Automatically clean old temporary files

Temporary files can pile up, especially on servers that are infrequently rebooted. We all know how much a server hates to run out of hard disk space, so on a busy machine that gets rebooted only once every year or two, temporary files can become a headache that nobody wants to sit down and deal with proactively.

The technique I found that works for me is to set a policy on each temporary directory about how long files will remain there since they were last accessed, so people know what to expect. For example, in /tmp I might keep files for 7 days while in /var/tmp I might want to keep them for 30 days.

For fairly new files I use the last access time, rather than last modification time, so I know for sure that a file has not been needed during the decided period. If atimes are disabled in fstab then you will always be using mtimes. When checking for files more than 60 days old I use mtimes because some search tools will update atimes and give a false representation of a file's usage, but you may wish to remove this exception and always use atimes for safety.

An idea that occurs to me as I write this is that a person could put a policy file within each directory that had the number of days that the files were to be kept, if you wanted to give control of that to your users. In the script you will see that I check for a “tmp” directory in each /home/ directory; that might be one place you'd want to let people specify their own retention policy.

I have Cron run the script every night to achieve automated clean-up. You will find comments within the file and variables at the top to set the number of days etc.

Double-click to highlight the entire script.

#!/bin/bash
# Angelo Babudro  www.ispltd.org
# Clean-up or archive temporary files on a regular basis
# Intended to be run from CRON around midnight

# Define directories to be cleaned according to how old we allow files to be.

declare -a remove archive
#|The number in the square brackets is the number of days old the file
#|must be to be deleted.  Any integer can be used.  The value is the
#|directory to clean; several directories may share one age if they are
#|separated by spaces.  Keep the trailing slash on each directory.
remove[7]="/tmp/"
remove[30]="/var/tmp/"
remove[365]="/usr/portage/distfiles/"
#|Files to compress and keep in an "archive" directory
# archive[10]="/var/tmp/";
ArchGroup="admins"                                 #|Group to give ownership of archives
#|Directories to always keep, even if empty.
preserve="/var/tmp /var/tmp/backup /tmp"           #|Do not delete even if empty.  Space-delimited.
#|Paths to ignore completely.  This is an extended regular expression (not
#|a glob) matched against the full path, so each alternative is anchored
#|with ^ and literal dots are escaped.  Without the anchors and escapes the
#|pattern also exempted unrelated paths such as /tmp/tmu_report.txt.
ignore='^/var/tmp/\.oracle(/|$)|^/tmp/tmux'

#|Colour escape sequences.  They are only populated when stdin is a
#|terminal or a pipe; otherwise every variable is set to the empty string
#|so the same output statements produce plain text.
#|bg_Red is used by the dry-run banner printed near the end of the script;
#|it was previously referenced there without ever being defined.
if [[ -t 0 || -p /dev/stdin ]]
then
	Yellow=$'\e[1;33m'
	Cyan=$'\e[1;36m'
	Dim=$'\e[0;36m'
	bg_Red=$'\e[41m'
	Reset=$'\e[0m'
else
	Yellow=''
	Cyan=''
	Dim=''
	bg_Red=''
	Reset=''
fi

#|Command-line handling.  Only the first argument is examined:
#|  (none)       normal run
#|  -n           dry run: report what would happen, erase nothing
#|  -h, --help   show the usage text and exit 0
#|  other        report it as unknown, show the usage text, exit 1

#|Print the usage text on stdout.
function showUsage {
    echo
    echo -e "Utility to periodically clean (erase) files from temporary directories."
    echo
    echo -e "Usage:\t\e[1;33m$0 [-n]\e[0m"
    echo
    echo -e "\t\e[1;33m-n\e[0m is to flag a dry-run, no files are erased."
}

#|Set the globals dryrun (0/1) and unknowns (0/1) from the first argument.
#|Exits the script for a help request or an unknown argument.
function parseArgs {
    dryrun=0
    unknowns=0
    case "${1-}" in
        '')
            ;;
        -n)
            dryrun=1
            ;;
        -h|--help)
            showUsage
            exit 0
            ;;
        *)
            unknowns=1
            #|A bad argument is an error: report on stderr, non-zero status.
            {
                echo
                echo -e "\e[0;37mUnknown parameter \e[4m$1\e[0m"
                showUsage
            } >&2
            exit 1
            ;;
    esac
}

parseArgs "$@"

#|Remove a single path, honouring the ignore/preserve lists and dry-run mode.
#|  $1  path to examine
#|  $2  1 = also try to remove the path when it is a directory;
#|      0 (the default) = leave directories alone
#|Globals read: dryrun, ignore (regex), preserve (space-delimited list),
#|Dim and Reset (colour codes).
#|Regular files are deleted with rm.  Directories are only ever removed with
#|rmdir, so a non-empty directory is never deleted.  Anything that is neither
#|a regular file nor a directory is reported and left alone.
function tryToDelete {
    local afile="$1"
    local alsodirs="${2:-0}"
    local preserveDir
    local -a keepDirs

    if [[ -f $afile ]]; then
        #|The ignore test comes before the dry-run test so that -n reports
        #|exactly what a real run would do.  An empty regex would match every
        #|path, hence the -n guard.
        if [[ -n $ignore && $afile =~ $ignore ]]; then
            printf '  %sIgnoring %s%s\n' "$Dim" "$afile" "$Reset"
        elif [[ $dryrun -eq 1 ]]; then
            echo "  would remove $afile"
        else
            rm -v -- "$afile" || echo "Error removing ${afile}"
        fi
    elif [[ -d $afile ]]; then
        [[ $alsodirs -eq 0 ]] && return
        afile=${afile%/}
        #|Ignored directories must survive even when they are empty.
        if [[ -n $ignore && $afile =~ $ignore ]]; then
            printf '  %sIgnoring %s%s\n' "$Dim" "$afile" "$Reset"
            return
        fi
        #|Split the preserve list on spaces without touching the global IFS.
        IFS=' ' read -r -a keepDirs <<< "$preserve"
        for preserveDir in "${keepDirs[@]}"; do
            #|Right-hand side is deliberately unquoted: entries keep working
            #|as patterns.  A trailing slash on an entry is tolerated.
            if [[ $afile == ${preserveDir%/} ]]; then
                printf '%sPreserving %s%s\n' "$Dim" "$afile" "$Reset"
                return
            fi
        done
        if [[ $dryrun -eq 1 ]]; then
            echo "  would try to remove dir $afile"
        elif rmdir -- "$afile" 2>/dev/null; then
            echo "Removed dir $afile"
        else
            echo "Directory $afile is not empty or permission denied"
        fi
    else
        printf '%sIgnored special file %s%s\n' "$Dim" "$afile" "$Reset"
    fi
}
 
#|Delete everything under a directory that is older than a number of days.
#|  $1  age threshold in days; values below 1 are raised to 1
#|  $2  directory to scan (recursively)
#|Returns 1 when an argument is missing, the age is not an integer, or $2 is
#|not a directory.
#|Thresholds up to 60 days are judged by atime, larger ones by mtime.
#|Globals read: Yellow, Cyan, Reset.  Calls tryToDelete for each match.
function cleanup {
    [[ -z $1 || -z $2 ]] && return 1
    local daysold="$1"
    local toclean="$2"
    local TimeType="atime"                              #|Default to using atime
    local SayDays="days"
    local afile
    local -a found=()
    local -i idx

    [[ -d $toclean ]] || return 1
    [[ $daysold =~ ^-?[0-9]+$ ]] || return 1
    #|Work from the function's own argument, compared numerically, rather
    #|than from whatever the caller's loop variable happens to hold.
    if [ "$daysold" -lt 1 ]; then daysold=1; fi         #|Minimum 1 day old
    if [ "$daysold" -eq 1 ]; then SayDays="day"; fi
    if [ "$daysold" -gt 60 ]; then TimeType="mtime"; fi #|When looking for old files use mtime
    printf "\n%sCheck %s to remove items with %s over %d %s old.%s\n" "$Yellow" "$toclean" "$TimeType" "$daysold" "$SayDays" "$Reset"

    #|Scan once, NUL-delimited, so names containing tabs, spaces, newlines
    #|or glob characters arrive intact and the global IFS is never changed.
    #|find's own errors (e.g. unreadable subdirectories) are discarded on
    #|purpose, as before.
    while IFS= read -r -d '' afile; do
        found+=("$afile")
    done < <(find "$toclean" -writable "-${TimeType}" "+${daysold}" -print0 2>/dev/null)

    #|Pass 1: remove files (tryToDelete leaves directories alone when its
    #|second argument is 0).
    for afile in "${found[@]}"; do
        tryToDelete "$afile" 0
    done
    #|Pass 2: attempt to remove directories from the same scan.  find lists
    #|a directory before its contents, so walking the list backwards visits
    #|children before their parents.
    for (( idx = ${#found[@]} - 1; idx >= 0; idx-- )); do
        afile=${found[idx]}
        if [[ -d $afile ]]; then
            tryToDelete "$afile" 1
        fi
    done
    #|Each item is counted once (the scan is no longer run twice).
    echo -e "${Cyan}-> ${#found[@]} files found.${Reset}\n"
}

#|Compress old files from a directory into its "archive" sub-directory.
#|  $1  age threshold in days; values below 1 are raised to 1
#|  $2  directory to scan (top level only, regular files only)
#|Returns 1 when an argument is missing, the age is not an integer, $2 is
#|not a directory, or the archive directory cannot be created.
#|Thresholds up to 60 days are judged by atime, larger ones by mtime.
#|Files whose name does not end in "gz" are moved into the archive and
#|gzipped there with a date-stamped suffix; files already ending in "gz"
#|are just moved.
#|Globals read: dryrun, ArchGroup, Yellow, Cyan, Reset.
function packup {
	[[ -z $1 || -z $2 ]] && return 1
	local daysold="$1"
	local toclean="$2"
	local fdate fsuffix ArchDir afile base
	local TimeType="atime"                          #|Default to using atime
	local SayDays="days"
	local no_files=0

	[[ -d $toclean ]] || return 1
	[[ $daysold =~ ^-?[0-9]+$ ]] || return 1
	#|Use the function's own argument (numerically) and leave the caller's
	#|loop variable untouched.
	if [ "$daysold" -lt 1 ]; then daysold=1; fi     #|Minimum 1 day old
	if [ "$daysold" -eq 1 ]; then SayDays="day"; fi
	if [ "$daysold" -gt 60 ]; then TimeType="mtime"; fi   #|When looking for old files use mtime

	fdate=$(date +"%Y-%m-%d_%H:%M")
	fsuffix="_${fdate}.gz"
	ArchDir="${toclean%/}/archive"                  #|Works with or without a trailing slash

	if [ ! -d "$ArchDir" ]; then                    #|Create "archive" if it does not exist
		if [ "$dryrun" -eq 1 ]; then
			echo "I would create $ArchDir"
		elif ! mkdir -m 775 -- "$ArchDir"; then
			echo "Err: cannot create $ArchDir"
			return 1
		fi
	fi
	#|Give the archive to the configured group.  This is skipped in a dry
	#|run and when the directory does not exist, and it uses ArchGroup
	#|rather than a hard-coded group name.  A failure here is not fatal.
	if [ "$dryrun" -ne 1 ] && [ -d "$ArchDir" ] && [ -n "$ArchGroup" ]; then
		chgrp -R -- "$ArchGroup" "$ArchDir" || echo "Warning: could not set group $ArchGroup on $ArchDir"
	fi
	echo -e "\nArchive directory is $ArchDir\n"
	printf "\n%sCheck %s to pack items with %s over %d %s old.%s\n" "$Yellow" "$toclean" "$TimeType" "$daysold" "$SayDays" "$Reset"

	#|NUL-delimited scan: names with spaces, tabs or glob characters are
	#|handled intact and the global IFS is never changed.
	while IFS= read -r -d '' afile; do
		echo -n " "
		if [[ -f $afile ]]; then                    #|Does the file still exist?
			base=${afile##*/}
			if [[ $afile != *gz ]]; then            #|Is file NOT a gzip already?
				if [ "$dryrun" -eq 1 ]; then
					echo "  would gzip -9 --suffix $fsuffix $afile"
				#|Move first, then compress in place inside the archive; gzip
				#|replaces <name> with <name><suffix>.
				elif ! { mv -- "$afile" "$ArchDir/" && gzip -9v --suffix "$fsuffix" -- "$ArchDir/$base"; }; then
					echo "Err: ${afile}"
				fi
			else
				if [ "$dryrun" -eq 1 ]; then
					echo "I would move $afile to $ArchDir"
				else
					mv -- "$afile" "$ArchDir/" || echo "Err: ${afile}"   #|Move any GZip files we come across
				fi
			fi
		else
			echo "Err: ${afile}"
		fi
		no_files=$(( no_files + 1 ))
	done < <(find "$toclean" -maxdepth 1 -type f "-${TimeType}" "+${daysold}" -print0 2>/dev/null)
	echo -e "${Cyan}-> ${no_files} files found.${Reset}\n"
}

#|Run banner: which script, on which host, started when.
printf '%s\n' "--------------------"
printf 'Running  %s\n' "$0"
printf 'Server   %s\n' "$(hostname -f)"
printf 'Start    %s\n' "$(date)"
printf '%s\n' "--------------------"
if [ "$dryrun" -eq 1 ]; then
    printf '%b\n' "${bg_Red}${Yellow}Dry run${Reset}"
fi

#|Deletion jobs.  Each index of "remove" is an age in days; its value is
#|split on whitespace, so one age may cover several directories.
#|The loop variable is called "days" on purpose: it is a global that the
#|functions called from here can see.
for days in "${!remove[@]}"; do
    for target in ${remove[$days]}; do
        cleanup "$days" "$target"
    done
done

#|Archive jobs, laid out the same way in "archive".
for days in "${!archive[@]}"; do
    for target in ${archive[$days]}; do
        packup "$days" "$target"
    done
done

printf '\nFinish %s\n' "$(date)"
 
# _______________
# vim: ts=4:sw=4: