#!/bin/bash
#Copyright 2001, William Stearns
#Released under the GPL.
#This script will free up space by hardlinking identical files together.
#From the yet-another-program-that-should-have-been-written-in-a-real-language series.
#
#Presettable parameters (place on command line before freedups):
#DEBUG=YES		#Debugging output if YES.
#ACTUALLYLINK=YES	#Just reports on potential savings if anything but YES.
#VERBOSE=YES		#Show directory listing and wait before linking if YES.
#CHECKDATE=YES		#Modified date and time must be equal to be considered for linking if YES.
#FILENAMESEQUAL=YES	#Files must have the same name (in different directories) to be considered for linking if YES.
#MINSIZE=size		#Files must be larger than this size (in bytes) to be considered for linking.
#EXCLUDE=regex		#Egrep regex of files to ignore - not implemented yet.
#
#Command line holds just the dirs to look under.
#
#Needs GNU find (-printf), GNU sort/uniq (-z) and GNU stat (-c).

#Running totals in bytes, updated by linkfiles.
SPACESAVED=0
SPACEWOULDHAVESAVED=0

#debug prints its arguments on stderr, but only when DEBUG=YES was preset by
#the caller; otherwise it is a no-op.
if [[ "${DEBUG:-}" == "YES" ]]; then
  debug() {
    printf '%s\n' "$*" >&2
  }
else
  debug() {
    :
  }
fi

#nodebug is a permanently disabled trace call: rename a call to "debug" to
#switch that particular trace on.
nodebug() {
  :
}

#######################################
# Hardlink two files together, or just account for the potential saving.
# Globals:   ACTUALLYLINK, VERBOSE (read)
#            SPACESAVED, SPACEWOULDHAVESAVED (written)
# Arguments: $1, $2 - the 2 files that need to be hardlinked together.
# Returns:   0 when the pair was linked (or counted, when ACTUALLYLINK is
#            not YES); 1 on too few arguments, a non-file argument, or a
#            failed stat/ln.
# NOTE(review): everything after the VERBOSE listing (the ln call and the
# byte accounting) was rebuilt from the header documentation and the two
# counters; confirm against the upstream script.
#######################################
linkfiles() {
  nodebug '++++' lf "$@"
  if (( $# < 2 )); then
    return 1
  fi

  local afile
  for afile in "$@"; do
    if [[ ! -f "$afile" ]]; then
      debug "$afile is not a file, aborting link."
      return 1
    fi
  done

  #Ask stat for the size rather than parsing ls output.
  local filesize
  filesize=$(stat -c '%s' -- "$1") || return 1

  if [[ "${ACTUALLYLINK:-}" != "YES" ]]; then
    SPACEWOULDHAVESAVED=$(( SPACEWOULDHAVESAVED + filesize ))
    return 0
  fi

  #Link to the older file: the newer name is the one that gets replaced.
  #To support more than 2 file parameters this would have to become a loop
  #over "$@" with the first file as the link source.
  local keep replace
  if [[ "$1" -nt "$2" ]]; then
    keep=$2
    replace=$1
  else
    keep=$1
    replace=$2
  fi

  if [[ "${VERBOSE:-}" == "YES" ]]; then
    ls -ali -- "$keep" "$replace"
    #Wait for the user. stdin belongs to the find pipeline while the main
    #loop is running, so the pause has to read from somewhere else.
    #NOTE(review): the original redirect source is unknown; /dev/tty is
    #assumed. A missing terminal is deliberately not treated as an error.
    { read -r _ </dev/tty; } 2>/dev/null || true
  fi

  if ln -f -- "$keep" "$replace"; then
    SPACESAVED=$(( SPACESAVED + filesize ))
    return 0
  fi
  debug "Could not link $replace to $keep."
  return 1
}

#######################################
# Decide whether two files are candidates for hardlinking.
# Globals:   CHECKDATE, FILENAMESEQUAL (read)
# Arguments: $1, $2 - the files to compare.
# Returns:   0 if the files are identical and may be linked,
#            9 if their contents differ,
#            another non-zero value if an earlier check rejects the pair.
# NOTE(review): only the content comparison (return 9 / return 0) is known
# from the original; the earlier checks follow the header documentation and
# the "owner, size, rights, optionally basename" FIXME, and the return codes
# 1-7 are this rewrite's own. Callers only test for zero / non-zero.
#######################################
filesshouldbelinked() {
  nodebug '====' fsbl "$@"
  if (( $# < 2 )); then
    return 1
  fi

  #Only plain regular files; never replace or follow a symlink.
  if [[ ! -f "$1" || ! -f "$2" || -L "$1" || -L "$2" ]]; then
    return 2
  fi

  #Same device and inode: already linked, nothing to gain.
  if [[ "$1" -ef "$2" ]]; then
    return 3
  fi

  #Device (hardlinks cannot cross filesystems), size, owner, group and
  #permission bits all have to agree.
  local meta1 meta2
  meta1=$(stat -c '%d %s %u %g %a' -- "$1") || return 4
  meta2=$(stat -c '%d %s %u %g %a' -- "$2") || return 4
  if [[ "$meta1" != "$meta2" ]]; then
    return 5
  fi

  if [[ "${CHECKDATE:-}" == "YES" ]]; then
    local mtime1 mtime2
    mtime1=$(stat -c '%Y' -- "$1") || return 4
    mtime2=$(stat -c '%Y' -- "$2") || return 4
    if [[ "$mtime1" != "$mtime2" ]]; then
      return 6
    fi
  fi

  if [[ "${FILENAMESEQUAL:-}" == "YES" && "${1##*/}" != "${2##*/}" ]]; then
    return 7
  fi

  if ! cmp -s -- "$1" "$2"; then
    nodebug "$1" and "$2" have different contents.
    return 9
  fi
  nodebug Identical.
  return 0
}

#######################################
# Compare every file of a same-size group with every other one once and
# link the pairs that qualify.
# Arguments: the names of all files that share one size.
# Returns:   0
#######################################
processsamesizefiles() {
  nodebug '----' pssf "$@"
  local -a files=( "$@" )
  local -a merged=()
  local count=$#
  local i j

  for (( i = 0; i < count; i++ )); do
    #A file already merged into an earlier one brings no new pairs: whatever
    #matches it was matched against that earlier file already. Skipping it
    #also keeps the report-only total at k-1 links for k identical files
    #instead of counting every pair.
    if [[ -n "${merged[i]:-}" ]]; then
      continue
    fi
    for (( j = i + 1; j < count; j++ )); do
      if [[ -n "${merged[j]:-}" ]]; then
        continue
      fi
      if filesshouldbelinked "${files[i]}" "${files[j]}"; then
        if linkfiles "${files[i]}" "${files[j]}"; then
          merged[j]=1
        fi
      fi
    done
  done
  return 0
}

#######################################
# Walk the given directories, group the files by size and hand each group
# of two or more files to processsamesizefiles.
# Globals:   MINSIZE, ACTUALLYLINK (read)
# Arguments: the directories to look under.
# Outputs:   progress and totals on stderr.
# Returns:   0 after a scan, 1 when no directory was given.
#######################################
main() {
  #TODO: parse for parameters.
  if (( $# == 0 )); then
    printf '%s\n' \
      'Usage:' \
      "$0 dirs" \
      'Example:' \
      "$0 /usr/src/linux* /usr/src/pcmcia-cs*" >&2
    return 1
  fi

  #An argument naming an existing path is used as is (so names with spaces
  #work). Anything else is word-split and glob-expanded, which keeps the
  #documented habit of passing one quoted list of patterns working.
  local arg
  local -a dirs=()
  for arg in "$@"; do
    if [[ -e "$arg" ]]; then
      dirs+=( "$arg" )
    else
      # shellcheck disable=SC2206 # splitting and globbing are intended here
      dirs+=( $arg )
    fi
  done
  printf 'About to check for links in %s\n' "${dirs[*]}" >&2

  #Empty files are never worth linking, hence the default bound of 0.
  local minsize=${MINSIZE:-0}

  #find emits NUL-terminated "size path" records, so no file name can break
  #the parsing, and no temporary file is needed. The loop reads through
  #process substitution rather than a pipe so that the totals set inside it
  #are still there afterwards.
  #TODO: grep out exclude list if $EXCLUDE set.
  local record size path previous_size=''
  local -a group=()
  while IFS= read -r -d '' record; do
    size=${record%% *}
    path=${record#* }
    nodebug Z "$size" Z "$path" Z
    if [[ "$size" != "$previous_size" ]]; then
      if (( ${#group[@]} > 1 )); then
        nodebug "${group[*]}" have the same size.
        processsamesizefiles "${group[@]}"
      fi
      group=()
      previous_size=$size
    fi
    group+=( "$path" )
  done < <(
    find "${dirs[@]}" -xdev -type f -size "+${minsize}c" -printf '%s %p\0' \
      | LC_ALL=C sort -z -nr \
      | LC_ALL=C uniq -z
  )

  #The last size group has no following record to trigger it.
  if (( ${#group[@]} > 1 )); then
    nodebug "${group[*]}" have the same size.
    processsamesizefiles "${group[@]}"
  fi

  #NOTE(review): the wording of the original report is unknown.
  if [[ "${ACTUALLYLINK:-}" == "YES" ]]; then
    printf 'Space saved: %s bytes.\n' "$SPACESAVED" >&2
  else
    printf 'Space that would have been saved: %s bytes.\n' "$SPACEWOULDHAVESAVED" >&2
  fi
  return 0
}

#Last command on purpose: the exit status of the script is main's status
#(1 after the usage message, 0 after a scan).
main "$@"