#!/usr/bin/python
"""
Renames one or more files to {sha256sum}.{correct_file_extension} .
-c parameter will actually perform the change, -n will not (dry-run mode, which is the default).
If -t {target_dir} supplied, will move the normalized files into this target directory
-v verbose.
NOTE - when you use "-c", this _will_ rename, delete, and/or move the files provided.
You should not use this on your only copy of any given file.
"""
#Copyright 2017 William Stearns
#Released under the GPL.
#v2.1

import hashlib
import sys
import os
import os.path
import filecmp
import shutil
import argparse
#If the argparse module is not installed by default, try one of the following:
#sudo yum install python-argparse       #RPM-based linux
#sudo port install py-argparse          #Mac OS with ports
#sudo -H pip install argparse           #Python's pip module installer tool
import magic
#If the magic module is not installed by default, try one of the following:
#sudo yum install python-magic          #RPM-based linux
#sudo port install magic                #Mac OS with ports
#sudo -H pip install python-magic       #Python's pip module installer tool
import mimesupport


#Set from the -v command line flag in main(); read by file_rename().
verbose = False


def debug(message):
    """Write debug message to stderr"""

    sys.stderr.write(message + '\n')


def sha256_checksum(filename, block_size=65536):
    """Calculate the sha256 checksum of the given file.

    The file is read in chunks of block_size bytes so large files are not
    loaded into memory at once.  Returns the hex digest as a string, or ''
    (after writing a message to stderr) if the file cannot be read.
    """

    sha256 = hashlib.sha256()
    try:
        with open(filename, 'rb') as f:
            for block in iter(lambda: f.read(block_size), b''):
                sha256.update(block)
    except (IOError, OSError):
        #Only trap I/O failures; a bare except would also swallow Ctrl-C.
        debug("Error reading from " + str(filename))
        return ''
    return sha256.hexdigest()


def mime_of(a_file):
    """Return the mime type of a given file, or None if it cannot be determined.

    Tries magic.from_file first; if that fails for any reason, falls back to
    the magic.open()/load()/file() interface.  The fallback result may have a
    semicolon followed by more details.
    """

    try:
        return magic.from_file(a_file, mime=True)
    except Exception:
        #Apparently old versions of magic don't have from_file
        try:
            m = magic.open(magic.MAGIC_MIME)
            m.load()
            return m.file(a_file)       #Note; this may have a semicolon followed by more details
        except Exception:
            #likely file read error
            return None


def _is_same_file(path_a, path_b):
    """Return True if both paths refer to the same underlying file."""

    try:
        return os.path.samefile(path_a, path_b)
    except (AttributeError, OSError):
        #samefile is unavailable on this platform, or one of the paths does not exist;
        #fall back to comparing the fully resolved path names.
        resolved_a = os.path.normcase(os.path.realpath(path_a))
        resolved_b = os.path.normcase(os.path.realpath(path_b))
        return resolved_a == resolved_b


def file_rename(source, dest, actually_change):
    """Rename a file, but proceed carefully with tests.

    source and dest are converted to strings before use.  When actually_change
    is false nothing on disk is touched and the verbose messages say what
    "would have" happened.  Reads the module-level "verbose" flag.

    Outcomes, checked in this order:
    - source and dest are the same name, or the same underlying file: no action.
    - dest is not an existing file: source is moved to dest.
    - dest exists with identical content: source is deleted.
    - dest exists with different content: a warning is written, no action.
    """

    source_s = str(source)
    dest_s = str(dest)

    if actually_change:
        modifier = ""
    else:
        modifier = " would have"

    #First see if we have the right name already
    if source_s == dest_s:
        #The filename we were handed is already normalized - do not take any action.
        if verbose:
            debug("Already normalized: " + source_s)
    #Different spelling of the very same file (./name vs name, symlinked directory, case-insensitive
    #filesystem).  Without this check the content comparison below would succeed and we would
    #delete the only copy of the file.
    elif _is_same_file(source_s, dest_s):
        if verbose:
            debug("Already normalized (same file as " + dest_s + "): " + source_s)
    #So filename is not normalized, now see if the dest exists at all
    elif not os.path.isfile(dest_s):
        if actually_change:
            #shutil.move renames when possible and falls back to copy+delete when the
            #destination is on a different filesystem, where os.rename would fail.
            shutil.move(source_s, dest_s)
        if verbose:
            debug("Normalized name does not exist," + modifier + " renamed " + source_s + " to " + dest_s)
    #So filename is not normalized and dest file already exists, now see if the files have identical content
    elif filecmp.cmp(source_s, dest_s, False):
        #Byte for byte compare of the files (when using shallow=False at the end), returns True if equal content
        if actually_change:
            os.remove(source_s)
        if verbose:
            debug("File contents equal between " + source_s + " and " + dest_s + " ," + modifier + " deleted " + source_s)
    #So filename is not normalized, dest file already exists, and files have different content
    else:
        debug("WARNING: File contents DIFFER between " + source_s + " and " + dest_s + " , no action taken ")


def main():
    """Parse the command line, collect the filenames and normalize each one."""

    global verbose

    parser = argparse.ArgumentParser(description='Normalize name(s) of supplied file(s).')
    parser.add_argument('-f', '--file', help='File to normalize', required=False, nargs='*')
    parser.add_argument('-c', '--change', help='Actually change filesystem', dest='change', action='store_true')
    parser.add_argument('-n', '--no-change', help='Dry run; do not actually change filesystem', dest='change', action='store_false')
    parser.add_argument('-s', '--stdin', help='Read filenames from stdin', required=False, action='store_true')
    parser.add_argument('-t', '--target', help='Target directory into which to move normalized files', required=False)
    parser.add_argument('-v', '--verbose', help='Verbose, more detail about each action', action='store_true')
    parser.set_defaults(change=False)
    #Can optionally use , type=argparse.FileType('r') above; this returns an array of already-opened filehandles.
    args = vars(parser.parse_args())

    filenames = []      #This will hold tuples; [0] is the path, [1] is the filename

    if args['file'] is not None:
        for one_file in args['file']:
            filenames.append(os.path.split(one_file))

    if args['stdin']:
        for one_file in sys.stdin:
            filenames.append(os.path.split(one_file.replace('\n', '')))     #We test later to see if these are valid files.

    if not filenames:
        debug("No filenames supplied, aborting.")
        sys.exit(1)

    if args['target'] is not None:
        target = str(args['target'])
        if os.path.isdir(target) and os.access(target, os.W_OK):
            dest_dir = args['target'] + '/'
        else:
            #Errors go to stderr like every other diagnostic in this script.
            debug(target + " does not appear to be writable, exiting.")
            sys.exit(1)
    else:
        dest_dir = ''

    verbose = args['verbose']

    for one_path, one_file in filenames:
        full_path = os.path.join(one_path, one_file)

        if one_file is None or one_file == '':
            if verbose:
                debug("Skipping directory " + one_path)
        elif os.path.islink(full_path):
            if verbose:
                debug("Skipping symlink " + full_path)
        elif os.path.isfile(full_path):
            mime_type = mime_of(full_path)
            if mime_type is None:
                debug("File read error on " + full_path)
                continue

            sha_sum = sha256_checksum(full_path)
            if not sha_sum:
                #Checksum failed (already reported by sha256_checksum).  Do not rename,
                #otherwise the file would end up named just ".{extension}".
                continue

            extension = mimesupport.ext_of_mime(mime_type, None, full_path)

            #With no target directory, files go back into the same dirs they came from;
            #otherwise all files are moved into the single target directory.
            if dest_dir == '':
                out_dir = one_path
            else:
                out_dir = dest_dir
            file_rename(full_path, os.path.join(out_dir, str(sha_sum) + '.' + str(extension)), args['change'])
        else:
            if verbose:
                debug("Skipping " + full_path + " as it does not appear to be a file.")


if __name__ == "__main__":
    main()