rdupes

recursively search for duplicate files
git clone https://a3nm.net/git/rdupes/

commit bd28a3f9a321d956c5f6129067300daf32bfc3dc
parent c610ec8516c279a2b9271fa2e330745250429762
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri,  9 Dec 2016 17:34:23 +0100

uncommitted changes

Diffstat:
rdupes.py | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/rdupes.py b/rdupes.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
+# TODO don't show file duplicates under duplicate folders
 import os
 import sys
 import hashlib
 
@@ -11,7 +12,11 @@ def hashfile(f):
     sha1 = hashlib.sha1()
     fp = open(f, 'rb')
     try:
-        sha1.update(fp.read())
+        while True:
+            buf = fp.read(16*1024*1024)
+            if not buf:
+                break
+            sha1.update(buf)
     finally:
         fp.close()
     return sha1.hexdigest()
@@ -25,17 +30,17 @@ def explore(d):
     #print "explore %s" % d
     files = os.listdir(d)
     for f in prefix(d, files):
-        if os.path.isdir(f):
-            h, s = explore(f)
-        else:
-            try:
-                s = os.stat(f).st_size
-                h = hashfile(f)
-            except OSError:
-                continue
-        register(f, h, s)
-        hashes.append(h)
-        size += s
+        try:
+            if os.path.isdir(f):
+                h, s = explore(f)
+            else:
+                s = os.stat(f).st_size
+                h = hashfile(f)
+            register(f, h, s)
+            hashes.append(h)
+            size += s
+        except (OSError, IOError):
+            continue
     sha1 = hashlib.sha1()
     hashes.sort()
     sha1.update('d' + '-'.join(hashes))
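For context, the first hunk replaces a whole-file read with chunked reads, so hashing a large file no longer loads it entirely into memory. A minimal standalone sketch of that pattern follows; the helper name hashfile_chunked and the bufsize parameter are illustrative, not part of the repository:

import hashlib

def hashfile_chunked(path, bufsize=16 * 1024 * 1024):
    # Hash the file in fixed-size chunks so memory use stays bounded
    # by bufsize regardless of the file's size.
    sha1 = hashlib.sha1()
    with open(path, 'rb') as fp:
        while True:
            buf = fp.read(bufsize)
            if not buf:
                break
            sha1.update(buf)
    return sha1.hexdigest()

The second hunk widens the error handling: errors raised while recursing into a subdirectory, and IOError while reading a file, are now caught too, so a single unreadable entry is skipped rather than aborting the whole scan.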