commit bd28a3f9a321d956c5f6129067300daf32bfc3dc
parent c610ec8516c279a2b9271fa2e330745250429762
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Fri, 9 Dec 2016 17:34:23 +0100
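uncommitted changes: hash files in 16 MiB chunks instead of reading them whole; in explore(), extend the try block to cover the recursive call and catch IOError as well as OSError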
Diffstat:
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/rdupes.py b/rdupes.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python
+# TODO: don't show duplicate files that are inside duplicate folders
import os
import sys
import hashlib
@@ -11,7 +12,11 @@ def hashfile(f):
sha1 = hashlib.sha1()
fp = open(f, 'rb')
try:
- sha1.update(fp.read())
+ while True:
+ buf = fp.read(16*1024*1024)
+ if not buf:
+ break
+ sha1.update(buf)
finally:
fp.close()
return sha1.hexdigest()
@@ -25,17 +30,17 @@ def explore(d):
#print "explore %s" % d
files = os.listdir(d)
for f in prefix(d, files):
- if os.path.isdir(f):
- h, s = explore(f)
- else:
- try:
- s = os.stat(f).st_size
- h = hashfile(f)
- except OSError:
- continue
- register(f, h, s)
- hashes.append(h)
- size += s
+ try:
+ if os.path.isdir(f):
+ h, s = explore(f)
+ else:
+ s = os.stat(f).st_size
+ h = hashfile(f)
+ register(f, h, s)
+ hashes.append(h)
+ size += s
+ except (OSError, IOError):
+ continue
sha1 = hashlib.sha1()
hashes.sort()
sha1.update('d' + '-'.join(hashes))