commit 10025a7fc483089af9bb9c38484b57009e8786e1
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Fri, 6 Jan 2012 20:20:01 +0100
initial write
Diffstat:
rdupes.py | | | 60 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 60 insertions(+), 0 deletions(-)
diff --git a/rdupes.py b/rdupes.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import hashlib
+
# Maps a content digest to the list of paths (files or directories) that
# were registered with that digest.
hashes = {}
# Maps a content digest to the size recorded for the first path seen with it.
sizes = {}


def register(f, h, s):
    """Record that path *f* has content digest *h* and size *s*.

    The path is echoed to stderr as a progress trace, then appended to the
    list of paths sharing digest *h*.  The first path seen for a digest also
    stores its size; every later path with that digest must report the same
    size.

    Raises:
        AssertionError: if *s* differs from the size already stored for *h*
            (the check is skipped when Python runs with -O).
    """
    # sys.stderr.write is valid on both Python 2 and Python 3, unlike the
    # Python 2-only `print >> sys.stderr` statement.
    sys.stderr.write("%s\n" % (f,))
    if h in hashes:  # direct dict membership, no intermediate key list
        hashes[h].append(f)
        assert sizes[h] == s, (
            "size mismatch for digest %s: %r != %r" % (h, sizes[h], s))
    else:
        hashes[h] = [f]
        sizes[h] = s
+
def hashfile(f, blocksize=65536):
    """Return the hexadecimal SHA-1 digest of the contents of file *f*.

    The file is opened in binary mode and fed to the hash in chunks of
    *blocksize* bytes (a positive integer), so a large file never has to be
    held in memory all at once.

    Errors raised while opening or reading the file are not caught here and
    propagate to the caller.
    """
    sha1 = hashlib.sha1()
    with open(f, 'rb') as fp:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty bytes object that marks end of file.
        for chunk in iter(lambda: fp.read(blocksize), b''):
            sha1.update(chunk)
    return sha1.hexdigest()
+
def prefix(p, s):
    """Return a list holding each name from *s* joined onto the path *p*."""
    joined = []
    for name in s:
        joined.append(os.path.join(p, name))
    return joined
+
def explore(d):
    """Recursively hash directory *d*, registering every entry beneath it.

    Each entry of *d* is hashed (subdirectories by recursion, anything else
    with hashfile()) and passed to register() together with its size.
    Entries that cannot be read are skipped, so they contribute neither to
    the digest nor to the size of *d*.

    Returns:
        A ``(digest, size)`` tuple for *d* itself.  The digest is the SHA-1
        of ``'d'`` followed by the sorted digests of the hashed entries
        joined with ``'-'``; the size is one plus the sum of their sizes.

    Raises:
        OSError: if *d* itself cannot be listed.
    """
    # Start at 1 rather than 0 so a folder outranks its contents when
    # results are ordered by size (sort folders before files).
    size = 1
    # Deliberately not called `hashes`: that name is the module-level table.
    child_hashes = []
    for f in prefix(d, os.listdir(d)):
        try:
            if os.path.isdir(f):
                h, s = explore(f)
            else:
                s = os.stat(f).st_size
                h = hashfile(f)
        except (IOError, OSError):
            # On Python 2 open() raises IOError, which is not an OSError
            # there, so both are needed to really skip unreadable entries.
            # A subdirectory that cannot be listed is skipped the same way
            # as an unreadable file instead of aborting the whole walk.
            continue
        register(f, h, s)
        child_hashes.append(h)
        size += s
    # Sorting makes the digest independent of the order listdir() returns.
    child_hashes.sort()
    sha1 = hashlib.sha1()
    # Hex digests are pure ASCII; encoding yields the bytes Python 3's
    # hashlib requires and leaves the Python 2 digest unchanged.
    sha1.update(('d' + '-'.join(child_hashes)).encode('ascii'))
    return sha1.hexdigest(), size
+
# Walk every directory named on the command line; explore() fills the
# module-level `hashes` and `sizes` tables through register().
for d in sys.argv[1:]:
    explore(d)

# Keep only digests shared by at least two paths, largest content first.
hashes2 = sorted(
    ((sizes[h], paths) for h, paths in hashes.items() if len(paths) > 1),
    reverse=True)
# One line per duplicate group: "<size>: <path> <path> ...".  A single
# parenthesised argument makes this print valid on Python 2 and Python 3.
print('\n'.join(str(s) + ": " + ' '.join(f) for (s, f) in hashes2))
+