1010
1111import hashlib
1212import os
13+ import sys
1314from functools import partial
1415from os .path import getsize , join
1516
@@ -34,6 +35,7 @@ class node():
3435 filepath = None
3536 filesize = None
3637 filehash = None
38+ samewith = None
3739
3840 def __init__ (self , fpath ):
3941 self .filepath = fpath
@@ -54,9 +56,11 @@ def __hash__(self):
5456
5557huge_hash_table = {}
5658to_delete_list = []
59+ file_count = 0
5760
5861for root , dirs , files in os .walk (current_directory ):
5962 for file in files :
63+ file_count = file_count + 1
6064 p = node (os .path .join (root , file ))
6165 fz = str (p .filesize )
6266 if fz in huge_hash_table .keys ():
@@ -66,6 +70,7 @@ def __hash__(self):
6670
6771 for i in huge_hash_table [fz ]:
6872 if not dup_flag and (hash_of_p == i .gethash ()):
73+ p .samewith = i .filepath
6974 dup_flag = True
7075
7176 if dup_flag :
@@ -77,24 +82,28 @@ def __hash__(self):
7782 else :
7883 huge_hash_table [fz ] = []
7984 huge_hash_table [fz ].append (p )
85+ sys .stdout .write ("\r %d files scanned, %d duplicated files found." % (
86+ file_count , len (to_delete_list )))
87+ sys .stdout .flush ()
8088
def _remove_file(path):
    """Delete *path*, printing a message instead of raising if removal fails."""
    print("Deleting ", path)
    try:
        os.remove(path)
    except OSError as err:
        # A stale or protected entry should not abort the remaining deletions.
        print("Could not delete %s: %s" % (path, err))


# Report every duplicate on stdout and record it in rmdup.files, one entry per
# line, formatted as "<duplicate path>>>><path of the file it matches>".
print("Files to be deleted:")
with open("rmdup.files", "w+") as log:
    for i in to_delete_list:
        print(i.filepath)
        print("%s>>>%s" % (i.filepath, i.samewith), file=log)

print("Checkout rmdup.files for details.")

# "y" deletes everything found in this run; "f" re-reads rmdup.files so the
# user can edit the list first; any other answer deletes nothing.
s = input("Just delete ALL(y/n/f)? F to load entries from rmdup.files >")
choice = s.strip().lower()
if choice == "y":
    for i in to_delete_list:
        _remove_file(i.filepath)
elif choice == "f":
    with open("rmdup.files", "r") as f:
        for line in f:
            # Keep only the text before the first ">>>". Stripping the line
            # ending first also makes separator-less lines (a bare path) work.
            # NOTE: a path that itself contains ">>>" is truncated here.
            file_to_delete = line.rstrip("\r\n").partition(">>>")[0]
            if not file_to_delete:
                # Blank line, e.g. left behind by manual editing.
                continue
            _remove_file(file_to_delete)
0 commit comments