11import os
22import pathlib
3-
3+ import glob
4+ import itertools
45
class DupeFinder:
    """
    Simple utility class for finding and manipulating duplicate files.

    Two directory entries are considered duplicates when they share the same
    "stem", i.e. the entry name without its final extension
    (``pathlib.PurePath(name).stem``).
    """

    @staticmethod
    def getDuplicateFileNames(directory1, directory2):
        """
        Return a list of duplicate file name "stems" in both directories.

        e.g. :

            directory1 : [a.png, b.png]

            directory2 : [a.png, b.png]

            returns: [a, b]

        Every entry returned by ``os.listdir`` is considered, whatever its
        extension, so ``a.png`` in one directory and ``a.jpg`` in the other
        are reported as the duplicate stem ``a``.

        Parameters
        ----------
        directory1 : str or os.PathLike
            Path of the first directory
        directory2 : str or os.PathLike
            Path of the second directory

        Returns
        -------
        list of str
            Stems present in both directories, each listed once, in the
            order they are first encountered in ``directory1``.
        """
        directory1Names = os.listdir(directory1)
        directory2Names = os.listdir(directory2)

        # A set makes each membership test O(1) instead of scanning a list.
        directory2Stems = {pathlib.PurePath(name).stem for name in directory2Names}

        dupeFiles = []
        reported = set()
        for name in directory1Names:
            stem = pathlib.PurePath(name).stem
            # Several files may share a stem (a.png, a.jpg); report it once.
            if stem in directory2Stems and stem not in reported:
                reported.add(stem)
                dupeFiles.append(stem)

        return dupeFiles

    @staticmethod
    def deleteFiles(fileNames, directory1, directory2):
        """
        Delete, in both directories, every file whose stem is in fileNames.

        For each stem the files matching ``<stem>.*`` are removed, regardless
        of their extension. Files with no extension are not matched by that
        pattern and are left in place, as are directories.

        Parameters
        ----------
        fileNames : iterable of str
            File name stems to delete, as returned by getDuplicateFileNames
        directory1 : str or os.PathLike
            Path of the first directory
        directory2 : str or os.PathLike
            Path of the second directory
        """
        for file in fileNames:
            for directory in (directory1, directory2):
                # Escape both parts so characters such as '[' or '*' in the
                # directory or the stem are matched literally by glob.
                pattern = os.path.join(
                    glob.escape(os.fspath(directory)),
                    glob.escape(file) + '.*',
                )

                for filePath in glob.glob(pattern):
                    # '<stem>.*' also matches names such as 'a.b.png' for the
                    # stem 'a'; only remove entries whose stem really is the
                    # requested one.
                    if pathlib.PurePath(filePath).stem != file:
                        continue
                    # os.remove cannot delete directories; skip them rather
                    # than abort half-way through the deletion.
                    if os.path.isdir(filePath):
                        continue
                    os.remove(filePath)
0 commit comments