@@ -8,47 +8,13 @@ class DupeFinder:
88 Simple utility class for finding and manipulating duplicate files
99 """
1010
11- @staticmethod
12- def _getAllDirectoryPaths (directories , recursivelySearch ):
13- """
14- Returns a list of directory paths.
15-
16- Parameters
17- ----------
18- directories : list or str
19-
20- List of string paths to multiple directories or singular string path to one
21-
22- recursivelySearch : boolean
23-
24- True or False depending if recursively searching is required
25-
26- Returns
27- -------
28- list
29-
30- A list of directory Path objects of all the directories passed in as well as any
31- potential recursive directories inside
32- """
33- destinationDirectoryPaths = []
34-
35- if type (directories ) is list :
36- for destination in directories :
37- destinationDirectoryPaths .append (pathlib .Path (destination ))
38- else :
39- destinationDirectoryPaths .append (pathlib .Path (destination ))
40-
41- if recursivelySearch :
42- for destinationDir in destinationDirectoryPaths :
43- destinationDirectoryPaths .extend ([f .path for f in os .scandir (destinationDir ) if f .is_dir ()])
44-
45- return destinationDirectoryPaths
46-
4711 @staticmethod
4812 def getDuplicateFileNames (directory1 , destinationsToDelete , recursivelySearch , matchAllDirectories ):
4913 """
50- Returns a list of duplicate file name "stems" using directory1 as the
51- master directory and then recursively or not (if passed in True) checking all the destinationsToDelete for
14+ Returns a tuple containing a list of duplicate file name "stems" and a count of the directories in which duplicate file name "stems"
15+ were found.
16+
17+ It uses directory1 as the master directory and then checks all the destinationsToDelete (recursively, if recursivelySearch is True) for
5218 duplicate file "stems". If matchAllDirectories is set to true, then all destinationsToDelete (and recursive directories if required)
5319 must all have the same file name "stem"
5420
@@ -64,7 +30,7 @@ def getDuplicateFileNames(directory1, destinationsToDelete, recursivelySearch, m
6430
6531 destinationsToDelete : [a.png, b.png, dir1 : [a.png, c.png]]
6632
67- returns: [a, b]
33+ returns: ( [a, b], 3)
6834
6935 ---
7036
@@ -78,7 +44,7 @@ def getDuplicateFileNames(directory1, destinationsToDelete, recursivelySearch, m
7844
7945 destinationsToDelete : [a.png, b.png, dir1 : [a.png, c.png]]
8046
81- returns: [a]
47+ returns: ( [a], 3)
8248
8349 ---
8450
@@ -98,61 +64,73 @@ def getDuplicateFileNames(directory1, destinationsToDelete, recursivelySearch, m
9864
9965 Returns
10066 -------
101- list
67+
68+ (list, int):
69+
70+ list
10271
103- A list of the duplicate file names between all directories using the first parameter passed in
104- as the master directory. If no duplicate files are found, an empty list is returned
72+ A list of the duplicate file names between all directories using the first parameter passed in
73+ as the master directory. If no duplicate files are found, an empty list is returned
74+
75+ int
76+
76+ count of directories in which dupe file names were found
10578 """
10679 directory1Path = pathlib .Path (directory1 )
107- destinationDirectoryPaths = DupeFinder ._getAllDirectoryPaths (destinationsToDelete , recursivelySearch )
80+ destinationDirectoryPaths = []
81+
82+ if type (destinationsToDelete ) is list :
83+ for destination in destinationsToDelete :
84+ destinationDirectoryPaths .append (pathlib .Path (destination ))
85+ else :
86+ destinationDirectoryPaths .append (pathlib .Path (destinationsToDelete ))
87+
88+ if recursivelySearch :
89+ for destinationDir in destinationDirectoryPaths :
90+ destinationDirectoryPaths .extend ([f .path for f in os .scandir (destinationDir ) if f .is_dir ()])
10891
10992 directory1FilesNoExt = []
93+ destinationDirectoriesFilesListNoExt = []
11094
11195 for file in os .listdir (directory1Path ):
11296 purePath = pathlib .Path (os .path .join (directory1 , file ))
11397 directory1FilesNoExt .append (purePath .stem )
11498
115- if matchAllDirectories :
116-
117- destinationDirectoriesFilesListNoExt = []
99+ for destinationDir in destinationDirectoryPaths :
118100
119- for destinationDir in destinationDirectoryPaths :
101+ destinationDirectoriesFilesNoExt = []
120102
121- destinationDirectoriesFilesNoExt = []
103+ for file in os .listdir (destinationDir ):
104+ purePath = pathlib .Path (os .path .join (destinationDir , file ))
105+ destinationDirectoriesFilesNoExt .append (purePath .stem )
106+
107+ destinationDirectoriesFilesListNoExt .append (destinationDirectoriesFilesNoExt )
122108
123- for file in os .listdir (destinationDir ):
124- purePath = pathlib .Path (os .path .join (destinationDir , file ))
125- destinationDirectoriesFilesNoExt .append (purePath .stem )
126-
127- destinationDirectoriesFilesListNoExt .append (destinationDirectoriesFilesNoExt )
109+ dupeFiles = []
110+ dupeDirectoryIndexes = set ()
128111
129- dupeFiles = []
112+ for fileName in directory1FilesNoExt :
130113
131- for fileName in directory1FilesNoExt :
114+ if matchAllDirectories :
132115 fileCount = 0
133116 for destinationDirectoryFilesListNoExt in destinationDirectoriesFilesListNoExt :
134117 if fileName in destinationDirectoryFilesListNoExt :
135118 fileCount += 1
136119
137120 if fileCount == len (destinationDirectoriesFilesListNoExt ):
138- dupeFiles .append (fileName )
139-
140- return dupeFiles
141-
142- destinationDirectoriesFilesNoExt = set ()
143-
144- for destinationDir in destinationDirectoryPaths :
145- for file in os .listdir (destinationDir ):
146- purePath = pathlib .Path (os .path .join (destinationDir , file ))
147- destinationDirectoriesFilesNoExt .add (purePath .stem )
148-
149- dupeFiles = []
150-
151- for fileName in directory1FilesNoExt :
152- if fileName in destinationDirectoriesFilesNoExt :
153- dupeFiles .append (fileName )
121+ dupeFiles .append (fileName )
122+ else :
123+
124+ for x in range (len (destinationDirectoriesFilesListNoExt )):
125+ if fileName in destinationDirectoriesFilesListNoExt [x ]:
126+ dupeFiles .append (fileName )
127+ dupeDirectoryIndexes .add (x )
128+ break
154129
155- return dupeFiles
130+ if matchAllDirectories :
131+ return (dupeFiles , len (destinationDirectoryPaths ) + 1 )
132+ else :
133+ return (dupeFiles , len (dupeDirectoryIndexes ) + 1 )
156134
157135 @staticmethod
158136 def deleteFiles (fileNames , destinationsToDelete , recursivelySearch = False ):
0 commit comments