Skip to content

Commit 64ffb2c

Browse files
authored
Merge pull request #11 from NYPD/v0.0.4
V0.0.4
2 parents 0d1db24 + e45dba1 commit 64ffb2c

File tree

3 files changed

+95
-28
lines changed

3 files changed

+95
-28
lines changed

phodupe/__main__.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,22 @@ def main():
1717
if user_input == 'y':
1818
recursivelySearch = True
1919

20-
dupe_files = DupeFinder.getDuplicateFileNames(destination1, destinationsToDelete, recursivelySearch)
20+
allDirectoriesMustHaveSameDupes = False
2121

22-
if len(dupe_files) is 0:
22+
user_input = input("Must all directories have the same exact file name? 'y' or 'n'\n")
23+
24+
if user_input == 'y':
25+
allDirectoriesMustHaveSameDupes = True
26+
27+
dupeFileInfoTuple = DupeFinder.getDuplicateFileNames(destination1, destinationsToDelete, recursivelySearch, allDirectoriesMustHaveSameDupes)
28+
dupe_files = dupeFileInfoTuple[0]
29+
dupeFileLength = len(dupe_files)
30+
31+
if dupeFileLength is 0:
2332
print('No dupe files found!')
2433
exit()
2534

26-
user_input = input("{} duplicate file names found. Enter 'y' to delete or 'n' to abort:\n".format(len(dupe_files)))
35+
user_input = input("{} duplicate file names found across {} directories. Enter 'y' to delete or 'n' to abort:\n".format(dupeFileLength, dupeFileInfoTuple[1]))
2736

2837
if user_input == 'y':
2938
destinationsToDelete.append(destination1)

phodupe/dupe_finder.py

Lines changed: 82 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,44 @@ class DupeFinder:
99
"""
1010

1111
@staticmethod
12-
def getDuplicateFileNames(directory1, destinationsToDelete, recursivelySearch):
12+
def getDuplicateFileNames(directory1, destinationsToDelete, recursivelySearch, matchAllDirectories):
1313
"""
14-
Returns a list of duplicate file name "stems" using directory1 as the
15-
master directory and then recursively or not (if passed in True) checking all the destinationsToDelete.
14+
Returns a tuple containing a list of duplicate file name "stems" and a count of the directories in which dupe file name "stems"
15+
were found.
1616
17-
e.g. :
17+
It uses directory1 as the master directory and then checks all the destinationsToDelete (recursively if recursivelySearch is True) for
18+
duplicate file "stems". If matchAllDirectories is set to true, then all destinationsToDelete (and recursive directories if required)
19+
must all have the same file name "stem"
20+
21+
---
22+
23+
e.g. 1 :
1824
1925
recursivelySearch = True
2026
27+
matchAllDirectories = False
28+
2129
directory1 : [a.png, b.png]
2230
2331
destinationsToDelete : [a.png, b.png, dir1 : [a.png, c.png]]
2432
25-
returns: [a, b]
33+
returns: ([a, b], 3)
34+
35+
---
36+
37+
e.g. 2 :
38+
39+
recursivelySearch = True
40+
41+
matchAllDirectories = True
42+
43+
directory1 : [a.png, b.png]
44+
45+
destinationsToDelete : [a.png, b.png, dir1 : [a.png, c.png]]
46+
47+
returns: ([a], 3)
48+
49+
---
2650
2751
Parameters
2852
----------
@@ -40,44 +64,72 @@ def getDuplicateFileNames(directory1, destinationsToDelete, recursivelySearch):
4064
4165
Returns
4266
-------
43-
list
67+
68+
(list, int):
69+
70+
list
71+
72+
A list of the duplicate file names between all directories using the first parameter passed in
73+
as the master directory. If no duplicate files are found, an empty list is returned
4474
45-
A list of the duplicate file names between all directories using the first parameter passed in
46-
as the master directory. If no duplicate files are found, an empty list is returned
75+
int
76+
77+
count of directories in which dupe file names were found
4778
"""
4879
directory1Path = pathlib.Path(directory1)
49-
50-
destinationDirectories = []
80+
destinationDirectoryPaths = []
5181

5282
if type(destinationsToDelete) is list:
5383
for destination in destinationsToDelete:
54-
destinationDirectories.append(pathlib.Path(destination))
84+
destinationDirectoryPaths.append(pathlib.Path(destination))
5585
else:
56-
destinationDirectories.append(pathlib.Path(destination))
86+
destinationDirectoryPaths.append(pathlib.Path(destinationsToDelete))
5787

5888
if recursivelySearch:
59-
for destinationDir in destinationDirectories:
60-
destinationDirectories.extend([f.path for f in os.scandir(destinationDir) if f.is_dir()])
89+
for destinationDir in destinationDirectoryPaths:
90+
destinationDirectoryPaths.extend([f.path for f in os.scandir(destinationDir) if f.is_dir()])
6191

6292
directory1FilesNoExt = []
63-
destinationDirectoriesFilesNoExt = set()
93+
destinationDirectoriesFilesListNoExt = []
6494

6595
for file in os.listdir(directory1Path):
6696
purePath = pathlib.Path(os.path.join(directory1, file))
6797
directory1FilesNoExt.append(purePath.stem)
6898

69-
for destinationDir in destinationDirectories:
99+
for destinationDir in destinationDirectoryPaths:
100+
101+
destinationDirectoriesFilesNoExt = []
102+
70103
for file in os.listdir(destinationDir):
71104
purePath = pathlib.Path(os.path.join(destinationDir, file))
72-
destinationDirectoriesFilesNoExt.add(purePath.stem)
105+
destinationDirectoriesFilesNoExt.append(purePath.stem)
106+
107+
destinationDirectoriesFilesListNoExt.append(destinationDirectoriesFilesNoExt)
73108

74-
dupeFiles = []
109+
dupeFiles = set()
110+
dupeDirectoryIndexes = set()
75111

76112
for fileName in directory1FilesNoExt:
77-
if fileName in destinationDirectoriesFilesNoExt:
78-
dupeFiles.append(fileName)
113+
114+
if matchAllDirectories:
115+
fileCount = 0
116+
for destinationDirectoryFilesListNoExt in destinationDirectoriesFilesListNoExt:
117+
if fileName in destinationDirectoryFilesListNoExt:
118+
fileCount += 1
119+
120+
if fileCount == len(destinationDirectoriesFilesListNoExt):
121+
dupeFiles.add(fileName)
122+
else:
123+
124+
for x in range(len(destinationDirectoriesFilesListNoExt) - 1):
125+
if fileName in destinationDirectoriesFilesListNoExt[x]:
126+
dupeFiles.add(fileName)
127+
dupeDirectoryIndexes.add(x)
79128

80-
return dupeFiles
129+
if matchAllDirectories:
130+
return (dupeFiles, len(destinationDirectoryPaths) + 1)
131+
else:
132+
return (dupeFiles, len(dupeDirectoryIndexes) + 1)
81133

82134
@staticmethod
83135
def deleteFiles(fileNames, destinationsToDelete, recursivelySearch=False):
@@ -95,7 +147,13 @@ def deleteFiles(fileNames, destinationsToDelete, recursivelySearch=False):
95147
for file in fileNames:
96148

97149
for destination in destinationsToDelete:
98-
directoryGlob = glob.glob('{}{}{}.*'.format(destination, '/**/',file), recursive=recursivelySearch)
99-
150+
151+
if recursivelySearch:
152+
fileDestination = '{}{}{}.*'.format(destination, '/**/',file)
153+
else:
154+
fileDestination = '{}/{}.*'.format(destination,file)
155+
156+
directoryGlob = glob.glob(fileDestination, recursive=recursivelySearch)
157+
100158
for filePath in directoryGlob:
101-
os.remove(filePath)
159+
os.remove(filePath)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='phodupe',
5-
version='v0.0.3',
5+
version='v0.0.4',
66
packages=['phodupe'],
77
url='https://github.com/NYPD/phodupe',
88
license='MIT',

0 commit comments

Comments
 (0)