@@ -68,7 +68,7 @@ def call(image_paths:, **options)
6868
6969 image_files = @profiler . measure ( 'find_image_files' ) { find_image_files ( client ) }
7070
71- @profiler . measure ( ' check_duplicate_files' ) { check_duplicate_files ( image_files , client ) }
71+ check_duplicate_files ( image_files , client )
7272
7373 run_id = @profiler . measure ( 'create_run' ) { create_run }
7474
@@ -130,18 +130,26 @@ def find_image_files(client)
130130 found_images
131131 end
132132
# Detects image paths whose base file names collide.
#
# The first path seen for each base name is recorded in +seen_files+; every
# later path with the same base name is collected in +duplicate_files+.
# One warning is logged per colliding base name on every call, so invoking
# this more than once on the same list repeats the warnings.
#
# @param image_files [Enumerable<String>, nil] paths to inspect, in order;
#   +nil+ is treated as an empty list
# @param _client [Object, nil] unused; kept (now optional) so existing
#   two-argument callers continue to work
# @return [Array(Hash{String=>String}, Hash{String=>Array<String>})]
#   +[seen_files, duplicate_files]+ where +seen_files+ maps base name to the
#   first path seen and +duplicate_files+ maps base name to the later paths
def check_duplicate_files(image_files, _client = nil)
  seen_files = {}
  duplicate_files = {}

  Array(image_files).each do |image_path|
    file_name = File.basename(image_path)

    if seen_files.key?(file_name)
      (duplicate_files[file_name] ||= []) << image_path
    else
      seen_files[file_name] = image_path
    end
  end

  duplicate_files.each do |file_name, paths|
    # Report the first-seen path as well, so the reader can tell which file
    # the listed duplicates collide with.
    Logger.warn "Found #{paths.length} duplicate(s) of '#{file_name}'. " \
                "First occurrence: '#{seen_files[file_name]}'. " \
                "Duplicates: #{paths.join(', ')}"
  end

  [seen_files, duplicate_files]
end
146154
147155 def create_run
@@ -201,6 +209,8 @@ def batch_upload_images(run_id, image_files, client)
201209 errors : [ ]
202210 }
203211
212+ used_filenames , = check_duplicate_files ( image_files , client )
213+
204214 @profiler . measure ( 'process_image_metadata' ) do
205215 image_files . each do |image_path |
206216 metadata_semaphore . async do
@@ -236,8 +246,9 @@ def batch_upload_images(run_id, image_files, client)
236246 zipfile . get_output_stream ( 'manifest.json' ) { |f | f . write ( JSON . generate ( image_metadata ) ) }
237247
238248 image_files . each do |image_path |
239- image_name = File . basename ( image_path )
240- zipfile . add ( image_name , image_path )
249+ filename = File . basename ( image_path )
250+ # Only add files we haven't seen before
251+ zipfile . add ( filename , image_path ) if used_filenames [ filename ] == image_path
241252 end
242253 end
243254 end
0 commit comments