@@ -36,14 +36,28 @@ def base_key
3636 # create it.
3737 # @raise [StandardError] if there's a zip file for this druid-version, but it looks too small to be complete.
def find_or_create_zip!
  # Nothing on disk yet for this druid-version: build the zip from scratch.
  return create_zip! unless exist?

  # A zip is already present; refuse to reuse it when zip_size_ok? says it is too small.
  raise "zip already exists, but size (#{total_part_size}) is smaller than the moab version size (#{moab_version_size})!" unless zip_size_ok?

  # The existing zip is kept as-is; only its timestamps are updated.
  FileUtils.touch(file_path)
end
4646
# @return [Boolean] whether something is present on disk at file_path
def exist?
  zip_path = file_path
  File.exist?(zip_path)
end
50+
# Checks that the zip part files and their md5 sidecar files agree.
#
# @return [Boolean] true only when all three conditions hold:
#   1. part_paths is not empty,
#   2. part_keys and the keys derived from the md5 sidecars are the same set (order ignored),
#   3. every part reports md5_match?
def complete?
  parts_present = !part_paths.empty?

  # && short-circuits, so the later (more expensive) checks run only when the earlier ones pass.
  parts_present &&
    part_keys.to_set == part_keys_from_md5_sidecars.to_set &&
    druid_version_zip_parts.all?(&:md5_match?)
end
60+
4761 # Creates a zip of Druid-Version content.
4862 # Changes directory so that the storage root (and druid tree) are not part of
4963 # the archival directory structure, just the object, e.g. starting at 'ab123cd4567/...' directory,
@@ -54,7 +68,7 @@ def create_zip!
5468 combined , status = Open3 . capture2e ( zip_command , chdir : work_dir . to_s )
5569 raise "zipmaker failure #{ combined } " unless status . success?
5670 unless zip_size_ok?
57- part_cleanup_errors = cleanup_zip_parts !
71+ part_cleanup_errors = cleanup_zip_parts_with_rescue !
5872 part_cleanup_err_msg = "\n -- errors cleaning up zip parts: #{ part_cleanup_errors . map ( &:inspect ) } " if part_cleanup_errors . present?
5973 raise "zip size (#{ total_part_size } ) is smaller than the moab version size (#{ moab_version_size } )! zipmaker failure #{ combined } #{ part_cleanup_err_msg } "
6074 end
@@ -172,6 +186,18 @@ def moab_version_size
172186 moab_version_files . sum { |f | File . size ( f ) }
173187 end
174188
# Removes every zip part file and md5 sidecar file of this zip from local zip storage.
# Errors raised by File.delete are not rescued here; they propagate to the caller.
def cleanup_zip_parts!
  parts_and_checksums_paths.each do |path|
    File.delete(path)
  end
end
193+
# Builds a part object for each entry in part_keys.
#
# @return [Array<DruidVersionZipPart>] all parts for this DruidVersionZip, in part_keys order
def druid_version_zip_parts
  part_keys.map { |key| Replication::DruidVersionZipPart.new(self, key) }
end
200+
175201 private
176202
177203 # Throws an error if any of the files in the moab are not yet readable. For example due to
@@ -184,7 +210,7 @@ def check_moab_version_readability!
184210 moab_version_files . map { |f | File . stat ( f ) }
185211 end
186212
187- def cleanup_zip_parts !
213+ def cleanup_zip_parts_with_rescue !
188214 errors = [ ]
189215 parts_and_checksums_paths . map do |p |
190216 File . delete ( p )
@@ -219,5 +245,14 @@ def fetch_zip_version
# Pattern for a zip version banner such as "This is Zip 3.0 (July 5th 2008)".
# Capture group 1 holds the version text: "Zip", a dotted number, and an optional
# parenthesised suffix that ends in four digits.
#
# @return [Regexp]
def zip_version_regexp
  /This is (Zip \d+(\.\d)+\s*(\(.*\d{4}\))?)/
end
248+
# Derives part keys from the md5 sidecar files: each sidecar path is made relative to
# zip_storage and its trailing '.md5' is removed.
#
# @return [Array<String>] relative paths, i.e. s3_part_keys for existing parts based on the md5 sidecar files
def part_keys_from_md5_sidecars
  md5_sidecar_paths.map do |sidecar|
    relative = sidecar.relative_path_from(zip_storage).to_s
    relative.delete_suffix('.md5')
  end
end
253+
# @return [Array<Pathname>] paths under zip_storage that match the glob s3_key('.*.md5')
def md5_sidecar_paths
  sidecar_glob = File.join(zip_storage, s3_key('.*.md5'))
  Pathname.glob(sidecar_glob)
end
222257 end
223258end
0 commit comments