@@ -234,8 +234,19 @@ def strip(cls, data, xids):
234234
235235def _check_zipfile (fp ):
236236 try :
237- if _EndRecData (fp ):
238- return True # file has correct magic number
237+ endrec = _EndRecData (fp )
238+ if endrec :
239+ if endrec [_ECD_ENTRIES_TOTAL ] == 0 and endrec [_ECD_SIZE ] == 0 and endrec [_ECD_OFFSET ] == 0 :
240+ return True # Empty zipfiles are still zipfiles
241+ elif endrec [_ECD_DISK_NUMBER ] == endrec [_ECD_DISK_START ]:
242+ # Central directory is on the same disk
243+ fp .seek (sum (_handle_prepended_data (endrec )))
244+ if endrec [_ECD_SIZE ] >= sizeCentralDir :
245+ data = fp .read (sizeCentralDir ) # CD is where we expect it to be
246+ if len (data ) == sizeCentralDir :
247+ centdir = struct .unpack (structCentralDir , data ) # CD is the right size
248+ if centdir [_CD_SIGNATURE ] == stringCentralDir :
249+ return True # First central directory entry has correct magic number
239250 except OSError :
240251 pass
241252 return False
@@ -258,6 +269,22 @@ def is_zipfile(filename):
258269 pass
259270 return result
260271
272+ def _handle_prepended_data (endrec , debug = 0 ):
273+ size_cd = endrec [_ECD_SIZE ] # bytes in central directory
274+ offset_cd = endrec [_ECD_OFFSET ] # offset of central directory
275+
276+ # "concat" is zero, unless zip was concatenated to another file
277+ concat = endrec [_ECD_LOCATION ] - size_cd - offset_cd
278+ if endrec [_ECD_SIGNATURE ] == stringEndArchive64 :
279+ # If Zip64 extension structures are present, account for them
280+ concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator )
281+
282+ if debug > 2 :
283+ inferred = concat + offset_cd
284+ print ("given, inferred, offset" , offset_cd , inferred , concat )
285+
286+ return offset_cd , concat
287+
261288def _EndRecData64 (fpin , offset , endrec ):
262289 """
263290 Read the ZIP64 end-of-archive records and use that to update endrec
@@ -1501,28 +1528,21 @@ def _RealGetContents(self):
15011528 raise BadZipFile ("File is not a zip file" )
15021529 if self .debug > 1 :
15031530 print (endrec )
1504- size_cd = endrec [_ECD_SIZE ] # bytes in central directory
1505- offset_cd = endrec [_ECD_OFFSET ] # offset of central directory
15061531 self ._comment = endrec [_ECD_COMMENT ] # archive comment
15071532
1508- # "concat" is zero, unless zip was concatenated to another file
1509- concat = endrec [_ECD_LOCATION ] - size_cd - offset_cd
1510- if endrec [_ECD_SIGNATURE ] == stringEndArchive64 :
1511- # If Zip64 extension structures are present, account for them
1512- concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator )
1533+ offset_cd , concat = _handle_prepended_data (endrec , self .debug )
1534+
1535+ # self.start_dir: Position of start of central directory
1536+ self .start_dir = offset_cd + concat
15131537
15141538 # store the offset to the beginning of data for the
15151539 # .data_offset property
15161540 self ._data_offset = concat
15171541
1518- if self .debug > 2 :
1519- inferred = concat + offset_cd
1520- print ("given, inferred, offset" , offset_cd , inferred , concat )
1521- # self.start_dir: Position of start of central directory
1522- self .start_dir = offset_cd + concat
15231542 if self .start_dir < 0 :
15241543 raise BadZipFile ("Bad offset for central directory" )
15251544 fp .seek (self .start_dir , 0 )
1545+ size_cd = endrec [_ECD_SIZE ]
15261546 data = fp .read (size_cd )
15271547 fp = io .BytesIO (data )
15281548 total = 0
0 commit comments