Skip to content
47 changes: 47 additions & 0 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3447,6 +3447,53 @@ def test_too_short(self):
self.assertEqual(
b"zzz", zipfile._Extra.strip(b"zzz", (self.ZIP64_EXTRA,)))

class StoredZipExtFileRandomAccessTest(unittest.TestCase):
def test_random_access(self):
from _pyio import BytesIO
class StatIO(BytesIO):
def __init__(self):
super().__init__()
self.bytes_read = 0

def read(self, size=-1):
bs = super().read(size)
self.bytes_read += len(bs)
return bs

def get_bytes_read(self):
return self.bytes_read

sio = StatIO()
# 100000 bytes
txt = b'0123456789'*10000

# Check seek on a file
with zipfile.ZipFile(sio, "w", compression=zipfile.ZIP_STORED) as zipf:
zipf.writestr("foo.txt", txt)

with zipfile.ZipFile(sio, "r") as zipf:
with zipf.open("foo.txt", "r") as fp:
br = sio.get_bytes_read()
fp.seek(50000, os.SEEK_CUR)
self.assertEqual(sio.get_bytes_read() - br, 0, 'seek produces redundant read!')

b = fp.read(100)
self.assertEqual(b, txt[:100])

# seek length must be greater than ZipExtFile.MIN_READ_SIZE (4096)
# backward seek
br = sio.get_bytes_read()
fp.seek(5000, os.SEEK_CUR)
b = fp.read(100)
self.assertEqual(b, txt[50000:50100])
self.assertLessEqual(sio.get_bytes_read() - br, 4096, 'read redundant bytes during backward seek!')

# forward seek
br = sio.get_bytes_read()
fp.seek(-40000, os.SEEK_CUR)
b = fp.read(100)
self.assertEqual(b, txt[10100:10200])
self.assertLessEqual(sio.get_bytes_read() - br, 4096, 'read redundant bytes during forward seek!')

# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
unittest.main()
5 changes: 4 additions & 1 deletion Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,13 +1162,16 @@ def seek(self, offset, whence=os.SEEK_SET):
self._offset = buff_offset
read_offset = 0
# Fast seek uncompressed unencrypted file
elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
elif self._compress_type == ZIP_STORED and self._decrypter is None:
# disable CRC checking after first seeking - it would be invalid
self._expected_crc = None
# seek actual file taking already buffered data into account
read_offset -= len(self._readbuffer) - self._offset
self._fileobj.seek(read_offset, os.SEEK_CUR)
self._left -= read_offset
self._compress_left -= read_offset
if self._eof and read_offset < 0:
self._eof = False
read_offset = 0
# flush read buffer
self._readbuffer = b''
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support fast backward seek, in addition to forward seek, in uncompressed, unencrypted :class:`!zipfile.ZipExtFile`.
Loading