| 
 | 1 | +import _pyio  | 
1 | 2 | import array  | 
2 | 3 | import contextlib  | 
3 | 4 | import importlib.util  | 
@@ -3440,5 +3441,87 @@ def test_too_short(self):  | 
3440 | 3441 |             b"zzz", zipfile._strip_extra(b"zzz", (self.ZIP64_EXTRA,)))  | 
3441 | 3442 | 
 
  | 
3442 | 3443 | 
 
  | 
 | 3444 | +class StatIO(_pyio.BytesIO):  | 
 | 3445 | +    """Buffer which remembers the number of bytes that were read."""  | 
 | 3446 | + | 
 | 3447 | +    def __init__(self):  | 
 | 3448 | +        super().__init__()  | 
 | 3449 | +        self.bytes_read = 0  | 
 | 3450 | + | 
 | 3451 | +    def read(self, size=-1):  | 
 | 3452 | +        bs = super().read(size)  | 
 | 3453 | +        self.bytes_read += len(bs)  | 
 | 3454 | +        return bs  | 
 | 3455 | + | 
 | 3456 | + | 
 | 3457 | +class StoredZipExtFileRandomReadTest(unittest.TestCase):  | 
 | 3458 | +    """Tests whether an uncompressed, unencrypted zip entry can be randomly  | 
 | 3459 | +    seek and read without reading redundant bytes."""  | 
 | 3460 | +    def test_stored_seek_and_read(self):  | 
 | 3461 | + | 
 | 3462 | +        sio = StatIO()  | 
 | 3463 | +        # 20000 bytes  | 
 | 3464 | +        txt = b'0123456789' * 2000  | 
 | 3465 | + | 
 | 3466 | +        # The seek length must be greater than ZipExtFile.MIN_READ_SIZE  | 
 | 3467 | +        # as `ZipExtFile._read2()` reads in blocks of this size and we  | 
 | 3468 | +        # need to seek out of the buffered data  | 
 | 3469 | +        read_buffer_size = zipfile.ZipExtFile.MIN_READ_SIZE  | 
 | 3470 | +        self.assertGreaterEqual(10002, read_buffer_size)  # for forward seek test  | 
 | 3471 | +        self.assertGreaterEqual(5003, read_buffer_size)  # for backward seek test  | 
 | 3472 | +        # The read length must be less than MIN_READ_SIZE, since we assume that  | 
 | 3473 | +        # only 1 block is read in the test.  | 
 | 3474 | +        read_length = 100  | 
 | 3475 | +        self.assertGreaterEqual(read_buffer_size, read_length)  # for read() calls  | 
 | 3476 | + | 
 | 3477 | +        with zipfile.ZipFile(sio, "w", compression=zipfile.ZIP_STORED) as zipf:  | 
 | 3478 | +            zipf.writestr("foo.txt", txt)  | 
 | 3479 | + | 
 | 3480 | +        # check random seek and read on a file  | 
 | 3481 | +        with zipfile.ZipFile(sio, "r") as zipf:  | 
 | 3482 | +            with zipf.open("foo.txt", "r") as fp:  | 
 | 3483 | +                # Test this optimized read hasn't rewound and read from the  | 
 | 3484 | +                # start of the file (as in the case of the unoptimized path)  | 
 | 3485 | + | 
 | 3486 | +                # forward seek  | 
 | 3487 | +                old_count = sio.bytes_read  | 
 | 3488 | +                forward_seek_len = 10002  | 
 | 3489 | +                current_pos = 0  | 
 | 3490 | +                fp.seek(forward_seek_len, os.SEEK_CUR)  | 
 | 3491 | +                current_pos += forward_seek_len  | 
 | 3492 | +                self.assertEqual(fp.tell(), current_pos)  | 
 | 3493 | +                self.assertEqual(fp._left, fp._compress_left)  | 
 | 3494 | +                arr = fp.read(read_length)  | 
 | 3495 | +                current_pos += read_length  | 
 | 3496 | +                self.assertEqual(fp.tell(), current_pos)  | 
 | 3497 | +                self.assertEqual(arr, txt[current_pos - read_length:current_pos])  | 
 | 3498 | +                self.assertEqual(fp._left, fp._compress_left)  | 
 | 3499 | +                read_count = sio.bytes_read - old_count  | 
 | 3500 | +                self.assertLessEqual(read_count, read_buffer_size)  | 
 | 3501 | + | 
 | 3502 | +                # backward seek  | 
 | 3503 | +                old_count = sio.bytes_read  | 
 | 3504 | +                backward_seek_len = 5003  | 
 | 3505 | +                fp.seek(-backward_seek_len, os.SEEK_CUR)  | 
 | 3506 | +                current_pos -= backward_seek_len  | 
 | 3507 | +                self.assertEqual(fp.tell(), current_pos)  | 
 | 3508 | +                self.assertEqual(fp._left, fp._compress_left)  | 
 | 3509 | +                arr = fp.read(read_length)  | 
 | 3510 | +                current_pos += read_length  | 
 | 3511 | +                self.assertEqual(fp.tell(), current_pos)  | 
 | 3512 | +                self.assertEqual(arr, txt[current_pos - read_length:current_pos])  | 
 | 3513 | +                self.assertEqual(fp._left, fp._compress_left)  | 
 | 3514 | +                read_count = sio.bytes_read - old_count  | 
 | 3515 | +                self.assertLessEqual(read_count, read_buffer_size)  | 
 | 3516 | + | 
 | 3517 | +                # eof flags test  | 
 | 3518 | +                fp.seek(0, os.SEEK_END)  | 
 | 3519 | +                fp.seek(12345, os.SEEK_SET)  | 
 | 3520 | +                current_pos = 12345  | 
 | 3521 | +                arr = fp.read(read_length)  | 
 | 3522 | +                current_pos += read_length  | 
 | 3523 | +                self.assertEqual(arr, txt[current_pos - read_length:current_pos])  | 
 | 3524 | + | 
 | 3525 | + | 
3443 | 3526 | if __name__ == "__main__":  | 
3444 | 3527 |     unittest.main()  | 
0 commit comments