diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index c9d69cf5094095..91a32ef1c4d692 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -368,7 +368,7 @@ be finalized; only the internally used file object will be closed. See the .. versionadded:: 3.2 Added support for the context management protocol. -.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False) +.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False, blocking_factor=None) All following arguments are optional and can be accessed as instance attributes as well. @@ -422,6 +422,11 @@ be finalized; only the internally used file object will be closed. See the If *stream* is set to :const:`True` then while reading the archive info about files in the archive are not cached, saving memory. + If *blocking_factor* is given, the record size will be ``blocking_factor * 512`` bytes. + When an archive opened for writing is closed, it is padded with zeros to a whole number + of records. The default blocking factor is 20, corresponding to a record size of + 10240 bytes. + .. versionchanged:: 3.2 Use ``'surrogateescape'`` as the default for the *errors* argument. @@ -434,6 +439,9 @@ be finalized; only the internally used file object will be closed. See the .. versionchanged:: 3.13 Add the *stream* parameter. + .. versionchanged:: 3.14 + Add the *blocking_factor* parameter. + .. classmethod:: TarFile.open(...) Alternative constructor. 
The :func:`tarfile.open` function is actually a diff --git a/Lib/tarfile.py b/Lib/tarfile.py index a0fab46b24e249..ada1ef65146433 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1691,10 +1691,13 @@ class TarFile(object): extraction_filter = None # The default filter for extraction. + record_size = RECORDSIZE # The default record size, matches tar -b20 + def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, - errorlevel=None, copybufsize=None, stream=False): + errorlevel=None, copybufsize=None, stream=False, + blocking_factor=None): """Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. 'mode' @@ -1750,6 +1753,8 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None, self.debug = debug if errorlevel is not None: self.errorlevel = errorlevel + if blocking_factor is not None: + self.record_size = BLOCKSIZE * blocking_factor # Init datastructures. 
self.copybufsize = copybufsize @@ -2024,9 +2029,9 @@ def close(self): self.offset += (BLOCKSIZE * 2) # fill up the end with zero-blocks # (like option -b20 for tar does) - blocks, remainder = divmod(self.offset, RECORDSIZE) + blocks, remainder = divmod(self.offset, self.record_size) if remainder > 0: - self.fileobj.write(NUL * (RECORDSIZE - remainder)) + self.fileobj.write(NUL * (self.record_size - remainder)) finally: if not self._extfileobj: self.fileobj.close() @@ -2882,6 +2887,8 @@ def main(): parser.add_argument('--filter', metavar='', choices=_NAMED_FILTERS, help='Filter for extraction') + parser.add_argument('--blocking-factor', metavar='', type=int, + help='blocking_factor x 512 bytes per record, defaults to 20') group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-l', '--list', metavar='', @@ -2899,6 +2906,8 @@ def main(): if args.filter and args.extract is None: parser.exit(1, '--filter is only valid for extraction\n') + if args.blocking_factor and args.create is None: + parser.exit(1, '--blocking-factor is only valid for creation\n') if args.test is not None: src = args.test @@ -2960,7 +2969,8 @@ def main(): tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' tar_files = args.create - with TarFile.open(tar_name, tar_mode) as tf: + with TarFile.open(tar_name, tar_mode, + blocking_factor=args.blocking_factor) as tf: for file_name in tar_files: tf.add(file_name) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2549b6b35adc29..2ac27cbbfb55ac 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1651,6 +1651,22 @@ def test_missing_fileobj(self): with self.assertRaises(ValueError): tar.addfile(tarinfo) + def test_archive_size(self): + # Make sure the archive size is a multiple of the configured + # record size + for blocking_factor, record_size, records in ( + (None, tarfile.RECORDSIZE, 1), (1, 512, 4), (2, 1024, 2), + (20, 10240, 1), (200, 102400, 1)): + tar = 
tarfile.open(tmpname, self.mode, blocking_factor=blocking_factor) + self.assertEqual(tar.record_size, record_size) + t = tarfile.TarInfo("foo") + t.size = tarfile.BLOCKSIZE + tar.addfile(t, io.BytesIO(b"a" * t.size)) + tar.close() + + with self.open(tmpname, "rb") as fobj: + self.assertEqual(len(fobj.read()), record_size * records) + class GzipWriteTest(GzipTest, WriteTest): pass @@ -2843,6 +2859,29 @@ def test_create_command_compressed(self): finally: os_helper.unlink(tar_name) + def test_create_command_blocking_factor(self): + files = [support.findfile('tokenize_tests.txt', + subdir='tokenizedata'), + support.findfile('tokenize_tests-no-coding-cookie-' + 'and-utf8-bom-sig-only.txt', + subdir='tokenizedata')] + for opt in '-c', '--create': + for blocking_factor, archive_size in ( + (1, tarfile.BLOCKSIZE*15), + (20, tarfile.BLOCKSIZE*20), + (100, tarfile.BLOCKSIZE*100)): + try: + out = self.tarfilecmd("--blocking-factor", str(blocking_factor), + opt, tmpname, *files) + self.assertEqual(out, b'') + self.assertEqual(out, b'') + with tarfile.open(tmpname) as tar: + tar.getmembers() + with io.FileIO(tmpname, "rb") as fobj: + self.assertEqual(len(fobj.read()), archive_size) + finally: + os_helper.unlink(tmpname) + def test_extract_command(self): self.make_simple_tarfile(tmpname) for opt in '-e', '--extract': diff --git a/Misc/ACKS b/Misc/ACKS index 7759bd0b95ed8b..ad9c892bbe8405 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1271,6 +1271,7 @@ Paul Monson The Dragon De Monsyne Bastien Montagne Skip Montanaro +Erik Montnémery Peter Moody HyunKyun Moon Alan D. Moore diff --git a/Misc/NEWS.d/next/Library/2025-01-24-14-36-35.gh-issue-75955.yG6for.rst b/Misc/NEWS.d/next/Library/2025-01-24-14-36-35.gh-issue-75955.yG6for.rst new file mode 100644 index 00000000000000..c22554e284fe4c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-24-14-36-35.gh-issue-75955.yG6for.rst @@ -0,0 +1 @@ +Add optional parameter ``blocking_factor`` to :class:`tarfile.TarFile`.