Skip to content

Commit 615ceea

Browse files
committed
tarfile: Allow configuring the record size
1 parent 732670d commit 615ceea

File tree

4 files changed

+63
-5
lines changed

4 files changed

+63
-5
lines changed

Doc/library/tarfile.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ be finalized; only the internally used file object will be closed. See the
368368
.. versionadded:: 3.2
369369
Added support for the context management protocol.
370370

371-
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False)
371+
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False, blocking_factor=None)
372372

373373
All following arguments are optional and can be accessed as instance attributes
374374
as well.
@@ -422,6 +422,11 @@ be finalized; only the internally used file object will be closed. See the
422422
If *stream* is set to :const:`True` then while reading the archive info about files
423423
in the archive are not cached, saving memory.
424424

425+
If *blocking_factor* is given, the record size will be ``blocking_factor * 512`` bytes.
426+
When an archive opened for writing is closed, it is padded with zeros to a whole number
427+
of records. The default blocking factor is 20, corresponding to a record size of
428+
10240 bytes.
429+
425430
.. versionchanged:: 3.2
426431
Use ``'surrogateescape'`` as the default for the *errors* argument.
427432

@@ -434,6 +439,9 @@ be finalized; only the internally used file object will be closed. See the
434439
.. versionchanged:: 3.13
435440
Add the *stream* parameter.
436441

442+
.. versionchanged:: 3.14
443+
Add the *blocking_factor* parameter.
444+
437445
.. classmethod:: TarFile.open(...)
438446

439447
Alternative constructor. The :func:`tarfile.open` function is actually a

Lib/tarfile.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,10 +1691,13 @@ class TarFile(object):
16911691

16921692
extraction_filter = None # The default filter for extraction.
16931693

1694+
record_size = RECORDSIZE # The default record size, matches tar -b20
1695+
16941696
def __init__(self, name=None, mode="r", fileobj=None, format=None,
16951697
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
16961698
errors="surrogateescape", pax_headers=None, debug=None,
1697-
errorlevel=None, copybufsize=None, stream=False):
1699+
errorlevel=None, copybufsize=None, stream=False,
1700+
blocking_factor=None):
16981701
"""Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to
16991702
read from an existing archive, 'a' to append data to an existing
17001703
file or 'w' to create a new file overwriting an existing one. 'mode'
@@ -1750,6 +1753,8 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None,
17501753
self.debug = debug
17511754
if errorlevel is not None:
17521755
self.errorlevel = errorlevel
1756+
if blocking_factor is not None:
1757+
self.record_size = BLOCKSIZE * blocking_factor
17531758

17541759
# Init datastructures.
17551760
self.copybufsize = copybufsize
@@ -2024,9 +2029,9 @@ def close(self):
20242029
self.offset += (BLOCKSIZE * 2)
20252030
# fill up the end with zero-blocks
20262031
# (like option -b20 for tar does)
2027-
blocks, remainder = divmod(self.offset, RECORDSIZE)
2032+
blocks, remainder = divmod(self.offset, self.record_size)
20282033
if remainder > 0:
2029-
self.fileobj.write(NUL * (RECORDSIZE - remainder))
2034+
self.fileobj.write(NUL * (self.record_size - remainder))
20302035
finally:
20312036
if not self._extfileobj:
20322037
self.fileobj.close()
@@ -2882,6 +2887,8 @@ def main():
28822887
parser.add_argument('--filter', metavar='<filtername>',
28832888
choices=_NAMED_FILTERS,
28842889
help='Filter for extraction')
2890+
parser.add_argument('--blocking-factor', metavar='<blocking_factor>', type=int,
2891+
help='blocking_factor x 512 bytes per record, defaults to 20')
28852892

28862893
group = parser.add_mutually_exclusive_group(required=True)
28872894
group.add_argument('-l', '--list', metavar='<tarfile>',
@@ -2899,6 +2906,8 @@ def main():
28992906

29002907
if args.filter and args.extract is None:
29012908
parser.exit(1, '--filter is only valid for extraction\n')
2909+
if args.blocking_factor and args.create is None:
2910+
parser.exit(1, '--blocking-factor is only valid for creation\n')
29022911

29032912
if args.test is not None:
29042913
src = args.test
@@ -2960,7 +2969,8 @@ def main():
29602969
tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
29612970
tar_files = args.create
29622971

2963-
with TarFile.open(tar_name, tar_mode) as tf:
2972+
with TarFile.open(tar_name, tar_mode,
2973+
blocking_factor=args.blocking_factor) as tf:
29642974
for file_name in tar_files:
29652975
tf.add(file_name)
29662976

Lib/test/test_tarfile.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,22 @@ def test_missing_fileobj(self):
16511651
with self.assertRaises(ValueError):
16521652
tar.addfile(tarinfo)
16531653

1654+
def test_archive_size(self):
    # Make sure the archive size is a multiple of the configured
    # record size.
    #
    # Each case is (blocking_factor, expected record size in bytes,
    # expected number of records in the finished archive).  A
    # blocking_factor of None must fall back to tarfile.RECORDSIZE.
    # NOTE(review): the expected record counts assume that one member
    # with a single block of data occupies four 512-byte blocks once the
    # archive is closed -- confirm if the archive layout changes.
    cases = (
        (None, tarfile.RECORDSIZE, 1),
        (1, 512, 4),
        (2, 1024, 2),
        (20, 10240, 1),
        (200, 102400, 1),
    )
    for blocking_factor, record_size, records in cases:
        # subTest reports which blocking factor failed and lets the
        # remaining cases run.
        with self.subTest(blocking_factor=blocking_factor):
            # The context manager closes the archive even when an
            # assertion inside the block fails, so no handle is leaked.
            with tarfile.open(tmpname, self.mode,
                              blocking_factor=blocking_factor) as tar:
                self.assertEqual(tar.record_size, record_size)
                t = tarfile.TarInfo("foo")
                t.size = tarfile.BLOCKSIZE
                tar.addfile(t, io.BytesIO(b"a" * t.size))

            # Read the archive back through self.open and compare its
            # total length with the expected number of whole records.
            with self.open(tmpname, "rb") as fobj:
                self.assertEqual(len(fobj.read()), record_size * records)
1669+
16541670

16551671
class GzipWriteTest(GzipTest, WriteTest):
16561672
pass
@@ -2843,6 +2859,29 @@ def test_create_command_compressed(self):
28432859
finally:
28442860
os_helper.unlink(tar_name)
28452861

2862+
def test_create_command_blocking_factor(self):
    # Create archives through the command-line interface with
    # --blocking-factor and check that each result is a readable archive
    # of the expected total size.
    files = [support.findfile('tokenize_tests.txt',
                              subdir='tokenizedata'),
             support.findfile('tokenize_tests-no-coding-cookie-'
                              'and-utf8-bom-sig-only.txt',
                              subdir='tokenizedata')]
    for opt in '-c', '--create':
        # Each case is (blocking factor passed on the command line,
        # expected archive size in bytes).
        # NOTE(review): the expected sizes depend on the sizes of the
        # two fixture files above -- re-check them if the fixtures change.
        for blocking_factor, archive_size in (
                (1, tarfile.BLOCKSIZE*15),
                (20, tarfile.BLOCKSIZE*20),
                (100, tarfile.BLOCKSIZE*100)):
            try:
                out = self.tarfilecmd("--blocking-factor", str(blocking_factor),
                                      opt, tmpname, *files)
                # A successful create command prints nothing.
                self.assertEqual(out, b'')
                # The archive must still be readable by tarfile.
                with tarfile.open(tmpname) as tar:
                    tar.getmembers()
                with io.FileIO(tmpname, "rb") as fobj:
                    self.assertEqual(len(fobj.read()), archive_size)
            finally:
                # Remove the archive so the next case starts clean.
                os_helper.unlink(tmpname)
2884+
28462885
def test_extract_command(self):
28472886
self.make_simple_tarfile(tmpname)
28482887
for opt in '-e', '--extract':

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,7 @@ Paul Monson
12711271
The Dragon De Monsyne
12721272
Bastien Montagne
12731273
Skip Montanaro
1274+
Erik Montnémery
12741275
Peter Moody
12751276
HyunKyun Moon
12761277
Alan D. Moore

0 commit comments

Comments
 (0)