Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Doc/library/tarfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ be finalized; only the internally used file object will be closed. See the
.. versionadded:: 3.2
Added support for the context management protocol.

.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False)
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False, blocking_factor=None)

All following arguments are optional and can be accessed as instance attributes
as well.
Expand Down Expand Up @@ -422,6 +422,11 @@ be finalized; only the internally used file object will be closed. See the
If *stream* is set to :const:`True` then while reading the archive info about files
in the archive are not cached, saving memory.

If *blocking_factor* is given, the record size is ``blocking_factor * 512`` bytes.
When an archive that was opened for writing is closed, it is padded with zeros so
that its size is a whole number of records. The default blocking factor is 20,
corresponding to a record size of 10240 bytes.

.. versionchanged:: 3.2
Use ``'surrogateescape'`` as the default for the *errors* argument.

Expand All @@ -434,6 +439,9 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.13
Add the *stream* parameter.

.. versionchanged:: 3.14
Add the *blocking_factor* parameter.

.. classmethod:: TarFile.open(...)

Alternative constructor. The :func:`tarfile.open` function is actually a
Expand Down
18 changes: 14 additions & 4 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,10 +1691,13 @@ class TarFile(object):

extraction_filter = None # The default filter for extraction.

record_size = RECORDSIZE # The default record size, matches tar -b20

def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors="surrogateescape", pax_headers=None, debug=None,
errorlevel=None, copybufsize=None, stream=False):
errorlevel=None, copybufsize=None, stream=False,
blocking_factor=None):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of None, should the blocking_factor default to 20, or a constant BLOCKING_FACTOR?

"""Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. 'mode'
Expand Down Expand Up @@ -1750,6 +1753,8 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None,
self.debug = debug
if errorlevel is not None:
self.errorlevel = errorlevel
if blocking_factor is not None:
self.record_size = BLOCKSIZE * blocking_factor

# Init datastructures.
self.copybufsize = copybufsize
Expand Down Expand Up @@ -2024,9 +2029,9 @@ def close(self):
self.offset += (BLOCKSIZE * 2)
# fill up the end with zero-blocks
# (like option -b20 for tar does)
blocks, remainder = divmod(self.offset, RECORDSIZE)
blocks, remainder = divmod(self.offset, self.record_size)
if remainder > 0:
self.fileobj.write(NUL * (RECORDSIZE - remainder))
self.fileobj.write(NUL * (self.record_size - remainder))
finally:
if not self._extfileobj:
self.fileobj.close()
Expand Down Expand Up @@ -2882,6 +2887,8 @@ def main():
parser.add_argument('--filter', metavar='<filtername>',
choices=_NAMED_FILTERS,
help='Filter for extraction')
parser.add_argument('--blocking-factor', metavar='<blocking_factor>', type=int,
help='blocking_factor x 512 bytes per record, defaults to 20')

group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-l', '--list', metavar='<tarfile>',
Expand All @@ -2899,6 +2906,8 @@ def main():

if args.filter and args.extract is None:
parser.exit(1, '--filter is only valid for extraction\n')
if args.blocking_factor and args.create is None:
parser.exit(1, '--blocking-factor is only valid for creation\n')

if args.test is not None:
src = args.test
Expand Down Expand Up @@ -2960,7 +2969,8 @@ def main():
tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
tar_files = args.create

with TarFile.open(tar_name, tar_mode) as tf:
with TarFile.open(tar_name, tar_mode,
blocking_factor=args.blocking_factor) as tf:
for file_name in tar_files:
tf.add(file_name)

Expand Down
39 changes: 39 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1651,6 +1651,22 @@ def test_missing_fileobj(self):
with self.assertRaises(ValueError):
tar.addfile(tarinfo)

def test_archive_size(self):
    """Check that a written archive is padded to a whole number of records.

    For each blocking factor a single member with one block of data is
    written, then the archive is read back and its length is compared
    against ``record_size * records``.
    """
    # Each case is (blocking_factor, expected record size in bytes,
    # expected number of records in the finished archive).  The smaller
    # factors imply the unpadded archive is 4 blocks long (4 * 512 and
    # 2 * 1024); larger record sizes pad it up to one full record.
    cases = (
        (None, tarfile.RECORDSIZE, 1),
        (1, 512, 4),
        (2, 1024, 2),
        (20, 10240, 1),
        (200, 102400, 1),
    )
    for blocking_factor, record_size, records in cases:
        # subTest reports every failing blocking factor instead of
        # stopping at the first one.
        with self.subTest(blocking_factor=blocking_factor):
            # The context manager closes the archive even when an
            # assertion fails, so no file handle is leaked on tmpname.
            with tarfile.open(tmpname, self.mode,
                              blocking_factor=blocking_factor) as tar:
                self.assertEqual(tar.record_size, record_size)
                t = tarfile.TarInfo("foo")
                t.size = tarfile.BLOCKSIZE
                tar.addfile(t, io.BytesIO(b"a" * t.size))

            # self.open is supplied by the test class this method lives in.
            with self.open(tmpname, "rb") as fobj:
                self.assertEqual(len(fobj.read()), record_size * records)


class GzipWriteTest(GzipTest, WriteTest):
pass
Expand Down Expand Up @@ -2843,6 +2859,29 @@ def test_create_command_compressed(self):
finally:
os_helper.unlink(tar_name)

def test_create_command_blocking_factor(self):
    """Check that ``--blocking-factor`` controls the created archive's size.

    The command-line interface is run with both spellings of the create
    option and several blocking factors.  Each resulting archive must be
    readable and have exactly the expected length in bytes.
    """
    files = [support.findfile('tokenize_tests.txt',
                              subdir='tokenizedata'),
             support.findfile('tokenize_tests-no-coding-cookie-'
                              'and-utf8-bom-sig-only.txt',
                              subdir='tokenizedata')]
    # Each case is (blocking factor given on the command line,
    # expected size of the created archive in bytes).
    cases = (
        (1, tarfile.BLOCKSIZE*15),
        (20, tarfile.BLOCKSIZE*20),
        (100, tarfile.BLOCKSIZE*100),
    )
    for opt in '-c', '--create':
        for blocking_factor, archive_size in cases:
            # subTest reports every failing combination instead of
            # stopping at the first one.
            with self.subTest(opt=opt, blocking_factor=blocking_factor):
                try:
                    out = self.tarfilecmd("--blocking-factor",
                                          str(blocking_factor),
                                          opt, tmpname, *files)
                    # Creating an archive must not print anything.
                    self.assertEqual(out, b'')
                    # Reading the member list confirms the archive parses.
                    with tarfile.open(tmpname) as tar:
                        tar.getmembers()
                    with io.FileIO(tmpname, "rb") as fobj:
                        self.assertEqual(len(fobj.read()), archive_size)
                finally:
                    # Remove the archive so the next case starts clean.
                    os_helper.unlink(tmpname)

def test_extract_command(self):
self.make_simple_tarfile(tmpname)
for opt in '-e', '--extract':
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,7 @@ Paul Monson
The Dragon De Monsyne
Bastien Montagne
Skip Montanaro
Erik Montnémery
Peter Moody
HyunKyun Moon
Alan D. Moore
Expand Down
Loading