Skip to content

Commit d93cf66

Browse files
committed
gh-75707: tarfile: Add optional open() argument "mtime"
This makes it possible to set the mtime field of the gzip header without monkey-patching time.time(), which is useful when creating reproducible archives.
1 parent 96b7a2e commit d93cf66

File tree

3 files changed

+25
-7
lines changed

3 files changed

+25
-7
lines changed

Doc/library/tarfile.rst

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,10 @@ Some facts and figures:
137137
a Zstandard dictionary used to improve compression of smaller amounts of
138138
data.
139139

140+
For modes ``'w:gz'`` and ``'w|gz'``, :func:`tarfile.open` accepts the
141+
keyword argument *mtime* to set the mtime field of the gzip archive header. By
142+
default, the mtime is set to the time of creation of the archive.
143+
140144
For special purposes, there is a second format for *mode*:
141145
``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile`
142146
object that processes its data as a stream of blocks. No random seeking will

Lib/tarfile.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -339,7 +339,7 @@ class _Stream:
339339
"""
340340

341341
def __init__(self, name, mode, comptype, fileobj, bufsize,
342-
compresslevel, preset):
342+
compresslevel, preset, mtime):
343343
"""Construct a _Stream object.
344344
"""
345345
self._extfileobj = True
@@ -374,7 +374,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize,
374374
self.exception = zlib.error
375375
self._init_read_gz()
376376
else:
377-
self._init_write_gz(compresslevel)
377+
self._init_write_gz(compresslevel, mtime)
378378

379379
elif comptype == "bz2":
380380
try:
@@ -423,15 +423,17 @@ def __del__(self):
423423
if hasattr(self, "closed") and not self.closed:
424424
self.close()
425425

426-
def _init_write_gz(self, compresslevel):
426+
def _init_write_gz(self, compresslevel, mtime):
427427
"""Initialize for writing with gzip compression.
428428
"""
429429
self.cmp = self.zlib.compressobj(compresslevel,
430430
self.zlib.DEFLATED,
431431
-self.zlib.MAX_WBITS,
432432
self.zlib.DEF_MEM_LEVEL,
433433
0)
434-
timestamp = struct.pack("<L", int(time.time()))
434+
if mtime is None:
435+
mtime = int(time.time())
436+
timestamp = struct.pack("<L", mtime)
435437
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
436438
if self.name.endswith(".gz"):
437439
self.name = self.name[:-3]
@@ -1726,7 +1728,7 @@ class TarFile(object):
17261728
def __init__(self, name=None, mode="r", fileobj=None, format=None,
17271729
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
17281730
errors="surrogateescape", pax_headers=None, debug=None,
1729-
errorlevel=None, copybufsize=None, stream=False):
1731+
errorlevel=None, copybufsize=None, stream=False, mtime=None):
17301732
"""Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to
17311733
read from an existing archive, 'a' to append data to an existing
17321734
file or 'w' to create a new file overwriting an existing one. 'mode'
@@ -1932,8 +1934,9 @@ def not_compressed(comptype):
19321934

19331935
compresslevel = kwargs.pop("compresslevel", 6)
19341936
preset = kwargs.pop("preset", None)
1937+
mtime = kwargs.pop("mtime", None)
19351938
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1936-
compresslevel, preset)
1939+
compresslevel, preset, mtime)
19371940
try:
19381941
t = cls(name, filemode, stream, **kwargs)
19391942
except:
@@ -1969,7 +1972,8 @@ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=6, **kwargs):
19691972
raise CompressionError("gzip module is not available") from None
19701973

19711974
try:
1972-
fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1975+
mtime = kwargs.pop("mtime", None)
1976+
fileobj = GzipFile(name, mode + "b", compresslevel, fileobj, mtime=mtime)
19731977
except OSError as e:
19741978
if fileobj is not None and mode == 'r':
19751979
raise ReadError("not a gzip file") from e

Lib/test/test_tarfile.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1809,6 +1809,11 @@ def test_source_directory_not_leaked(self):
18091809
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
18101810
assert os.path.dirname(tmpname) not in payload
18111811

1812+
def test_create_with_mtime(self):
1813+
tarfile.open(tmpname, self.mode, mtime=0).close()
1814+
with self.open(tmpname, 'r') as fobj:
1815+
fobj.read()
1816+
self.assertEqual(fobj.mtime, 0)
18121817

18131818
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
18141819
decompressor = bz2.BZ2Decompressor if bz2 else None
@@ -2115,6 +2120,11 @@ def test_create_with_compresslevel(self):
21152120
with tarfile.open(tmpname, 'r:gz', compresslevel=1) as tobj:
21162121
pass
21172122

2123+
def test_create_with_mtime(self):
2124+
tarfile.open(tmpname, self.mode, mtime=0).close()
2125+
with self.open(tmpname, 'rb') as fobj:
2126+
fobj.read()
2127+
self.assertEqual(fobj.mtime, 0)
21182128

21192129
class Bz2CreateTest(Bz2Test, CreateTest):
21202130

0 commit comments

Comments
 (0)