Skip to content

Commit e013c5a

Browse files
committed
gh-75707: tarfile: Add optional open() argument "mtime"
This allows the gzip header mtime field to be set without overriding time.time(), which is useful when creating reproducible archives.
1 parent 96b7a2e commit e013c5a

File tree

3 files changed: 42 additions and 7 deletions.

Doc/library/tarfile.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ Some facts and figures:
137137
a Zstandard dictionary used to improve compression of smaller amounts of
138138
data.
139139

140+
For modes ``'w:gz'`` and ``'w|gz'``, :func:`tarfile.open` accepts the
141+
keyword argument *mtime*, an integer number of seconds since the epoch that
142+
is stored as the timestamp in the gzip header. By default, the current time
is used.
143+
140144
For special purposes, there is a second format for *mode*:
141145
``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile`
142146
object that processes its data as a stream of blocks. No random seeking will

Lib/tarfile.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ class _Stream:
339339
"""
340340

341341
def __init__(self, name, mode, comptype, fileobj, bufsize,
342-
compresslevel, preset):
342+
compresslevel, preset, mtime):
343343
"""Construct a _Stream object.
344344
"""
345345
self._extfileobj = True
@@ -374,7 +374,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize,
374374
self.exception = zlib.error
375375
self._init_read_gz()
376376
else:
377-
self._init_write_gz(compresslevel)
377+
self._init_write_gz(compresslevel, mtime)
378378

379379
elif comptype == "bz2":
380380
try:
@@ -423,15 +423,17 @@ def __del__(self):
423423
if hasattr(self, "closed") and not self.closed:
424424
self.close()
425425

426-
def _init_write_gz(self, compresslevel):
426+
def _init_write_gz(self, compresslevel, mtime):
427427
"""Initialize for writing with gzip compression.
428428
"""
429429
self.cmp = self.zlib.compressobj(compresslevel,
430430
self.zlib.DEFLATED,
431431
-self.zlib.MAX_WBITS,
432432
self.zlib.DEF_MEM_LEVEL,
433433
0)
434-
timestamp = struct.pack("<L", int(time.time()))
434+
if mtime is None:
435+
mtime = int(time.time())
436+
timestamp = struct.pack("<L", mtime)
435437
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
436438
if self.name.endswith(".gz"):
437439
self.name = self.name[:-3]
@@ -1726,7 +1728,7 @@ class TarFile(object):
17261728
def __init__(self, name=None, mode="r", fileobj=None, format=None,
17271729
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
17281730
errors="surrogateescape", pax_headers=None, debug=None,
1729-
errorlevel=None, copybufsize=None, stream=False):
1731+
errorlevel=None, copybufsize=None, stream=False, mtime=None):
17301732
"""Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to
17311733
read from an existing archive, 'a' to append data to an existing
17321734
file or 'w' to create a new file overwriting an existing one. 'mode'
@@ -1932,8 +1934,9 @@ def not_compressed(comptype):
19321934

19331935
compresslevel = kwargs.pop("compresslevel", 6)
19341936
preset = kwargs.pop("preset", None)
1937+
mtime = kwargs.pop("mtime", None)
19351938
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1936-
compresslevel, preset)
1939+
compresslevel, preset, mtime)
19371940
try:
19381941
t = cls(name, filemode, stream, **kwargs)
19391942
except:
@@ -1969,7 +1972,8 @@ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=6, **kwargs):
19691972
raise CompressionError("gzip module is not available") from None
19701973

19711974
try:
1972-
fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1975+
mtime = kwargs.pop("mtime", None)
1976+
fileobj = GzipFile(name, mode + "b", compresslevel, fileobj, mtime=mtime)
19731977
except OSError as e:
19741978
if fileobj is not None and mode == 'r':
19751979
raise ReadError("not a gzip file") from e

Lib/test/test_tarfile.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import re
1111
import warnings
1212
import stat
13+
import time
1314

1415
import unittest
1516
import unittest.mock
@@ -1809,6 +1810,19 @@ def test_source_directory_not_leaked(self):
18091810
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
18101811
assert os.path.dirname(tmpname) not in payload
18111812

1813+
def test_create_with_mtime(self):
1814+
tarfile.open(tmpname, self.mode, mtime=0).close()
1815+
with self.open(tmpname, 'r') as fobj:
1816+
fobj.read()
1817+
self.assertEqual(fobj.mtime, 0)
1818+
1819+
def test_create_without_mtime(self):
1820+
before = int(time.time())
1821+
tarfile.open(tmpname, self.mode).close()
1822+
after = int(time.time())
1823+
with self.open(tmpname, 'r') as fobj:
1824+
fobj.read()
1825+
self.assertTrue(before <= fobj.mtime <= after)
18121826

18131827
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
18141828
decompressor = bz2.BZ2Decompressor if bz2 else None
@@ -2115,6 +2129,19 @@ def test_create_with_compresslevel(self):
21152129
with tarfile.open(tmpname, 'r:gz', compresslevel=1) as tobj:
21162130
pass
21172131

2132+
def test_create_with_mtime(self):
2133+
tarfile.open(tmpname, self.mode, mtime=0).close()
2134+
with self.open(tmpname, 'rb') as fobj:
2135+
fobj.read()
2136+
self.assertEqual(fobj.mtime, 0)
2137+
2138+
def test_create_without_mtime(self):
2139+
before = int(time.time())
2140+
tarfile.open(tmpname, self.mode).close()
2141+
after = int(time.time())
2142+
with self.open(tmpname, 'r') as fobj:
2143+
fobj.read()
2144+
self.assertTrue(before <= fobj.mtime <= after)
21182145

21192146
class Bz2CreateTest(Bz2Test, CreateTest):
21202147

0 commit comments

Comments
 (0)