Skip to content

Commit a734511

Browse files
fix: pick up upstream enhancements for tarfile reproducibility (fixes #421) (#423)
* Import mozpack archive tests * Import upstream changes to make tarfiles more reproducible (fixes #421) This is mostly copy/pasting from https://searchfox.org/mozilla-central/source/python/mozbuild/mozpack/archive.py, with a couple of tweaks because we don't have the mozpack `file` classes here. The enhancements come from https://bugzilla.mozilla.org/show_bug.cgi?id=1807872 and https://bugzilla.mozilla.org/show_bug.cgi?id=1347582. As it turns out, these fix some tests currently marked as expected failures. * style: pre-commit.ci auto fixes [...] --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 3410301 commit a734511

File tree

3 files changed

+220
-13
lines changed

3 files changed

+220
-13
lines changed

src/taskgraph/util/archive.py

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,40 @@
1212
DEFAULT_MTIME = 1451606400
1313

1414

15+
# Python 3.9 contains this change:
16+
# https://github.com/python/cpython/commit/674935b8caf33e47c78f1b8e197b1b77a04992d2
17+
# which changes the output of tar creation compared to earlier versions.
18+
# As this code is used to generate tar files that are meant to be deterministic
19+
# across versions of python (specifically, it's used as part of computing the hash
20+
# of docker images, which needs to be identical between CI (which uses python 3.8),
21+
# and developer environments (using arbitrary versions of python, at this point,
22+
# most probably more recent than 3.9)).
23+
# What we do is subclass TarInfo so that if used on python >= 3.9, it reproduces the
24+
# behavior from python < 3.9.
25+
# Here's how it goes:
26+
# - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded
27+
# in the tarinfo is CHRTYPE or BLKTYPE.
28+
# - the value of the type is only compared in the context of choosing which behavior
29+
# to take
30+
# - we replace the type with the same value (so that using the value has no changes)
31+
# but that pretends to be the same as CHRTYPE so that the condition that enables the
32+
# old behavior is taken.
33+
class HackedType(bytes):
    """A tar type flag that additionally compares equal to ``tarfile.CHRTYPE``.

    The bytes value is left untouched, so anything that writes or joins the
    flag sees the original type. Only ``==`` is affected: a comparison with
    ``tarfile.CHRTYPE`` always succeeds, and every other comparison behaves
    like a plain ``bytes`` comparison.

    Because ``__eq__`` is overridden without ``__hash__``, instances are not
    hashable.
    """

    def __eq__(self, other):
        # Claim to be CHRTYPE regardless of the real value.
        if other == tarfile.CHRTYPE:
            return True
        # Delegate to the plain bytes comparison. Writing `self == other`
        # here would call this method again and recurse without bound for
        # any operand that is not CHRTYPE.
        return bytes.__eq__(self, other)
38+
39+
40+
class TarInfo(tarfile.TarInfo):
    """``tarfile.TarInfo`` whose header creation sees a ``HackedType`` type flag."""

    @staticmethod
    def _create_header(info, format, encoding, errors):
        # Wrap the type flag so that it also compares equal to CHRTYPE; the
        # bytes value itself is unchanged. Note that `info` is updated in place.
        wrapped_type = HackedType(info["type"])
        info["type"] = wrapped_type
        # ignore type checking because it looks like pyright complains because we're
        # calling a non-public method
        header = tarfile.TarInfo._create_header(info, format, encoding, errors)  # type: ignore
        return header
47+
48+
1549
def create_tar_from_files(fp, files):
1650
"""Create a tar file deterministically.
1751
@@ -25,15 +59,23 @@ def create_tar_from_files(fp, files):
2559
2660
FUTURE accept a filename argument (or create APIs to write files)
2761
"""
28-
with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
62+
# The format is explicitly set to tarfile.GNU_FORMAT, because this default format
63+
# has been changed in Python 3.8.
64+
with tarfile.open(
65+
name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT
66+
) as tf:
2967
for archive_path, f in sorted(files.items()):
3068
if isinstance(f, str):
31-
mode = os.stat(f).st_mode
69+
s = os.stat(f)
70+
mode = s.st_mode
71+
size = s.st_size
3272
f = open(f, "rb")
3373
else:
3474
mode = 0o0644
75+
size = len(f.read())
76+
f.seek(0)
3577

36-
ti = tarfile.TarInfo(archive_path)
78+
ti = TarInfo(archive_path)
3779
ti.mode = mode
3880
ti.type = tarfile.REGTYPE
3981

@@ -56,9 +98,7 @@ def create_tar_from_files(fp, files):
5698
# Set mtime to a constant value.
5799
ti.mtime = DEFAULT_MTIME
58100

59-
f.seek(0, 2)
60-
ti.size = f.tell()
61-
f.seek(0, 0)
101+
ti.size = size
62102
# tarfile wants to pass a size argument to read(). So just
63103
# wrap/buffer in a proper file object interface.
64104
tf.addfile(ti, f)

test/test_util_archive.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# This Source Code Form is subject to the terms of the Mozilla Public
2+
# License, v. 2.0. If a copy of the MPL was not distributed with this
3+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
import hashlib
6+
import io
7+
import os
8+
import shutil
9+
import stat
10+
import tarfile
11+
import tempfile
12+
import unittest
13+
14+
import pytest
15+
16+
from taskgraph.util.archive import (
17+
DEFAULT_MTIME,
18+
create_tar_from_files,
19+
create_tar_gz_from_files,
20+
)
21+
22+
MODE_STANDARD = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
23+
24+
25+
def file_hash(path):
    """Return the hexadecimal SHA-1 digest of the file at *path*.

    The file is opened in binary mode and consumed in 8192-byte reads, so the
    whole content is never held in memory at once.
    """
    digest = hashlib.sha1()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: stream.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()
35+
36+
37+
class TestArchive(unittest.TestCase):
    """Tests for ``create_tar_from_files`` and ``create_tar_gz_from_files``.

    Several tests pin the SHA-1 of the produced archive to check that the
    output is byte-for-byte deterministic.
    """

    def _create_files(self, root):
        """Build the 20-entry input mapping shared by the archive tests.

        ``file00``..``file09`` are written under *root* and mapped to their
        filesystem paths; ``file10``..``file19`` are mapped to in-memory
        ``io.BytesIO`` objects. The content of every entry is its own name.
        """
        files = {}
        for i in range(10):
            p = os.path.join(root, "file%02d" % i)
            with open(p, "wb") as fh:
                fh.write(b"file%02d" % i)
            # Need to set permissions or umask may influence testing.
            os.chmod(p, MODE_STANDARD)
            files["file%02d" % i] = p

        for i in range(10):
            files["file%02d" % (i + 10)] = io.BytesIO(b"file%02d" % (i + 10))

        return files

    def _verify_basic_tarfile(self, tf):
        """Assert that *tf* holds the 20 sorted entries with normalized metadata."""
        self.assertEqual(len(tf.getmembers()), 20)

        names = ["file%02d" % i for i in range(20)]
        self.assertEqual(tf.getnames(), names)

        for ti in tf.getmembers():
            self.assertEqual(ti.uid, 0)
            self.assertEqual(ti.gid, 0)
            self.assertEqual(ti.uname, "")
            self.assertEqual(ti.gname, "")
            self.assertEqual(ti.mode, MODE_STANDARD)
            self.assertEqual(ti.mtime, DEFAULT_MTIME)

    @pytest.mark.xfail(
        reason="ValueError is not thrown despite being provided directory."
    )
    def test_dirs_refused(self):
        """Passing a directory path as an input should raise ``ValueError``."""
        d = tempfile.mkdtemp()
        try:
            tp = os.path.join(d, "test.tar")
            with open(tp, "wb") as fh:
                with self.assertRaisesRegex(ValueError, "not a regular"):
                    create_tar_from_files(fh, {"test": d})
        finally:
            shutil.rmtree(d)

    def test_setuid_setgid_refused(self):
        """Files with the setuid or setgid bit set must be rejected."""
        d = tempfile.mkdtemp()
        try:
            uid = os.path.join(d, "setuid")
            gid = os.path.join(d, "setgid")
            # Opening in append mode just creates the (empty) files.
            with open(uid, "a"):
                pass
            with open(gid, "a"):
                pass

            os.chmod(uid, MODE_STANDARD | stat.S_ISUID)
            os.chmod(gid, MODE_STANDARD | stat.S_ISGID)

            tp = os.path.join(d, "test.tar")
            with open(tp, "wb") as fh:
                with self.assertRaisesRegex(ValueError, "cannot add file with setuid"):
                    create_tar_from_files(fh, {"test": uid})
                with self.assertRaisesRegex(ValueError, "cannot add file with setuid"):
                    create_tar_from_files(fh, {"test": gid})
        finally:
            shutil.rmtree(d)

    def test_create_tar_basic(self):
        """A plain tar of the standard inputs has a fixed hash and metadata."""
        d = tempfile.mkdtemp()
        try:
            files = self._create_files(d)

            tp = os.path.join(d, "test.tar")
            with open(tp, "wb") as fh:
                create_tar_from_files(fh, files)

            # Output should be deterministic.
            self.assertEqual(file_hash(tp), "01cd314e277f060e98c7de6c8ea57f96b3a2065c")

            with tarfile.open(tp, "r") as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(d)

    @pytest.mark.xfail(reason="hash mismatch")
    def test_executable_preserved(self):
        """The user-executable bit of an input file survives archiving."""
        d = tempfile.mkdtemp()
        try:
            p = os.path.join(d, "exec")
            with open(p, "wb") as fh:
                # The file is opened in binary mode, so the payload has to be
                # bytes; writing a str here raises TypeError before any
                # archive is created.
                fh.write(b"#!/bin/bash\n")
            os.chmod(p, MODE_STANDARD | stat.S_IXUSR)

            tp = os.path.join(d, "test.tar")
            with open(tp, "wb") as fh:
                create_tar_from_files(fh, {"exec": p})

            self.assertEqual(file_hash(tp), "357e1b81c0b6cfdfa5d2d118d420025c3c76ee93")

            with tarfile.open(tp, "r") as tf:
                m = tf.getmember("exec")
                self.assertEqual(m.mode, MODE_STANDARD | stat.S_IXUSR)

        finally:
            shutil.rmtree(d)

    def test_create_tar_gz_basic(self):
        """A gzipped tar of the standard inputs has a fixed hash and metadata."""
        d = tempfile.mkdtemp()
        try:
            files = self._create_files(d)

            gp = os.path.join(d, "test.tar.gz")
            with open(gp, "wb") as fh:
                create_tar_gz_from_files(fh, files)

            self.assertEqual(file_hash(gp), "7c4da5adc5088cdf00911d5daf9a67b15de714b7")

            with tarfile.open(gp, "r:gz") as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(d)

    def test_tar_gz_name(self):
        """Passing ``filename`` to the gzip helper yields its own fixed hash."""
        d = tempfile.mkdtemp()
        try:
            files = self._create_files(d)

            gp = os.path.join(d, "test.tar.gz")
            with open(gp, "wb") as fh:
                create_tar_gz_from_files(fh, files, filename="foobar")

            self.assertEqual(file_hash(gp), "721e00083c17d16df2edbddf40136298c06d0c49")

            with tarfile.open(gp, "r:gz") as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(d)

test/test_util_docker.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@
66
import os
77
import shutil
88
import stat
9-
import sys
109
import tarfile
1110
import tempfile
1211
import unittest
1312
from io import BufferedRandom, BytesIO
1413
from unittest import mock
1514

16-
import pytest
1715
import taskcluster_urls as liburls
1816

1917
from taskgraph.util import docker
@@ -25,7 +23,6 @@
2523

2624
@mock.patch.dict("os.environ", {"TASKCLUSTER_ROOT_URL": liburls.test_root_url()})
2725
class TestDocker(unittest.TestCase):
28-
@pytest.mark.xfail(sys.version_info >= (3, 8), reason="Hash is different")
2926
def test_generate_context_hash(self):
3027
tmpdir = tempfile.mkdtemp()
3128
try:
@@ -87,7 +84,6 @@ def test_docker_image_default_registry_by_tag(self):
8784
docker.docker_image("myimage", by_tag=True), "mozilla/myimage:1.2.3"
8885
)
8986

90-
@pytest.mark.xfail(sys.version_info >= (3, 8), reason="Hash is different")
9187
def test_create_context_tar_basic(self):
9288
tmp = tempfile.mkdtemp()
9389
try:
@@ -119,7 +115,6 @@ def test_create_context_tar_basic(self):
119115
finally:
120116
shutil.rmtree(tmp)
121117

122-
@pytest.mark.xfail(sys.version_info >= (3, 8), reason="Hash is different")
123118
def test_create_context_topsrcdir_files(self):
124119
tmp = tempfile.mkdtemp()
125120
try:
@@ -195,7 +190,6 @@ def test_create_context_missing_extra(self):
195190
finally:
196191
shutil.rmtree(tmp)
197192

198-
@pytest.mark.xfail(sys.version_info >= (3, 8), reason="Hash is different")
199193
def test_create_context_extra_directory(self):
200194
tmp = tempfile.mkdtemp()
201195
try:
@@ -240,7 +234,6 @@ def test_create_context_extra_directory(self):
240234
finally:
241235
shutil.rmtree(tmp)
242236

243-
@pytest.mark.xfail(sys.version_info >= (3, 8), reason="Hash is different")
244237
def test_stream_context_tar(self):
245238
tmp = tempfile.mkdtemp()
246239
try:

0 commit comments

Comments
 (0)