Skip to content

Commit 5fac207

Browse files
dalcinl and mayeut authored
Walk directories in sorted order for reproducibility (#517)
* fix: Walk directories in sorted order for reproducibility * recurse directories in sorted order * recurse top-level *.dist-info/ directories last * list filenames in sorted order * list top-level *.dist-info/RECORD files last * do not add zip file entries for non-empty directories Co-authored-by: Lisandro Dalcin <[email protected]> * Add test for empty folder * Revert "Add test for empty folder" This reverts commit a8134df. * Revert "do not add zip file entries for non-empty directories" This reverts commit fbdd62e. * Reapply "Add test for empty folder" This reverts commit 25b3dd1. * fix test --------- Co-authored-by: mayeut <[email protected]>
1 parent 2e00860 commit 5fac207

File tree

3 files changed

+78
-10
lines changed

3 files changed

+78
-10
lines changed

src/auditwheel/tools.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55
import subprocess
66
import zipfile
7-
from collections.abc import Iterable
7+
from collections.abc import Generator, Iterable
88
from datetime import datetime, timezone
99
from typing import Any
1010

@@ -29,6 +29,50 @@ def unique_by_index(sequence: Iterable[Any]) -> list[Any]:
2929
return uniques
3030

3131

32+
def walk(topdir: str) -> Generator[tuple[str, list[str], list[str]], None, None]:
    """Wrapper for `os.walk` with outputs in reproducible order

    Directories are visited in sorted order, except that any top-level
    ``*.dist-info`` directory is visited after all other top-level
    directories. File names are listed in sorted order, except that the
    ``RECORD`` file of a top-level ``*.dist-info`` directory is listed last.

    Parameters
    ----------
    topdir : str
        Root of the directory tree

    Yields
    ------
    dirpath : str
        Path to a directory
    dirnames : list[str]
        List of subdirectory names in `dirpath`
    filenames : list[str]
        List of non-directory file names in `dirpath`
    """

    def dist_info_last(name: str) -> tuple[bool, str]:
        # False sorts before True, so ``*.dist-info`` names end up last
        # while both groups stay alphabetically ordered.
        return name.endswith(".dist-info"), name

    def record_last(name: str) -> tuple[bool, str]:
        # Same trick to push the ``RECORD`` file to the end of the list.
        return name == "RECORD", name

    # Normalise so the equality checks against `dirpath` below are not
    # defeated by a trailing separator or redundant path components.
    topdir = os.path.normpath(topdir)
    for dirpath, dirnames, filenames in os.walk(topdir):
        # `dirnames` must be reordered in place: `os.walk` recurses into
        # the subdirectories in the order this very list ends up in.
        if dirpath == topdir:
            # recurse into any top-level .dist-info subdirectory last
            dirnames.sort(key=dist_info_last)
        else:
            dirnames.sort()

        # list filenames in reproducible order, with the RECORD file of a
        # top-level .dist-info directory last
        in_top_level_dist_info = (
            dirpath.endswith(".dist-info") and os.path.dirname(dirpath) == topdir
        )
        if in_top_level_dist_info:
            filenames.sort(key=record_last)
        else:
            filenames.sort()

        yield dirpath, dirnames, filenames
74+
75+
3276
def zip2dir(zip_fname: str, out_dir: str) -> None:
3377
"""Extract `zip_fname` into output directory `out_dir`
3478
@@ -69,15 +113,16 @@ def dir2zip(in_dir: str, zip_fname: str, date_time: datetime | None = None) -> N
69113
date_time : Optional[datetime]
70114
Time stamp to set on each file in the archive
71115
"""
116+
in_dir = os.path.normpath(in_dir)
72117
if date_time is None:
73118
st = os.stat(in_dir)
74119
date_time = datetime.fromtimestamp(st.st_mtime, tz=timezone.utc)
75120
date_time_args = date_time.timetuple()[:6]
76121
compression = zipfile.ZIP_DEFLATED
77122
with zipfile.ZipFile(zip_fname, "w", compression=compression) as z:
78-
for root, dirs, files in os.walk(in_dir):
79-
for dir in dirs:
80-
dname = os.path.join(root, dir)
123+
for root, dirs, files in walk(in_dir):
124+
if root != in_dir:
125+
dname = root
81126
out_dname = os.path.relpath(dname, in_dir) + "/"
82127
zinfo = zipfile.ZipInfo.from_file(dname, out_dname)
83128
zinfo.date_time = date_time_args

src/auditwheel/wheeltools.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
from ._vendor.wheel.pkginfo import read_pkg_info, write_pkg_info
2727
from .tmpdirs import InTemporaryDirectory
28-
from .tools import dir2zip, unique_by_index, zip2dir
28+
from .tools import dir2zip, unique_by_index, walk, zip2dir
2929

3030
logger = logging.getLogger(__name__)
3131

@@ -69,18 +69,18 @@ def rewrite_record(bdist_dir: str) -> None:
6969
if exists(sig_path):
7070
os.unlink(sig_path)
7171

72-
def walk() -> Generator[str]:
73-
for dir, dirs, files in os.walk(bdist_dir):
74-
for f in files:
75-
yield pjoin(dir, f)
72+
def files() -> Generator[str]:
73+
for dir, _, files in walk(bdist_dir):
74+
for file in files:
75+
yield pjoin(dir, file)
7676

7777
def skip(path: str) -> bool:
7878
"""Wheel hashes every possible file."""
7979
return path == record_relpath
8080

8181
with open(record_path, "w+", newline="", encoding="utf-8") as record_file:
8282
writer = csv.writer(record_file)
83-
for path in walk():
83+
for path in files():
8484
relative_path = relpath(path, bdist_dir)
8585
if skip(relative_path):
8686
hash_ = ""

tests/unit/test_tools.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import argparse
44
import lzma
5+
import zipfile
56
from pathlib import Path
67

78
import pytest
@@ -100,3 +101,25 @@ def test_dir2zip_deflate(tmp_path):
100101
output_file = tmp_path / "ouput.zip"
101102
dir2zip(str(input_dir), str(output_file))
102103
assert output_file.stat().st_size < len(buffer) / 4
104+
105+
106+
def test_dir2zip_folders(tmp_path):
    """Check that `dir2zip` writes an archive entry for every directory.

    The input tree holds a ``.dist-info`` directory with one file and a
    nested, empty directory; the archive must contain exactly the three
    directory entries plus the single ``METADATA`` file.
    """
    source = tmp_path / "input_dir"
    source.mkdir()
    dist_info = source / "dummy-1.0.dist-info"
    dist_info.mkdir()
    (dist_info / "METADATA").write_text("")
    # `dummy/` only exists as the parent of the empty directory
    (source / "dummy" / "empty").mkdir(parents=True)

    archive = tmp_path / "output.zip"
    dir2zip(str(source), str(archive))

    remaining = {"dummy/", "dummy/empty/", "dummy-1.0.dist-info/"}
    with zipfile.ZipFile(archive, "r") as z:
        entries = z.infolist()
        assert len(entries) == 4
        for entry in entries:
            if not entry.is_dir():
                # the only regular file in the tree
                assert entry.filename == "dummy-1.0.dist-info/METADATA"
                continue
            # each expected directory must show up exactly once
            assert entry.filename in remaining
            remaining.remove(entry.filename)
    assert len(remaining) == 0

0 commit comments

Comments
 (0)