Skip to content

Commit 11ee138

Browse files
authored
feat: core metadata support and backfill
2 parents a3a336c + 447a2fd commit 11ee138

File tree

9 files changed

+171
-11
lines changed

9 files changed

+171
-11
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: backfill metadata
2+
3+
on:
4+
workflow_dispatch:
5+
6+
jobs:
7+
backfill-metadata:
8+
runs-on: ubuntu-latest
9+
permissions:
10+
id-token: write
11+
contents: read
12+
steps:
13+
- uses: actions/checkout@v3
14+
- uses: actions/setup-python@v4
15+
with:
16+
python-version: '3.11'
17+
- uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
18+
with:
19+
workload_identity_provider: projects/868781662168/locations/global/workloadIdentityPools/prod-github/providers/github-oidc-pool
20+
service_account: [email protected]
21+
- run: python3 -uS bin/backfill-core-metadata --pypi-url https://pypi.devinfra.sentry.io

bin/backfill-core-metadata

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env python3
2+
from __future__ import annotations
3+
4+
import argparse
5+
import hashlib
6+
import json
7+
import os.path
8+
import shutil
9+
import subprocess
10+
import tempfile
11+
import urllib.parse
12+
import urllib.request
13+
import zipfile
14+
from collections.abc import Sequence
15+
16+
17+
def _get_metadata_bytes(filename: str) -> bytes:
    """Return the raw contents of a wheel's top-level ``METADATA`` file.

    The wheel at *filename* is opened as a zip archive and searched for the
    single member named ``<something>.dist-info/METADATA`` that sits exactly
    one directory deep (deeper ``METADATA`` files are ignored).

    Raises:
        ValueError: if the archive does not contain exactly one such member.
    """
    with zipfile.ZipFile(filename) as wheel:
        matches = (
            member
            for member in wheel.namelist()
            if member.count("/") == 1 and member.endswith(".dist-info/METADATA")
        )
        # one-element unpacking doubles as the "exactly one match" check
        (metadata_name,) = matches
        with wheel.open(metadata_name) as metadata_file:
            contents = metadata_file.read()
    return contents
26+
27+
28+
def main(argv: Sequence[str] | None = None) -> int:
    """Backfill ``core_metadata`` for every indexed wheel that lacks it.

    Reads the newline-delimited ``packages.json`` from ``--pypi-url``, and for
    each entry without a ``core_metadata`` value downloads the wheel, extracts
    its ``METADATA`` file and records ``sha256=<digest>`` on the entry.  The
    extracted ``<wheel>.metadata`` files and the rewritten ``packages.json``
    are then uploaded with ``gcloud storage cp``.

    Args:
        argv: command line arguments; ``None`` means ``sys.argv[1:]``.

    Returns:
        ``0`` on success.

    Raises:
        subprocess.CalledProcessError: if a ``gcloud`` upload fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pypi-url", required=True)
    args = parser.parse_args(argv)

    url = urllib.parse.urljoin(args.pypi_url, "packages.json")
    # close the response explicitly instead of leaking the connection
    with urllib.request.urlopen(url) as resp:
        packages = [json.loads(line) for line in resp]

    with tempfile.TemporaryDirectory() as tmpdir:
        os.makedirs(f"{tmpdir}/metadata")
        backfilled = 0
        for package in packages:
            basename = os.path.basename(package["filename"])

            if package.get("core_metadata"):
                print(f"skipping: core metadata already present for {basename}")
                continue

            url = f"{args.pypi_url}/wheels/{basename}"
            fn = f"{tmpdir}/{basename}"

            with urllib.request.urlopen(url) as resp, open(fn, "wb") as f:
                shutil.copyfileobj(resp, f)

            try:
                metadata_bytes = _get_metadata_bytes(fn)
            finally:
                # only the METADATA is needed; don't accumulate every wheel
                # on disk for the duration of the run
                os.remove(fn)
            metadata_sha256 = hashlib.sha256(metadata_bytes).hexdigest()

            with open(f"{tmpdir}/metadata/{basename}.metadata", "wb") as f:
                f.write(metadata_bytes)

            package["core_metadata"] = f"sha256={metadata_sha256}"
            backfilled += 1
            print(f"core metadata fetched for {basename}")

        if not backfilled:
            # nothing changed: the metadata/* wildcard would match no files
            # and packages.json would be re-uploaded unmodified
            print("nothing to backfill")
            return 0

        packages_json = os.path.join(tmpdir, "packages.json")
        with open(packages_json, "w") as f:
            for package in packages:
                f.write(f"{json.dumps(package)}\n")

        subprocess.check_call(
            (
                "gcloud",
                "storage",
                "cp",
                "-n",  # no-clobber
                "--cache-control",
                "public, max-age=3600",
                f"{tmpdir}/metadata/*",
                "gs://pypi.devinfra.sentry.io/wheels/",
            )
        )
        subprocess.check_call(
            (
                "gcloud",
                "storage",
                "cp",
                # the packages.json file must be consistently read so no caching
                "--cache-control",
                "no-store",
                packages_json,
                "gs://pypi.devinfra.sentry.io",
            )
        )

    return 0
91+
92+
93+
if __name__ == "__main__":
94+
raise SystemExit(main())

docker/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
FROM python:3.11.4-slim-bullseye
2+
23
RUN : \
34
&& apt-get update \
45
&& DEBIAN_FRONTEND=noninteractive apt-get install \

docker/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
auditwheel>=5.1.2
22
delocate>=0.10.4
3-
dumb-pypi>=1.13.0
3+
dumb-pypi>=1.15.0
44
packaging>=21.3
55
patchelf>=0.14.5;sys_platform=="linux"
66
pip>=22.1.2

docker/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
auditwheel==5.1.2
88
delocate==0.10.4
99
distlib==0.3.8
10-
dumb-pypi==1.13.0
10+
dumb-pypi==1.15.0
1111
filelock==3.13.1
1212
jinja2==3.1.6
1313
markupsafe==2.1.1

make_index.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,37 @@ def _commit_info() -> tuple[str, int]:
2525
return h, int(t)
2626

2727

28-
def _make_info(filename: str) -> dict[str, Any]:
29-
h, t = _commit_info()
30-
31-
with open(filename, "rb") as f:
32-
sha256 = hashlib.sha256(f.read()).hexdigest()
33-
28+
def _get_metadata_bytes(filename: str) -> bytes:
3429
with zipfile.ZipFile(filename) as zipf:
3530
(metadata,) = (
3631
name
3732
for name in zipf.namelist()
3833
if name.endswith(".dist-info/METADATA") and name.count("/") == 1
3934
)
4035
with zipf.open(metadata) as f:
41-
info = email.message_from_binary_file(f)
36+
return f.read()
37+
38+
39+
def _make_info(filename: str) -> dict[str, Any]:
40+
h, t = _commit_info()
41+
42+
with open(filename, "rb") as f:
43+
sha256 = hashlib.sha256(f.read()).hexdigest()
44+
45+
metadata_bytes = _get_metadata_bytes(filename)
46+
metadata_sha256 = hashlib.sha256(metadata_bytes).hexdigest()
47+
info = email.message_from_bytes(metadata_bytes)
4248

4349
dist_info = {
4450
"requires_dist": info.get_all("requires-dist"),
4551
"requires_python": info.get("requires-python"),
4652
}
4753

54+
# this is intended to be exactly the structure dumb-pypi generates
4855
return {
4956
"filename": os.path.basename(filename),
5057
"hash": f"sha256={sha256}",
58+
"core_metadata": f"sha256={metadata_sha256}",
5159
"upload_timestamp": t,
5260
"uploaded_by": f"git@{h}",
5361
**{k: v for k, v in dist_info.items() if v},
@@ -92,6 +100,9 @@ def main(argv: Sequence[str] | None = None) -> int:
92100
new_packages.append(_make_info(filename))
93101
shutil.copy(filename, wheels_dir)
94102

103+
with open(f"{wheels_dir}/{basename}.metadata", "wb") as f:
104+
f.write(_get_metadata_bytes(filename))
105+
95106
with tempfile.TemporaryDirectory() as tmpdir:
96107
prev_json = os.path.join(tmpdir, "previous.json")
97108
with open(prev_json, "w") as f:

packages.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,7 @@ validate_incorrect_missing_deps = psycopg2-binary
519519
[drf-spectacular==0.27.2]
520520

521521
[dumb-pypi==1.13.0]
522+
[dumb-pypi==1.15.0]
522523

523524
[ecdsa==0.18.0]
524525

tests/make_index_test.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import io
44
import json
55
import os.path
6+
import re
67
import urllib.request
78
import zipfile
89
from unittest import mock
@@ -30,6 +31,7 @@ def test_make_info_empty_wheel_metadata(tmp_path):
3031
assert ret == {
3132
"filename": "a-1-py3-none-any.whl",
3233
"hash": "sha256=64f7f4664408d711c17ad28c1d3ba7dd155501e67c8632fafc8a525ba3ebc527",
34+
"core_metadata": "sha256=d4528dc2d072c0e6d65addae8b5700fd29253b9eb9a9214aba539447d6f29fae",
3335
"upload_timestamp": mock.ANY,
3436
"uploaded_by": re_assert.Matches(r"^git@[a-f0-9]{7}"),
3537
}
@@ -56,6 +58,7 @@ def test_make_info_full_wheel_metadata(tmp_path):
5658
"jsonschema",
5759
"packaging (==21.3) ; extra = 'p'",
5860
],
61+
"core_metadata": "sha256=a015186125a83e6667547b156f8c6813e72fbab48c4ae635ac3c3a5f1d86aa9f",
5962
"requires_python": ">= 3.7, != 3.7.0",
6063
"upload_timestamp": mock.ANY,
6164
"uploaded_by": re_assert.Matches(r"^git@[a-f0-9]{7}"),
@@ -81,7 +84,36 @@ def test_main_new_package(tmp_path):
8184
# just some smoke tests about the output
8285
assert dest.joinpath("packages.json").exists()
8386
assert dest.joinpath("wheels/a-1-py3-none-any.whl").exists()
84-
assert dest.joinpath("simple/a/index.html").exists()
87+
88+
89+
def test_main_core_metadata(tmp_path):
    """The generated index links the wheel's core metadata and a
    ``.metadata`` file is written next to the wheel."""
    dist_dir = tmp_path.joinpath("dist")
    dist_dir.mkdir()
    make_wheel(dist_dir.joinpath("a-1-py3-none-any.whl"), ())
    dest_dir = tmp_path.joinpath("dest")

    # urlopen is patched to return an empty body
    # (presumably the previously published package list -- confirm in make_index)
    empty_response = io.BytesIO(b"")
    argv = (
        f"--dist={dist_dir}",
        f"--dest={dest_dir}",
        "--pypi-url=http://example.com",
    )
    with mock.patch.object(urllib.request, "urlopen", return_value=empty_response):
        ret = make_index.main(argv)
    assert not ret

    wheel_sha = "64f7f4664408d711c17ad28c1d3ba7dd155501e67c8632fafc8a525ba3ebc527"
    metadata_sha = "d4528dc2d072c0e6d65addae8b5700fd29253b9eb9a9214aba539447d6f29fae"
    expected_link = f'<a href="http://example.com/wheels/a-1-py3-none-any.whl#sha256={wheel_sha}" data-core-metadata="sha256={metadata_sha}" >a-1-py3-none-any.whl</a>'

    # collapse whitespace runs so the match does not depend on HTML layout
    with open(dest_dir.joinpath("simple/a/index.html")) as index_file:
        raw_html = index_file.read()
    index_html = re.sub(r"\s+", " ", raw_html)
    assert expected_link in index_html

    with open(dest_dir.joinpath("wheels/a-1-py3-none-any.whl.metadata")) as metadata_file:
        metadata_text = metadata_file.read()
    assert metadata_text == "Name: a\nVersion: 1\n"
85117

86118

87119
def test_main_multiple_provide_same_package_first_wins(tmp_path):

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = py38
2+
envlist = py
33

44
[testenv]
55
skip_install = true

0 commit comments

Comments
 (0)