Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions .builders/tests/test_upload.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import email.message
import json
from hashlib import sha256
from pathlib import Path
from unittest import mock
from zipfile import ZipFile
Expand Down Expand Up @@ -602,6 +604,71 @@ def track_upload(content, path, content_type='text/plain', cache_control=None):
assert 'href="package2/"' in root_html


@pytest.fixture
def patched_input_files(tmp_path, monkeypatch):
    """Redirect the upload module's resolution-input constants to temp fixtures.

    Creates a fake direct-dependency file, workflow file and builders
    directory under ``tmp_path`` and monkeypatches the corresponding
    module-level constants so ``compute_input_hashes`` reads them.

    Returns the (dep_file, workflow_file, builder_dir) paths.
    """
    requirements = tmp_path / 'agent_requirements.in'
    requirements.write_bytes(b'requests==2.31.0\n')

    workflow = tmp_path / 'resolve-build-deps.yaml'
    workflow.write_bytes(b'on: push\n')

    builders = tmp_path / '.builders'
    builders.mkdir()
    (builders / 'upload.py').write_bytes(b'# script\n')

    for attribute, replacement in (
        ('DIRECT_DEP_FILE', requirements),
        ('WORKFLOW_FILE', workflow),
        ('BUILDER_DIR', builders),
    ):
        monkeypatch.setattr(upload, attribute, replacement)

    return requirements, workflow, builders


def test_hash_directory(tmp_path):
    """hash_directory is deterministic and sensitive to content and file names."""
    file_a = tmp_path / 'a.txt'
    file_b = tmp_path / 'b.txt'
    file_a.write_bytes(b'hello')
    file_b.write_bytes(b'world')

    baseline = upload.hash_directory(tmp_path)

    # Hashing unchanged content twice yields the same digest.
    assert upload.hash_directory(tmp_path) == baseline

    # Editing a file changes the digest.
    file_a.write_bytes(b'changed')
    assert upload.hash_directory(tmp_path) != baseline

    # Restoring the original content restores the digest.
    file_a.write_bytes(b'hello')
    assert upload.hash_directory(tmp_path) == baseline

    # A rename changes the digest even though the bytes are identical.
    file_a.rename(tmp_path / 'z.txt')
    assert upload.hash_directory(tmp_path) != baseline


def test_compute_input_hashes(patched_input_files):
    """Each dependency-resolution input is hashed under its repo-relative key."""
    builder_dir = patched_input_files[2]

    hashes = upload.compute_input_hashes()

    expected_keys = {
        'agent_requirements.in',
        '.github/workflows/resolve-build-deps.yaml',
        '.builders',
    }
    assert set(hashes) == expected_keys
    assert hashes['agent_requirements.in'] == sha256(b'requests==2.31.0\n').hexdigest()
    assert hashes['.github/workflows/resolve-build-deps.yaml'] == sha256(b'on: push\n').hexdigest()
    assert hashes['.builders'] == upload.hash_directory(builder_dir)


def test_generate_lockfiles_metadata_contains_inputs(tmp_path, patched_input_files, monkeypatch):
    """generate_lockfiles records the input hashes in metadata.json.

    NOTE: ``patched_input_files`` is requested for its monkeypatching side
    effect (it redirects the input-file constants), not for its return value.
    """
    deps_dir = tmp_path / '.deps'
    resolved_dir = deps_dir / 'resolved'
    deps_dir.mkdir()
    resolved_dir.mkdir()

    monkeypatch.setattr(upload, 'RESOLUTION_DIR', deps_dir)
    monkeypatch.setattr(upload, 'LOCK_FILE_DIR', resolved_dir)

    upload.generate_lockfiles(tmp_path, {})

    metadata = json.loads((deps_dir / 'metadata.json').read_text())
    direct_dep_hash = sha256(b'requests==2.31.0\n').hexdigest()

    assert 'inputs' in metadata
    assert set(metadata['inputs']) == {
        'agent_requirements.in',
        '.github/workflows/resolve-build-deps.yaml',
        '.builders',
    }
    assert metadata['inputs']['agent_requirements.in'] == direct_dep_hash
    # The legacy top-level hash must stay in sync with the direct-dep input hash.
    assert metadata['sha256'] == direct_dep_hash


def test_upload(setup_targets_dir, setup_fake_hash):
"""Basic end-to-end test of upload with a mocked bucket."""
wheels = {
Expand Down
27 changes: 26 additions & 1 deletion .builders/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
RESOLUTION_DIR = REPO_DIR / '.deps'
LOCK_FILE_DIR = RESOLUTION_DIR / 'resolved'
DIRECT_DEP_FILE = REPO_DIR / 'agent_requirements.in'
WORKFLOW_FILE = REPO_DIR / '.github/workflows/resolve-build-deps.yaml'
CACHE_CONTROL = 'public, max-age=15'
VALID_PROJECT_NAME = re.compile(r'^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$', re.IGNORECASE)
UNNORMALIZED_PROJECT_NAME_CHARS = re.compile(r'[-_.]+')
Expand Down Expand Up @@ -76,6 +77,28 @@ def hash_file(path: Path) -> str:
return sha256(f.read()).hexdigest()


def hash_directory(path: Path) -> str:
    """Compute a combined SHA256 hash of all tracked files in a directory.

    Files are visited in a deterministic (sorted) order and each file
    contributes both its directory-relative POSIX path and its contents, so
    the digest changes when a file is renamed as well as when it is edited.

    Generated artifacts — ``*.pyc`` files, ``__pycache__`` directories and
    hidden files/directories (e.g. ``.pytest_cache``) — are excluded so the
    digest stays stable regardless of local interpreter or tooling caches.
    """
    digest = sha256()
    for file_path in sorted(path.rglob('*'), key=lambda p: p.relative_to(path)):
        if not file_path.is_file():
            continue
        relative = file_path.relative_to(path)
        # Skip bytecode and hidden/cache entries anywhere along the relative path,
        # otherwise earlier runs would perturb the hash of the same source tree.
        if relative.suffix == '.pyc' or any(
            part == '__pycache__' or part.startswith('.') for part in relative.parts
        ):
            continue
        # Fold the name in too, so renames are detected despite identical content.
        digest.update(relative.as_posix().encode())
        digest.update(file_path.read_bytes())
    return digest.hexdigest()


def compute_input_hashes() -> dict[str, str]:
    """Compute SHA256 hashes for all dependency resolution inputs.

    Returns a mapping from repo-relative input path to hex digest.
    Raises RuntimeError if any expected input file or directory is missing.
    """
    # NOTE(review): the keys are literals that must be kept in sync with the
    # DIRECT_DEP_FILE / WORKFLOW_FILE / BUILDER_DIR constants if those paths
    # ever change.
    inputs = (
        ('agent_requirements.in', hash_file, DIRECT_DEP_FILE),
        ('.github/workflows/resolve-build-deps.yaml', hash_file, WORKFLOW_FILE),
        ('.builders', hash_directory, BUILDER_DIR),
    )
    try:
        return {key: hasher(target) for key, hasher, target in inputs}
    except FileNotFoundError as e:
        raise RuntimeError(f'Missing dependency resolution input: {e}') from e


def _build_number_of_wheel(wheel_info: dict) -> int:
"""Extract the build number from wheel information."""
wheel_name = PurePosixPath(wheel_info['name']).stem
Expand Down Expand Up @@ -279,9 +302,11 @@ def generate_lockfiles(targets_dir, lockfiles):
targets_dir = Path(targets_dir)
LOCK_FILE_DIR.mkdir(parents=True, exist_ok=True)
with RESOLUTION_DIR.joinpath('metadata.json').open('w', encoding='utf-8') as f:
inputs = compute_input_hashes()
contents = json.dumps(
{
'sha256': sha256(DIRECT_DEP_FILE.read_bytes()).hexdigest(),
'inputs': inputs,
'sha256': inputs['agent_requirements.in'],
},
indent=2,
sort_keys=True,
Expand Down
Loading