Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions .builders/tests/test_upload.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import email.message
import json
from hashlib import sha256
from pathlib import Path
from unittest import mock
from zipfile import ZipFile
Expand Down Expand Up @@ -602,6 +604,77 @@ def track_upload(content, path, content_type='text/plain', cache_control=None):
assert 'href="package2/"' in root_html


@pytest.fixture
def patched_input_files(tmp_path, monkeypatch):
    """Create fake dependency-resolution input files under tmp_path and point
    the upload module's path constants at them.

    Returns a (dep file bytes, workflow file bytes, builder dir Path) tuple so
    tests can compute expected hashes for each input.
    """
    requirements = tmp_path / 'agent_requirements.in'
    requirements_bytes = b'requests==2.31.0\n'
    requirements.write_bytes(requirements_bytes)

    workflow = tmp_path / '.github' / 'workflows' / 'resolve-build-deps.yaml'
    workflow_bytes = b'on: push\n'
    workflow.parent.mkdir(parents=True)
    workflow.write_bytes(workflow_bytes)

    builders = tmp_path / '.builders'
    builders.mkdir()
    (builders / 'upload.py').write_bytes(b'# script\n')

    # Redirect every module-level path constant at the fake tree.
    for attr, value in (
        ('REPO_DIR', tmp_path),
        ('DIRECT_DEP_FILE', requirements),
        ('WORKFLOW_FILE', workflow),
        ('BUILDER_DIR', builders),
    ):
        monkeypatch.setattr(upload, attr, value)

    return requirements_bytes, workflow_bytes, builders


def test_hash_directory(tmp_path):
    """hash_directory is deterministic and sensitive to both file contents and file names."""
    file_a = tmp_path / 'a.txt'
    file_a.write_bytes(b'hello')
    (tmp_path / 'b.txt').write_bytes(b'world')

    baseline = upload.hash_directory(tmp_path)

    # Deterministic: hashing the same tree twice gives the same digest.
    assert upload.hash_directory(tmp_path) == baseline

    # Changing a file's content changes the digest.
    file_a.write_bytes(b'changed')
    assert upload.hash_directory(tmp_path) != baseline

    # Restoring the content restores the digest.
    file_a.write_bytes(b'hello')
    assert upload.hash_directory(tmp_path) == baseline

    # Renaming a file changes the digest even though contents are unchanged.
    file_a.rename(tmp_path / 'z.txt')
    assert upload.hash_directory(tmp_path) != baseline


def test_compute_input_hashes(patched_input_files):
    """compute_input_hashes returns a digest for exactly the three known inputs."""
    dep_content, workflow_content, builder_dir = patched_input_files

    hashes = upload.compute_input_hashes()

    expected_keys = {
        'agent_requirements.in',
        '.github/workflows/resolve-build-deps.yaml',
        '.builders',
    }
    assert set(hashes) == expected_keys
    assert hashes['agent_requirements.in'] == sha256(dep_content).hexdigest()
    assert hashes['.github/workflows/resolve-build-deps.yaml'] == sha256(workflow_content).hexdigest()
    assert hashes['.builders'] == upload.hash_directory(builder_dir)


def test_generate_lockfiles_metadata_contains_inputs(tmp_path, patched_input_files, monkeypatch):
    """generate_lockfiles writes metadata.json with per-input hashes and the legacy sha256 key."""
    dep_content, _, _ = patched_input_files

    deps_dir = tmp_path / '.deps'
    resolved_dir = deps_dir / 'resolved'
    resolved_dir.mkdir(parents=True)

    monkeypatch.setattr(upload, 'RESOLUTION_DIR', deps_dir)
    monkeypatch.setattr(upload, 'LOCK_FILE_DIR', resolved_dir)

    upload.generate_lockfiles(tmp_path, {})

    metadata = json.loads((deps_dir / 'metadata.json').read_text())
    dep_digest = sha256(dep_content).hexdigest()

    assert 'inputs' in metadata
    assert set(metadata['inputs']) == {
        'agent_requirements.in',
        '.github/workflows/resolve-build-deps.yaml',
        '.builders',
    }
    assert metadata['inputs']['agent_requirements.in'] == dep_digest
    # The top-level sha256 key mirrors the direct-dep hash for backward compatibility.
    assert metadata['sha256'] == dep_digest


def test_upload(setup_targets_dir, setup_fake_hash):
"""Basic end-to-end test of upload with a mocked bucket."""
wheels = {
Expand Down
31 changes: 30 additions & 1 deletion .builders/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
RESOLUTION_DIR = REPO_DIR / '.deps'
LOCK_FILE_DIR = RESOLUTION_DIR / 'resolved'
DIRECT_DEP_FILE = REPO_DIR / 'agent_requirements.in'
WORKFLOW_FILE = REPO_DIR / '.github/workflows/resolve-build-deps.yaml'
CACHE_CONTROL = 'public, max-age=15'
VALID_PROJECT_NAME = re.compile(r'^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$', re.IGNORECASE)
UNNORMALIZED_PROJECT_NAME_CHARS = re.compile(r'[-_.]+')
Expand Down Expand Up @@ -76,6 +77,32 @@ def hash_file(path: Path) -> str:
return sha256(f.read()).hexdigest()


def hash_directory(path: Path) -> str:
    """Compute a combined SHA256 hash of all files in a directory.

    Files are folded in sorted order of their path relative to *path*; each
    file contributes its POSIX-style relative path followed by its raw bytes,
    so both renames and content edits change the digest.  Python cache
    artifacts (__pycache__, .pytest_cache, *.pyc) are excluded so the digest
    stays stable across local test/interpreter runs.
    """

    def _is_cache_artifact(parts):
        # True when any path component is a cache directory or compiled bytecode.
        return any(
            component in ('__pycache__', '.pytest_cache') or component.endswith('.pyc')
            for component in parts
        )

    digest = sha256()
    for entry in sorted(path.rglob('*'), key=lambda item: item.relative_to(path)):
        relative = entry.relative_to(path)
        if not entry.is_file() or _is_cache_artifact(relative.parts):
            continue
        # NOTE(review): path and content are concatenated without a separator,
        # so in principle distinct trees could produce the same digest; kept
        # as-is to preserve existing digest values.
        digest.update(relative.as_posix().encode())
        digest.update(entry.read_bytes())
    return digest.hexdigest()


def compute_input_hashes() -> dict[str, str]:
    """Compute SHA256 hashes for all dependency resolution inputs.

    Returns a mapping from each input's repo-relative POSIX path to its hex
    digest: the direct dependency file, the resolution workflow file, and the
    builders directory.

    Raises:
        RuntimeError: if any input file or directory is missing.
    """
    hashes: dict[str, str] = {}
    for input_path, hasher in (
        (DIRECT_DEP_FILE, hash_file),
        (WORKFLOW_FILE, hash_file),
        (BUILDER_DIR, hash_directory),
    ):
        # Path.rglob silently yields nothing for a nonexistent directory, so a
        # missing BUILDER_DIR would otherwise hash as an empty tree instead of
        # raising FileNotFoundError; check existence explicitly for every input.
        if not input_path.exists():
            raise RuntimeError(f'Missing dependency resolution input: {input_path}')
        try:
            hashes[input_path.relative_to(REPO_DIR).as_posix()] = hasher(input_path)
        except FileNotFoundError as e:
            raise RuntimeError(f'Missing dependency resolution input: {e}') from e
    return hashes


def _build_number_of_wheel(wheel_info: dict) -> int:
"""Extract the build number from wheel information."""
wheel_name = PurePosixPath(wheel_info['name']).stem
Expand Down Expand Up @@ -279,9 +306,11 @@ def generate_lockfiles(targets_dir, lockfiles):
targets_dir = Path(targets_dir)
LOCK_FILE_DIR.mkdir(parents=True, exist_ok=True)
with RESOLUTION_DIR.joinpath('metadata.json').open('w', encoding='utf-8') as f:
inputs = compute_input_hashes()
contents = json.dumps(
{
'sha256': sha256(DIRECT_DEP_FILE.read_bytes()).hexdigest(),
'inputs': inputs,
'sha256': inputs['agent_requirements.in'],
},
indent=2,
sort_keys=True,
Expand Down
Loading