-
Notifications
You must be signed in to change notification settings - Fork 91
Pin HuggingFace revisions in weights to commit SHAs on push #2164
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
bolasim
wants to merge
1
commit into
main
Choose a base branch
from
01-27-pin_huggingface_revisions_in_weights_to_commit_shas_on_push
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| """Tests for HuggingFace revision resolver.""" | ||
|
|
||
| from unittest.mock import Mock, patch | ||
|
|
||
| import pytest | ||
|
|
||
| from truss.util.hf_revision_resolver import ( | ||
| build_hf_source, | ||
| is_commit_sha, | ||
| parse_hf_source, | ||
| resolve_hf_revision, | ||
| ) | ||
|
|
||
|
|
||
| def test_parse_hf_source_with_revision(): | ||
| """Test parsing HF source with explicit revision.""" | ||
| repo_id, revision = parse_hf_source("hf://meta-llama/Llama-2-7b@main") | ||
| assert repo_id == "meta-llama/Llama-2-7b" | ||
| assert revision == "main" | ||
|
|
||
|
|
||
| def test_parse_hf_source_with_sha(): | ||
| """Test parsing HF source with commit SHA.""" | ||
| sha = "a" * 40 | ||
| repo_id, revision = parse_hf_source(f"hf://meta-llama/Llama-2-7b@{sha}") | ||
| assert repo_id == "meta-llama/Llama-2-7b" | ||
| assert revision == sha | ||
|
|
||
|
|
||
| def test_parse_hf_source_without_revision(): | ||
| """Test parsing HF source without revision (should return None).""" | ||
| repo_id, revision = parse_hf_source("hf://meta-llama/Llama-2-7b") | ||
| assert repo_id == "meta-llama/Llama-2-7b" | ||
| assert revision is None | ||
|
|
||
|
|
||
| def test_parse_hf_source_invalid(): | ||
| """Test parsing non-HF source raises ValueError.""" | ||
| with pytest.raises(ValueError, match="Not a HuggingFace source"): | ||
| parse_hf_source("s3://bucket/path") | ||
|
|
||
| with pytest.raises(ValueError, match="Not a HuggingFace source"): | ||
| parse_hf_source("gs://bucket/path") | ||
|
|
||
|
|
||
| def test_is_commit_sha(): | ||
| """Test commit SHA detection.""" | ||
| # Valid SHAs | ||
| assert is_commit_sha("a" * 40) | ||
| assert is_commit_sha("0123456789abcdef" * 2 + "01234567") | ||
| assert is_commit_sha("f" * 40) | ||
|
|
||
| # Invalid SHAs | ||
| assert not is_commit_sha("main") | ||
| assert not is_commit_sha("v1.0.0") | ||
| assert not is_commit_sha("a" * 39) # Too short | ||
| assert not is_commit_sha("a" * 41) # Too long | ||
| assert not is_commit_sha(None) | ||
| assert not is_commit_sha("") | ||
| assert not is_commit_sha("gggggggggggggggggggggggggggggggggggggggg") # Invalid hex | ||
|
|
||
|
|
||
| def test_build_hf_source(): | ||
| """Test building HF source URI.""" | ||
| sha = "abc1230000000000000000000000000000000000" | ||
| uri = build_hf_source("meta-llama/Llama-2-7b", sha) | ||
| assert uri == f"hf://meta-llama/Llama-2-7b@{sha}" | ||
|
|
||
|
|
||
| def test_build_hf_source_with_branch(): | ||
| """Test building HF source URI with branch name.""" | ||
| uri = build_hf_source("meta-llama/Llama-2-7b", "main") | ||
| assert uri == "hf://meta-llama/Llama-2-7b@main" | ||
|
|
||
|
|
||
| @patch("truss.util.hf_revision_resolver.HfApi") | ||
| def test_resolve_hf_revision(mock_hf_api): | ||
| """Test resolving HF revision to SHA.""" | ||
| mock_repo_info = Mock() | ||
| mock_repo_info.sha = "a" * 40 | ||
| mock_hf_api.return_value.repo_info.return_value = mock_repo_info | ||
|
|
||
| sha = resolve_hf_revision("meta-llama/Llama-2-7b", "main") | ||
| assert sha == "a" * 40 | ||
|
|
||
| # Verify API was called correctly | ||
| mock_hf_api.assert_called_once_with(token=None) | ||
| mock_hf_api.return_value.repo_info.assert_called_once_with( | ||
| repo_id="meta-llama/Llama-2-7b", revision="main", repo_type="model" | ||
| ) | ||
|
|
||
|
|
||
| @patch("truss.util.hf_revision_resolver.HfApi") | ||
| def test_resolve_hf_revision_without_revision(mock_hf_api): | ||
| """Test resolving HF revision when no revision specified (uses default).""" | ||
| mock_repo_info = Mock() | ||
| mock_repo_info.sha = "b" * 40 | ||
| mock_hf_api.return_value.repo_info.return_value = mock_repo_info | ||
|
|
||
| sha = resolve_hf_revision("meta-llama/Llama-2-7b", None) | ||
| assert sha == "b" * 40 | ||
|
|
||
| # Verify revision=None was passed | ||
| mock_hf_api.return_value.repo_info.assert_called_once_with( | ||
| repo_id="meta-llama/Llama-2-7b", revision=None, repo_type="model" | ||
| ) | ||
|
|
||
|
|
||
| @patch("truss.util.hf_revision_resolver.HfApi") | ||
| def test_resolve_hf_revision_with_token(mock_hf_api): | ||
| """Test resolving HF revision with auth token.""" | ||
| mock_repo_info = Mock() | ||
| mock_repo_info.sha = "c" * 40 | ||
| mock_hf_api.return_value.repo_info.return_value = mock_repo_info | ||
|
|
||
| sha = resolve_hf_revision("meta-llama/Llama-2-7b", "main", token="hf_test_token") | ||
| assert sha == "c" * 40 | ||
|
|
||
| # Verify token was passed to HfApi | ||
| mock_hf_api.assert_called_once_with(token="hf_test_token") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| """Resolve HuggingFace revisions to commit SHAs.""" | ||
|
|
||
| import logging | ||
| import re | ||
| from typing import Optional | ||
|
|
||
| from huggingface_hub import HfApi | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bad, import of optional deps at global level. Move to inside resolve_hf_revision( |
||
| from huggingface_hub.utils import HfHubHTTPError | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def resolve_hf_revision( | ||
| repo_id: str, revision: Optional[str] = None, token: Optional[str] = None | ||
| ) -> str: | ||
| """Resolve HF revision to commit SHA (mirrors Rust truss-transfer logic). | ||
|
|
||
| This does the same thing as the Rust code in truss-transfer/src/create/hf_metadata.rs: | ||
| - Calls api_repo.info().await | ||
| - Extracts repo_info.sha | ||
|
|
||
| This is used for best-effort revision pinning during push. Callers should handle | ||
| exceptions gracefully and allow the push to proceed even if resolution fails. | ||
|
|
||
| Args: | ||
| repo_id: HuggingFace repo ID (e.g., "meta-llama/Llama-2-7b") | ||
| revision: Branch, tag, or SHA (None = default branch) | ||
| token: Optional HF token for private repos | ||
|
|
||
| Returns: | ||
| Resolved commit SHA (40-char hex string) | ||
|
|
||
| Raises: | ||
| HfHubHTTPError: If repo doesn't exist, revision is invalid, or network issues | ||
| """ | ||
| try: | ||
| api = HfApi(token=token) | ||
| repo_info = api.repo_info(repo_id=repo_id, revision=revision, repo_type="model") | ||
| return repo_info.sha | ||
| except HfHubHTTPError as e: | ||
| logger.debug(f"Failed to resolve HF revision for {repo_id}@{revision}: {e}") | ||
| raise | ||
|
|
||
|
|
||
| def is_commit_sha(revision: Optional[str]) -> bool: | ||
| """Check if revision is already a 40-character commit SHA.""" | ||
| if not revision: | ||
| return False | ||
| return bool(re.match(r"^[0-9a-f]{40}$", revision)) | ||
|
|
||
|
|
||
| def parse_hf_source(source: str) -> tuple[str, Optional[str]]: | ||
| """Parse HuggingFace source URI into repo_id and revision. | ||
|
|
||
| Args: | ||
| source: URI like "hf://owner/repo@revision" or "hf://owner/repo" | ||
|
|
||
| Returns: | ||
| Tuple of (repo_id, revision or None) | ||
|
|
||
| Examples: | ||
| >>> parse_hf_source("hf://meta-llama/Llama-2-7b@main") | ||
| ("meta-llama/Llama-2-7b", "main") | ||
| >>> parse_hf_source("hf://meta-llama/Llama-2-7b") | ||
| ("meta-llama/Llama-2-7b", None) | ||
| """ | ||
| if not source.startswith("hf://"): | ||
| raise ValueError(f"Not a HuggingFace source: {source}") | ||
|
|
||
| # Remove "hf://" prefix | ||
| path = source[5:] | ||
|
|
||
| # Split on @ to get repo_id and revision | ||
| if "@" in path: | ||
| repo_id, revision = path.rsplit("@", 1) | ||
| return repo_id, revision | ||
|
|
||
| return path, None | ||
|
|
||
|
|
||
| def build_hf_source(repo_id: str, revision: str) -> str: | ||
| """Build HuggingFace source URI from repo_id and revision. | ||
|
|
||
| Args: | ||
| repo_id: HuggingFace repo ID | ||
| revision: Commit SHA or branch/tag name | ||
|
|
||
| Returns: | ||
| URI like "hf://owner/repo@revision" | ||
| """ | ||
| return f"hf://{repo_id}@{revision}" | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
now you're running the try except for every push. Not good. Only try when weights are used and huggingface is used and revision is main or startswith(refs)