|
| 1 | +"""Fetching utilities for extensions.""" |
| 2 | + |
| 3 | +import hashlib |
| 4 | +from enum import StrEnum |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +from openhands.sdk.git.cached_repo import GitHelper, try_cached_clone_or_update |
| 8 | +from openhands.sdk.git.utils import extract_repo_name, is_git_url, normalize_git_url |
| 9 | +from openhands.sdk.logger import get_logger |
| 10 | + |
| 11 | + |
| 12 | +logger = get_logger(__name__) |
| 13 | + |
| 14 | + |
class ExtensionFetchError(Exception):
    """Signal that an extension could not be fetched or located.

    Raised for unparseable source strings, missing local paths, failed
    clone/update operations, and subdirectories that do not exist.
    """
| 17 | + |
| 18 | + |
| 19 | +class SourceType(StrEnum): |
| 20 | + """Classification of an extension source. |
| 21 | +
|
| 22 | + LOCAL -- a filesystem path (absolute, home-relative, or dot-relative). |
| 23 | + GIT -- any git-clonable URL (HTTPS, SSH, git://, etc.). |
| 24 | + GITHUB -- the ``github:owner/repo`` shorthand, expanded to an HTTPS URL. |
| 25 | + """ |
| 26 | + |
| 27 | + LOCAL = "local" |
| 28 | + GIT = "git" |
| 29 | + GITHUB = "github" |
| 30 | + |
| 31 | + |
def parse_extension_source(source: str) -> tuple[SourceType, str]:
    """Parse extension source into (SourceType, url).

    Leading and trailing whitespace in ``source`` is ignored.

    Args:
        source: Extension source string. Can be:
            - "github:owner/repo" - GitHub repository shorthand
            - "https://github.com/owner/repo.git" - Full git URL
            - "git@github.com:owner/repo.git" - SSH git URL
            - "/local/path" - Local path

    Returns:
        Tuple of (source_type, normalized_url) where source_type is one of:
            - SourceType.GITHUB: GitHub shorthand, expanded to an HTTPS URL
            - SourceType.GIT: Any git URL, passed through normalize_git_url
            - SourceType.LOCAL: Local filesystem path, returned as given

    Raises:
        ExtensionFetchError: If the GitHub shorthand is not exactly
            ``owner/repo`` with both parts non-empty, or if the source matches
            none of the supported formats.

    Examples:
        >>> parse_extension_source("github:owner/repo")
        (SourceType.GITHUB, "https://github.com/owner/repo.git")
        >>> parse_extension_source("https://gitlab.com/org/repo.git")
        (SourceType.GIT, "https://gitlab.com/org/repo.git")
        >>> parse_extension_source("/local/path")
        (SourceType.LOCAL, "/local/path")
    """
    source = source.strip()

    # GitHub shorthand: github:owner/repo
    github_prefix = "github:"
    if source.startswith(github_prefix):
        owner, slash, repo = source.removeprefix(github_prefix).partition("/")
        # Require exactly one slash AND a non-empty name on each side of it;
        # otherwise inputs such as "github:/repo" or "github:owner/" would be
        # expanded into malformed URLs like "https://github.com//repo.git".
        if not (slash and owner and repo) or "/" in repo:
            raise ExtensionFetchError(
                f"Invalid GitHub shorthand format: {source}. "
                f"Expected format: github:owner/repo"
            )
        return (SourceType.GITHUB, f"https://github.com/{owner}/{repo}.git")

    # Git URLs: detect by protocol/scheme rather than enumerating providers
    # This handles GitHub, GitLab, Bitbucket, Codeberg, self-hosted instances, etc.
    if is_git_url(source):
        return (SourceType.GIT, normalize_git_url(source))

    # Local path: starts with /, ~, . or contains / without a URL scheme
    if source.startswith(("/", "~", ".")):
        return (SourceType.LOCAL, source)

    if "/" in source and "://" not in source:
        # Relative path like "plugins/my-plugin"
        return (SourceType.LOCAL, source)

    raise ExtensionFetchError(
        f"Unable to parse extension source: {source}. "
        f"Expected formats: 'github:owner/repo', git URL, or local path"
    )
| 88 | + |
| 89 | + |
| 90 | +def _resolve_local_source(url: str) -> Path: |
| 91 | + """Resolve a local extension source to a path. |
| 92 | +
|
| 93 | + Args: |
| 94 | + url: Local path string (may contain ~ for home directory). |
| 95 | +
|
| 96 | + Returns: |
| 97 | + Resolved absolute path to the extension directory. |
| 98 | +
|
| 99 | + Raises: |
| 100 | + ExtensionFetchError: If path doesn't exist. |
| 101 | + """ |
| 102 | + local_path = Path(url).expanduser().resolve() |
| 103 | + if not local_path.exists(): |
| 104 | + raise ExtensionFetchError(f"Local extension path does not exist: {local_path}") |
| 105 | + return local_path |
| 106 | + |
| 107 | + |
| 108 | +def _apply_subpath(base_path: Path, subpath: str | None, context: str) -> Path: |
| 109 | + """Apply a subpath to a base path, validating it exists. |
| 110 | +
|
| 111 | + Args: |
| 112 | + base_path: The root path. |
| 113 | + subpath: Optional subdirectory path (may have leading/trailing slashes). |
| 114 | + context: Description for error messages (e.g., "extension repository"). |
| 115 | +
|
| 116 | + Returns: |
| 117 | + The final path (base_path if no subpath, otherwise base_path/subpath). |
| 118 | +
|
| 119 | + Raises: |
| 120 | + ExtensionFetchError: If subpath doesn't exist. |
| 121 | + """ |
| 122 | + if not subpath: |
| 123 | + return base_path |
| 124 | + |
| 125 | + final_path = base_path / subpath.strip("/") |
| 126 | + if not final_path.exists(): |
| 127 | + raise ExtensionFetchError(f"Subdirectory '{subpath}' not found in {context}") |
| 128 | + return final_path |
| 129 | + |
| 130 | + |
def fetch(
    source: str,
    cache_dir: Path,
    ref: str | None = None,
    update: bool = True,
    repo_path: str | None = None,
    git_helper: GitHelper | None = None,
) -> Path:
    """Fetch an extension and return only its local path.

    Delegates to ``fetch_with_resolution`` with the same arguments and
    discards the resolved ref from its result.

    Args:
        source: Extension source -- git URL, GitHub shorthand, or local path.
        cache_dir: Directory for caching.
        ref: Optional branch, tag, or commit to checkout.
        update: If True and cache exists, update it.
        repo_path: Subdirectory path within the repository.
        git_helper: GitHelper instance (for testing).

    Returns:
        Path to the local extension directory.
    """
    resolution = fetch_with_resolution(
        source=source,
        cache_dir=cache_dir,
        ref=ref,
        update=update,
        repo_path=repo_path,
        git_helper=git_helper,
    )
    # Only the path is of interest here; the resolved ref is dropped.
    local_path, _resolved_ref = resolution
    return local_path
| 161 | + |
| 162 | + |
def fetch_with_resolution(
    source: str,
    cache_dir: Path,
    ref: str | None = None,
    update: bool = True,
    repo_path: str | None = None,
    git_helper: GitHelper | None = None,
) -> tuple[Path, str | None]:
    """Fetch an extension, returning its path together with the resolved ref.

    Args:
        source: Extension source (git URL, GitHub shorthand, or local path).
        cache_dir: Directory for caching.
        ref: Optional branch, tag, or commit to checkout.
        update: If True and cache exists, update it.
        repo_path: Subdirectory path within the repository. Must be None for
            local sources.
        git_helper: GitHelper instance (for testing). A default ``GitHelper()``
            is created for remote sources when omitted.

    Returns:
        Tuple of (path, resolved_ref) where resolved_ref is the commit SHA for git
        sources and None for local paths.

    Raises:
        ExtensionFetchError: If fetching the extension fails, or if repo_path
            is given together with a local source.
    """
    kind, location = parse_extension_source(source)

    if kind != SourceType.LOCAL:
        # Remote source: clone or update through the cache.
        helper = GitHelper() if git_helper is None else git_helper
        remote_path, commit = _fetch_remote_source_with_resolution(
            location, cache_dir, ref, update, repo_path, helper, source
        )
        return remote_path, commit

    # Local source: used in place, so a repository subdirectory makes no sense.
    if repo_path is None:
        return _resolve_local_source(location), None

    raise ExtensionFetchError(
        f"repo_path is not supported for local extension sources. "
        f"Specify the full path directly instead of "
        f"source='{source}' + repo_path='{repo_path}'"
    )
| 205 | + |
| 206 | + |
def get_cache_path(source: str, cache_dir: Path) -> Path:
    """Compute the deterministic cache location for an extension source.

    The directory name is ``<repo-name>-<hash>``: the name returned by
    ``extract_repo_name`` keeps it readable, and the first 16 hex characters
    of the SHA-256 digest of ``source`` tie it to the exact source string.

    Args:
        source: The extension source (URL or path).
        cache_dir: Base cache directory.

    Returns:
        Path where the extension should be cached.
    """
    digest = hashlib.sha256(source.encode()).hexdigest()
    short_digest = digest[:16]
    repo_name = extract_repo_name(source)
    return cache_dir / f"{repo_name}-{short_digest}"
| 227 | + |
| 228 | + |
def _fetch_remote_source_with_resolution(
    url: str,
    cache_dir: Path,
    ref: str | None,
    update: bool,
    subpath: str | None,
    git_helper: GitHelper,
    source: str,
) -> tuple[Path, str]:
    """Clone or update a remote extension in the cache and resolve its commit.

    Args:
        url: Git URL to fetch.
        cache_dir: Base directory for caching; created if missing.
        ref: Optional branch, tag, or commit to checkout.
        update: Whether to update existing cache.
        subpath: Optional subdirectory within the repository.
        git_helper: GitHelper instance for git operations.
        source: Original source string (for error and log messages).

    Returns:
        Tuple of (path, resolved_ref). resolved_ref is the HEAD commit reported
        by ``git_helper``; if that lookup fails it falls back to ``ref``, or to
        "HEAD" when no ref was requested.

    Raises:
        ExtensionFetchError: If fetching fails or subpath is invalid.
    """
    checkout_dir = get_cache_path(url, cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)

    clone_result = try_cached_clone_or_update(
        url=url,
        repo_path=checkout_dir,
        ref=ref,
        update=update,
        git_helper=git_helper,
    )
    if clone_result is None:
        raise ExtensionFetchError(f"Failed to fetch extension from {source}")

    # Best effort: a failed SHA lookup is logged and must not fail the fetch.
    try:
        commit_sha = git_helper.get_head_commit(checkout_dir)
    except Exception as exc:
        logger.warning(f"Could not get commit SHA for {source}: {exc}")
        commit_sha = ref if ref else "HEAD"

    return _apply_subpath(checkout_dir, subpath, "extension repository"), commit_sha
0 commit comments