Skip to content

Commit 4f3e096

Browse files
authored
Merge pull request #192 from alex-feel/alex-feel-dev
Convert GitHub/GitLab tree URLs to raw URLs for skills validation
2 parents 2aad95a + edfe096 commit 4f3e096

File tree

2 files changed

+172
-6
lines changed

2 files changed

+172
-6
lines changed

scripts/setup_environment.py

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import json
1515
import os
1616
import platform
17+
import re
1718
import shutil
1819
import ssl
1920
import subprocess
@@ -403,7 +404,6 @@ def expand_tildes_in_command(command: str) -> str:
403404
>>> expand_tildes_in_command("echo 'text' >> ~/.config/file")
404405
"echo 'text' >> /home/user/.config/file"
405406
"""
406-
import re
407407

408408
# Pattern matches ~ and ~username paths
409409
# Matches: ~ followed by optional username, then slash and path components
@@ -866,7 +866,9 @@ def validate_all_config_files(
866866
if isinstance(skill_file_item, str):
867867
# Build full path for validation
868868
if skill_base.startswith(('http://', 'https://')):
869-
full_url = f"{skill_base.rstrip('/')}/{skill_file_item}"
869+
# Convert tree/blob URLs to raw URLs for validation
870+
raw_base = convert_to_raw_url(skill_base)
871+
full_url = f"{raw_base.rstrip('/')}/{skill_file_item}"
870872
files_to_check.append(('skill', f'{skill_name}/{skill_file_item}', full_url, True))
871873
else:
872874
resolved_base, _ = resolve_resource_path(skill_base, config_source, None)
@@ -960,6 +962,65 @@ def detect_repo_type(url: str) -> str | None:
960962
return None
961963

962964

965+
def convert_to_raw_url(url: str) -> str:
    """Convert GitHub/GitLab web UI URLs to raw content URLs.

    Transforms repository web interface URLs (tree/blob views) to their raw
    content equivalents that can be downloaded directly.

    Supports:
    - GitHub: https tree/blob URLs -> raw.githubusercontent.com
    - GitLab: tree/blob URLs -> raw URLs (works with self-hosted instances)

    Only the first ``/-/tree/``, ``/-/blob/`` or ``/-/raw/`` segment of a
    GitLab URL is treated as the view marker; identical-looking segments
    further along the file path are left untouched.

    Args:
        url: URL to convert (may be a web UI URL, raw URL, or local path)

    Returns:
        Raw content URL if conversion was possible, otherwise the original URL unchanged.

    Examples:
        >>> convert_to_raw_url("https://github.com/org/repo/tree/main/path")
        'https://raw.githubusercontent.com/org/repo/main/path'

        >>> convert_to_raw_url("https://gitlab.com/ns/proj/-/tree/main/path")
        'https://gitlab.com/ns/proj/-/raw/main/path'

        >>> convert_to_raw_url("https://gitlab.com/ns/proj/-/blob/main/docs/-/tree/x")
        'https://gitlab.com/ns/proj/-/raw/main/docs/-/tree/x'

        >>> convert_to_raw_url("https://raw.githubusercontent.com/org/repo/main/path")
        'https://raw.githubusercontent.com/org/repo/main/path'

        >>> convert_to_raw_url("./local/path")
        './local/path'
    """
    # Return unchanged if not a URL
    if not url.startswith(('http://', 'https://')):
        return url

    # Already a raw GitHub URL - return unchanged. The check is anchored on the
    # host so that a github.com URL whose *path* happens to contain the text
    # "raw.githubusercontent.com" is still converted below.
    if re.match(r'https?://raw\.githubusercontent\.com(?:/|$)', url):
        return url

    # GitHub transformation
    # Pattern: github.com/{owner}/{repo}/(tree|blob)/{branch}/{path}
    # The pattern is deliberately https-only; plain-http GitHub URLs fall
    # through and are returned unchanged.
    github_match = re.match(
        r'https://github\.com/([^/]+)/([^/]+)/(?:tree|blob)/(.+)',
        url.rstrip('/'),
    )
    if github_match:
        owner, repo, branch_and_path = github_match.groups()
        # Handle refs/heads/ prefix if present
        branch_and_path = branch_and_path.removeprefix('refs/heads/')
        return f'https://raw.githubusercontent.com/{owner}/{repo}/{branch_and_path}'

    # GitLab transformation (works with self-hosted instances)
    # Rewrite only the first view marker. Including "raw" in the alternation
    # makes an already-raw URL a no-op, and stops a later "/-/tree/" or
    # "/-/blob/" inside the file path from being mistaken for the marker.
    # URLs without any marker come back unchanged.
    return re.sub(r'/-/(?:tree|blob|raw)/', '/-/raw/', url, count=1)
1022+
1023+
9631024
def convert_gitlab_url_to_api(url: str) -> str:
9641025
"""Convert GitLab web UI URL to API URL for authentication.
9651026
@@ -1997,8 +2058,9 @@ def validate_skill_files(
19972058

19982059
# Build full path: base + file_path
19992060
if base.startswith(('http://', 'https://')):
2000-
# Remote base - combine URL
2001-
full_url = f"{base.rstrip('/')}/{file_path}"
2061+
# Remote base - convert tree/blob URLs to raw URLs
2062+
raw_base = convert_to_raw_url(base)
2063+
full_url = f"{raw_base.rstrip('/')}/{file_path}"
20022064
auth_headers = get_auth_headers(full_url, auth_param)
20032065
is_valid, method = validate_file_availability(full_url, auth_headers)
20042066
else:
@@ -2064,8 +2126,9 @@ def process_skill(
20642126

20652127
# Build source path
20662128
if base.startswith(('http://', 'https://')):
2067-
# Remote source - download file
2068-
source_url = f"{base.rstrip('/')}/{file_path}"
2129+
# Remote source - convert tree/blob URLs to raw URLs for download
2130+
raw_base = convert_to_raw_url(base)
2131+
source_url = f"{raw_base.rstrip('/')}/{file_path}"
20692132
try:
20702133
content = fetch_url_with_auth(source_url, auth_param=auth_param)
20712134
destination.write_text(content, encoding='utf-8')

tests/test_setup_environment_skills.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,109 @@
3131
import setup_environment
3232

3333

34+
class TestConvertToRawUrl:
    """Exercise ``convert_to_raw_url`` on GitHub, GitLab, local-path and other inputs."""

    def test_github_tree_url(self) -> None:
        """A GitHub ``tree`` URL is rewritten to raw.githubusercontent.com."""
        source = 'https://github.com/owner/repo/tree/main/skills/my-skill'
        want = 'https://raw.githubusercontent.com/owner/repo/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_github_blob_url(self) -> None:
        """A GitHub ``blob`` URL is rewritten to raw.githubusercontent.com."""
        source = 'https://github.com/owner/repo/blob/main/skills/my-skill'
        want = 'https://raw.githubusercontent.com/owner/repo/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_github_raw_url_unchanged(self) -> None:
        """A raw.githubusercontent.com URL passes through untouched."""
        source = 'https://raw.githubusercontent.com/owner/repo/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == source

    def test_gitlab_tree_url(self) -> None:
        """A GitLab ``/-/tree/`` URL becomes a ``/-/raw/`` URL."""
        source = 'https://gitlab.com/ns/proj/-/tree/main/skills/my-skill'
        want = 'https://gitlab.com/ns/proj/-/raw/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_gitlab_blob_url(self) -> None:
        """A GitLab ``/-/blob/`` URL becomes a ``/-/raw/`` URL."""
        source = 'https://gitlab.com/ns/proj/-/blob/main/skills/my-skill'
        want = 'https://gitlab.com/ns/proj/-/raw/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_gitlab_self_hosted_tree_url(self) -> None:
        """The GitLab rewrite also applies on a self-hosted hostname."""
        source = 'https://gitlab.company.com/group/project/-/tree/develop/path/to/skill'
        want = 'https://gitlab.company.com/group/project/-/raw/develop/path/to/skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_local_path_unchanged(self) -> None:
        """A relative filesystem path is not a URL and is returned as given."""
        local = './skills/local-skill'
        result = setup_environment.convert_to_raw_url(local)
        assert result == local

    def test_absolute_local_path_unchanged(self) -> None:
        """An absolute POSIX path is returned as given."""
        local = '/home/user/skills/local-skill'
        result = setup_environment.convert_to_raw_url(local)
        assert result == local

    def test_windows_path_unchanged(self) -> None:
        """A Windows drive path is returned as given."""
        local = 'C:\\Users\\user\\skills\\local-skill'
        result = setup_environment.convert_to_raw_url(local)
        assert result == local

    def test_github_refs_heads_prefix(self) -> None:
        """A leading ``refs/heads/`` in the ref portion is dropped from the raw URL."""
        source = 'https://github.com/owner/repo/tree/refs/heads/main/skills/my-skill'
        want = 'https://raw.githubusercontent.com/owner/repo/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_github_url_with_trailing_slash(self) -> None:
        """A trailing slash on a GitHub URL does not survive the conversion."""
        source = 'https://github.com/owner/repo/tree/main/skills/my-skill/'
        want = 'https://raw.githubusercontent.com/owner/repo/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_github_url_deep_path(self) -> None:
        """Several nested path components are carried over intact."""
        source = 'https://github.com/org/repo/tree/main/skills/library/context-retrieval-protocol'
        want = (
            'https://raw.githubusercontent.com/org/repo/main/'
            'skills/library/context-retrieval-protocol'
        )
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_github_url_feature_branch(self) -> None:
        """A branch name containing a slash is carried over together with the path."""
        source = 'https://github.com/owner/repo/tree/feature/new-skill/skills/my-skill'
        want = (
            'https://raw.githubusercontent.com/owner/repo/'
            'feature/new-skill/skills/my-skill'
        )
        result = setup_environment.convert_to_raw_url(source)
        assert result == want

    def test_gitlab_raw_url_unchanged(self) -> None:
        """A GitLab URL that is already ``/-/raw/`` passes through untouched."""
        source = 'https://gitlab.com/ns/proj/-/raw/main/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == source

    def test_other_url_unchanged(self) -> None:
        """A URL that is neither a GitHub nor a GitLab tree/blob view is left alone."""
        source = 'https://example.com/skills/my-skill'
        result = setup_environment.convert_to_raw_url(source)
        assert result == source

    def test_http_url_github(self) -> None:
        """A plain-http GitHub tree URL is returned unchanged."""
        # The GitHub pattern in convert_to_raw_url is anchored on "https://",
        # so an "http://" tree URL is not recognised as a GitHub web URL.
        # This test pins that current behaviour: no conversion takes place.
        source = 'http://github.com/owner/repo/tree/main/skills'
        result = setup_environment.convert_to_raw_url(source)
        assert result == source
135+
136+
34137
class TestValidateSkillFiles:
35138
"""Test skill file validation."""
36139

0 commit comments

Comments
 (0)