Skip to content

Commit d2090a8

Browse files
authored
Merge pull request #224 from alex-feel/alex-feel-dev
Handle binary file downloads and display full URLs for skill files in validation output
2 parents a4d8d53 + 33ddfb0 commit d2090a8

File tree

3 files changed

+499
-10
lines changed

3 files changed

+499
-10
lines changed

scripts/setup_environment.py

Lines changed: 130 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,6 @@ def validate_all_config_files(
10121012
for skill_item in skills_list:
10131013
if isinstance(skill_item, dict):
10141014
skill_dict = cast(dict[str, Any], skill_item)
1015-
skill_name = skill_dict.get('name', 'unknown')
10161015
skill_base = skill_dict.get('base', '')
10171016
skill_files = skill_dict.get('files', [])
10181017

@@ -1025,11 +1024,11 @@ def validate_all_config_files(
10251024
# Convert tree/blob URLs to raw URLs for validation
10261025
raw_base = convert_to_raw_url(skill_base)
10271026
full_url = f"{raw_base.rstrip('/')}/{skill_file_item}"
1028-
files_to_check.append(('skill', f'{skill_name}/{skill_file_item}', full_url, True))
1027+
files_to_check.append(('skill', full_url, full_url, True))
10291028
else:
10301029
resolved_base, _ = resolve_resource_path(skill_base, config_source, None)
10311030
full_path = str(Path(resolved_base) / skill_file_item)
1032-
files_to_check.append(('skill', f'{skill_name}/{skill_file_item}', full_path, False))
1031+
files_to_check.append(('skill', full_path, full_path, False))
10331032

10341033
# Validate each file
10351034
info(f'Validating {len(files_to_check)} files...')
@@ -1093,6 +1092,35 @@ def download_file(url: str, destination: Path, force: bool = True) -> bool:
10931092
return False
10941093

10951094

1095+
# Frozen set of binary file extensions (immutable for safety)
1096+
BINARY_EXTENSIONS: frozenset[str] = frozenset([
1097+
# Archives
1098+
'.tar.gz', '.tgz', '.gz', '.zip', '.7z', '.rar',
1099+
'.tar', '.bz2', '.xz', '.lz4', '.zst',
1100+
# Images
1101+
'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp', '.svg',
1102+
# Documents
1103+
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
1104+
# Executables
1105+
'.exe', '.dll', '.so', '.dylib',
1106+
# Python
1107+
'.whl', '.pyc', '.pyo',
1108+
])
1109+
1110+
1111+
def is_binary_file(file_path: str | Path) -> bool:
1112+
"""Check if a file is binary based on its extension.
1113+
1114+
Args:
1115+
file_path: Path to the file (can be URL, local path, or filename)
1116+
1117+
Returns:
1118+
bool: True if the file extension indicates a binary file
1119+
"""
1120+
path_str = str(file_path).lower()
1121+
return any(path_str.endswith(ext) for ext in BINARY_EXTENSIONS)
1122+
1123+
10961124
def detect_repo_type(url: str) -> str | None:
10971125
"""Detect the repository type from URL.
10981126
@@ -2562,6 +2590,89 @@ def fetch_url_with_auth(url: str, auth_headers: dict[str, str] | None = None, au
25622590
raise
25632591

25642592

2593+
def fetch_url_bytes_with_auth(
    url: str,
    auth_headers: dict[str, str] | None = None,
    auth_param: str | None = None,
) -> bytes:
    """Fetch URL content as bytes, trying without auth first, then with auth if needed.

    Similar to fetch_url_with_auth but returns raw bytes without decoding.
    Use this for binary files like .tar.gz, .zip, images, etc.

    The request is first sent anonymously. On HTTP 401, 403 or 404 it is
    retried once with authentication headers (the ones passed in, otherwise
    whatever get_auth_headers() returns for the URL). On an SSL/certificate
    related URLError the request is retried once with certificate
    verification disabled.

    Args:
        url: URL to fetch
        auth_headers: Optional pre-computed auth headers
        auth_param: Optional auth parameter for getting headers

    Returns:
        bytes: Raw content of the URL

    Raises:
        HTTPError: If the HTTP request fails after authentication attempts
        URLError: If there's a URL/network error (including SSL issues)
    """

    def _read_and_close(response) -> bytes:
        # Read the full body and always release the underlying connection,
        # even if read() fails part-way through.
        try:
            payload: bytes = response.read()
            return payload
        finally:
            response.close()

    # Convert GitLab web URLs to API URLs for authentication
    original_url = url
    if detect_repo_type(url) == 'gitlab' and '/-/raw/' in url:
        url = convert_gitlab_url_to_api(url)
        if url != original_url:
            info(f'Using API URL: {url}')

    # First try without auth (for public repos)
    try:
        request = Request(url)
        return _read_and_close(urlopen(request))
    except urllib.error.HTTPError as e:
        # Anything other than 401/403/404 is not an auth problem - propagate.
        if e.code not in (401, 403, 404):
            raise

        # Authentication might be needed
        if not auth_headers:
            auth_headers = get_auth_headers(url, auth_param)

        if not auth_headers:
            # No credentials available: a 404 is reported as-is, while
            # 401/403 get a hint that authentication is probably required.
            if e.code != 404:
                warning('Authentication may be required for this URL')
            raise

        info('Retrying with authentication...')
        request = Request(url)
        for header, value in auth_headers.items():
            request.add_header(header, value)
        try:
            return _read_and_close(urlopen(request))
        except urllib.error.HTTPError as auth_e:
            if auth_e.code == 401:
                error('Authentication failed. Check your token.')
            elif auth_e.code == 403:
                error('Access forbidden. Token may lack permissions.')
            elif auth_e.code == 404:
                error('Resource not found. Check URL and permissions.')
            raise
    except urllib.error.URLError as e:
        # HTTPError is handled above; only transport-level failures reach here.
        if 'SSL' in str(e) or 'certificate' in str(e).lower():
            warning('SSL certificate verification failed, trying with unverified context')
            # NOTE(security): this fallback disables hostname and certificate
            # verification, and caller-supplied auth headers are sent over that
            # unverified connection. Kept for backward compatibility - review
            # whether it should be opt-in.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE

            request = Request(url)
            if auth_headers:
                for header, value in auth_headers.items():
                    request.add_header(header, value)

            return _read_and_close(urlopen(request, context=ctx))
        raise
2674+
2675+
25652676
def extract_front_matter(file_path: Path) -> dict[str, Any] | None:
25662677
"""Extract YAML front matter from a Markdown file.
25672678
@@ -2632,8 +2743,14 @@ def handle_resource(
26322743

26332744
if is_remote:
26342745
# Download from URL
2635-
content = fetch_url_with_auth(resolved_path, auth_param=auth_param)
2636-
destination.write_text(content, encoding='utf-8')
2746+
if is_binary_file(resolved_path):
2747+
# Binary file - fetch as bytes and write bytes
2748+
content_bytes = fetch_url_bytes_with_auth(resolved_path, auth_param=auth_param)
2749+
destination.write_bytes(content_bytes)
2750+
else:
2751+
# Text file - fetch as text and write text
2752+
content = fetch_url_with_auth(resolved_path, auth_param=auth_param)
2753+
destination.write_text(content, encoding='utf-8')
26372754
success(f'Downloaded: {filename}')
26382755
else:
26392756
# Copy from local path
@@ -2888,8 +3005,14 @@ def process_skill(
28883005
raw_base = convert_to_raw_url(base)
28893006
source_url = f"{raw_base.rstrip('/')}/{file_path}"
28903007
try:
2891-
content = fetch_url_with_auth(source_url, auth_param=auth_param)
2892-
destination.write_text(content, encoding='utf-8')
3008+
if is_binary_file(file_path):
3009+
# Binary file - fetch as bytes and write bytes
3010+
content_bytes = fetch_url_bytes_with_auth(source_url, auth_param=auth_param)
3011+
destination.write_bytes(content_bytes)
3012+
else:
3013+
# Text file - fetch as text and write text
3014+
content = fetch_url_with_auth(source_url, auth_param=auth_param)
3015+
destination.write_text(content, encoding='utf-8')
28933016
success(f' Downloaded: {file_path}')
28943017
success_count += 1
28953018
except Exception as e:

0 commit comments

Comments
 (0)