@@ -1012,7 +1012,6 @@ def validate_all_config_files(
10121012 for skill_item in skills_list :
10131013 if isinstance (skill_item , dict ):
10141014 skill_dict = cast (dict [str , Any ], skill_item )
1015- skill_name = skill_dict .get ('name' , 'unknown' )
10161015 skill_base = skill_dict .get ('base' , '' )
10171016 skill_files = skill_dict .get ('files' , [])
10181017
@@ -1025,11 +1024,11 @@ def validate_all_config_files(
10251024 # Convert tree/blob URLs to raw URLs for validation
10261025 raw_base = convert_to_raw_url (skill_base )
10271026 full_url = f"{ raw_base .rstrip ('/' )} /{ skill_file_item } "
1028- files_to_check .append (('skill' , f' { skill_name } / { skill_file_item } ' , full_url , True ))
1027+ files_to_check .append (('skill' , full_url , full_url , True ))
10291028 else :
10301029 resolved_base , _ = resolve_resource_path (skill_base , config_source , None )
10311030 full_path = str (Path (resolved_base ) / skill_file_item )
1032- files_to_check .append (('skill' , f' { skill_name } / { skill_file_item } ' , full_path , False ))
1031+ files_to_check .append (('skill' , full_path , full_path , False ))
10331032
10341033 # Validate each file
10351034 info (f'Validating { len (files_to_check )} files...' )
@@ -1093,6 +1092,35 @@ def download_file(url: str, destination: Path, force: bool = True) -> bool:
10931092 return False
10941093
10951094
1095+ # Frozen set of binary file extensions (immutable for safety)
1096+ BINARY_EXTENSIONS : frozenset [str ] = frozenset ([
1097+ # Archives
1098+ '.tar.gz' , '.tgz' , '.gz' , '.zip' , '.7z' , '.rar' ,
1099+ '.tar' , '.bz2' , '.xz' , '.lz4' , '.zst' ,
1100+ # Images
1101+ '.png' , '.jpg' , '.jpeg' , '.gif' , '.bmp' , '.ico' , '.webp' , '.svg' ,
1102+ # Documents
1103+ '.pdf' , '.doc' , '.docx' , '.xls' , '.xlsx' , '.ppt' , '.pptx' ,
1104+ # Executables
1105+ '.exe' , '.dll' , '.so' , '.dylib' ,
1106+ # Python
1107+ '.whl' , '.pyc' , '.pyo' ,
1108+ ])
1109+
1110+
1111+ def is_binary_file (file_path : str | Path ) -> bool :
1112+ """Check if a file is binary based on its extension.
1113+
1114+ Args:
1115+ file_path: Path to the file (can be URL, local path, or filename)
1116+
1117+ Returns:
1118+ bool: True if the file extension indicates a binary file
1119+ """
1120+ path_str = str (file_path ).lower ()
1121+ return any (path_str .endswith (ext ) for ext in BINARY_EXTENSIONS )
1122+
1123+
10961124def detect_repo_type (url : str ) -> str | None :
10971125 """Detect the repository type from URL.
10981126
@@ -2562,6 +2590,89 @@ def fetch_url_with_auth(url: str, auth_headers: dict[str, str] | None = None, au
25622590 raise
25632591
25642592
def _read_and_close(response: Any) -> bytes:
    """Read the full body of an open URL response and always close it."""
    try:
        content: bytes = response.read()
    finally:
        # Release the underlying socket promptly instead of waiting for GC.
        response.close()
    return content


def fetch_url_bytes_with_auth(
    url: str,
    auth_headers: dict[str, str] | None = None,
    auth_param: str | None = None,
) -> bytes:
    """Fetch URL content as bytes, trying without auth first, then with auth if needed.

    Similar to fetch_url_with_auth but returns raw bytes without decoding.
    Use this for binary files like .tar.gz, .zip, images, etc.

    Flow:
        1. GitLab ``/-/raw/`` URLs are converted to API URLs.
        2. An unauthenticated request is attempted.
        3. On HTTP 401/403/404 the request is retried once with auth headers
           (the ones passed in, or those returned by ``get_auth_headers``).
        4. On an SSL/certificate ``URLError`` the request is retried once
           with certificate verification disabled.

    Args:
        url: URL to fetch
        auth_headers: Optional pre-computed auth headers
        auth_param: Optional auth parameter for getting headers

    Returns:
        bytes: Raw content of the URL

    Raises:
        HTTPError: If the HTTP request fails after authentication attempts
        URLError: If there's a URL/network error (including SSL issues)
    """
    # Convert GitLab web URLs to API URLs for authentication
    original_url = url
    if detect_repo_type(url) == 'gitlab' and '/-/raw/' in url:
        url = convert_gitlab_url_to_api(url)
        if url != original_url:
            info(f'Using API URL: {url}')

    # First try without auth (for public repos)
    try:
        request = Request(url)
        return _read_and_close(urlopen(request))
    except urllib.error.HTTPError as e:
        if e.code not in (401, 403, 404):
            raise

        # Authentication might be needed
        if not auth_headers:
            auth_headers = get_auth_headers(url, auth_param)

        if not auth_headers:
            # A plain 404 with no credentials available is reported as-is;
            # 401/403 additionally hint that credentials are missing.
            if e.code != 404:
                warning('Authentication may be required for this URL')
            raise

        info('Retrying with authentication...')
        request = Request(url)
        for header, value in auth_headers.items():
            request.add_header(header, value)
        try:
            return _read_and_close(urlopen(request))
        except urllib.error.HTTPError as auth_e:
            if auth_e.code == 401:
                error('Authentication failed. Check your token.')
            elif auth_e.code == 403:
                error('Access forbidden. Token may lack permissions.')
            elif auth_e.code == 404:
                error('Resource not found. Check URL and permissions.')
            raise
    except urllib.error.URLError as e:
        if 'SSL' in str(e) or 'certificate' in str(e).lower():
            warning('SSL certificate verification failed, trying with unverified context')
            # NOTE(security): this fallback disables hostname and certificate
            # checks, so the downloaded bytes are not authenticated and any
            # auth headers are sent to an unverified peer. Kept for
            # backward compatibility; review before relying on it.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE

            request = Request(url)
            # Only headers supplied by the caller are forwarded here.
            if auth_headers:
                for header, value in auth_headers.items():
                    request.add_header(header, value)

            return _read_and_close(urlopen(request, context=ctx))
        raise
2674+
2675+
25652676def extract_front_matter (file_path : Path ) -> dict [str , Any ] | None :
25662677 """Extract YAML front matter from a Markdown file.
25672678
@@ -2632,8 +2743,14 @@ def handle_resource(
26322743
26332744 if is_remote :
26342745 # Download from URL
2635- content = fetch_url_with_auth (resolved_path , auth_param = auth_param )
2636- destination .write_text (content , encoding = 'utf-8' )
2746+ if is_binary_file (resolved_path ):
2747+ # Binary file - fetch as bytes and write bytes
2748+ content_bytes = fetch_url_bytes_with_auth (resolved_path , auth_param = auth_param )
2749+ destination .write_bytes (content_bytes )
2750+ else :
2751+ # Text file - fetch as text and write text
2752+ content = fetch_url_with_auth (resolved_path , auth_param = auth_param )
2753+ destination .write_text (content , encoding = 'utf-8' )
26372754 success (f'Downloaded: { filename } ' )
26382755 else :
26392756 # Copy from local path
@@ -2888,8 +3005,14 @@ def process_skill(
28883005 raw_base = convert_to_raw_url (base )
28893006 source_url = f"{ raw_base .rstrip ('/' )} /{ file_path } "
28903007 try :
2891- content = fetch_url_with_auth (source_url , auth_param = auth_param )
2892- destination .write_text (content , encoding = 'utf-8' )
3008+ if is_binary_file (file_path ):
3009+ # Binary file - fetch as bytes and write bytes
3010+ content_bytes = fetch_url_bytes_with_auth (source_url , auth_param = auth_param )
3011+ destination .write_bytes (content_bytes )
3012+ else :
3013+ # Text file - fetch as text and write text
3014+ content = fetch_url_with_auth (source_url , auth_param = auth_param )
3015+ destination .write_text (content , encoding = 'utf-8' )
28933016 success (f' Downloaded: { file_path } ' )
28943017 success_count += 1
28953018 except Exception as e :
0 commit comments