Skip to content

Commit be4fb9d

Browse files
authored
Merge pull request #226 from alex-feel/alex-feel-dev
Consolidate file validation into FileValidator class
2 parents 7ebbfe3 + 81fbf0e commit be4fb9d

File tree

3 files changed

+671
-531
lines changed

3 files changed

+671
-531
lines changed

scripts/setup_environment.py

Lines changed: 161 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -831,99 +831,172 @@ def expand_env_vars(value: str) -> str:
831831
return True
832832

833833

834-
def check_file_with_head(url: str, auth_headers: dict[str, str] | None = None) -> bool:
835-
"""Check if file exists using HEAD request.
834+
class FileValidator:
835+
"""Validates file availability for both remote URLs and local paths.
836836
837-
Args:
838-
url: URL to check
839-
auth_headers: Optional authentication headers
837+
Handles authentication automatically based on the URL being validated,
838+
supporting GitHub and GitLab private repositories. For remote files,
839+
attempts HEAD request first, then falls back to Range request.
840840
841-
Returns:
842-
True if file is accessible, False otherwise
841+
Attributes:
842+
auth_param: Optional authentication parameter. Accepts token value
843+
(auto-detects header based on URL) or explicit header:value format.
843844
"""
844-
try:
845-
request = Request(url, method='HEAD')
846-
if auth_headers:
847-
for header, value in auth_headers.items():
848-
request.add_header(header, value)
849845

846+
def __init__(self, auth_param: str | None = None) -> None:
847+
"""Initialize FileValidator.
848+
849+
Args:
850+
auth_param: Optional auth parameter in format "header:value" or "header=value"
851+
or just a token (will auto-detect header based on URL)
852+
"""
853+
self.auth_param = auth_param
854+
self._validation_results: list[tuple[str, str, bool, str]] = []
855+
856+
def validate_remote_url(self, url: str) -> tuple[bool, str]:
857+
"""Validate a remote URL with per-URL authentication.
858+
859+
Generates authentication headers specific to this URL (GitHub vs GitLab)
860+
and attempts validation using HEAD request, then Range request as fallback.
861+
862+
Args:
863+
url: Remote URL to validate
864+
865+
Returns:
866+
Tuple of (is_valid, method_used)
867+
method_used is 'HEAD', 'Range', or 'None'
868+
"""
869+
# Convert GitLab web URLs to API format
870+
original_url = url
871+
if detect_repo_type(url) == 'gitlab' and '/-/raw/' in url:
872+
url = convert_gitlab_url_to_api(url)
873+
if url != original_url:
874+
info(f'Using API URL for validation: {url}')
875+
876+
# Generate auth headers for THIS specific URL
877+
auth_headers = get_auth_headers(url, self.auth_param)
878+
879+
# Try HEAD request first
880+
if self._check_with_head(url, auth_headers):
881+
return (True, 'HEAD')
882+
883+
# Fallback to Range request
884+
if self._check_with_range(url, auth_headers):
885+
return (True, 'Range')
886+
887+
return (False, 'None')
888+
889+
def validate_local_path(self, path: str) -> tuple[bool, str]:
890+
"""Validate a local file path.
891+
892+
Args:
893+
path: Local file path to validate
894+
895+
Returns:
896+
Tuple of (is_valid, 'Local')
897+
"""
898+
local_path = Path(path)
899+
if local_path.exists() and local_path.is_file():
900+
return (True, 'Local')
901+
return (False, 'Local')
902+
903+
def validate(self, url_or_path: str, is_remote: bool) -> tuple[bool, str]:
904+
"""Validate a file, automatically choosing remote or local validation.
905+
906+
Args:
907+
url_or_path: URL or local path to validate
908+
is_remote: True if this is a remote URL, False if local path
909+
910+
Returns:
911+
Tuple of (is_valid, method_used)
912+
"""
913+
if is_remote:
914+
return self.validate_remote_url(url_or_path)
915+
return self.validate_local_path(url_or_path)
916+
917+
def _check_with_head(self, url: str, auth_headers: dict[str, str] | None) -> bool:
918+
"""Check URL availability using HEAD request.
919+
920+
Args:
921+
url: URL to check
922+
auth_headers: Optional authentication headers
923+
924+
Returns:
925+
True if file is accessible, False otherwise
926+
"""
850927
try:
851-
response = urlopen(request)
852-
return bool(response.status == 200)
853-
except urllib.error.URLError as e:
854-
if 'SSL' in str(e) or 'certificate' in str(e).lower():
855-
# Try with unverified SSL context
856-
ctx = ssl.create_default_context()
857-
ctx.check_hostname = False
858-
ctx.verify_mode = ssl.CERT_NONE
859-
response = urlopen(request, context=ctx)
928+
request = Request(url, method='HEAD')
929+
if auth_headers:
930+
for header, value in auth_headers.items():
931+
request.add_header(header, value)
932+
933+
try:
934+
response = urlopen(request)
860935
return bool(response.status == 200)
936+
except urllib.error.URLError as e:
937+
if 'SSL' in str(e) or 'certificate' in str(e).lower():
938+
# Try with unverified SSL context
939+
ctx = ssl.create_default_context()
940+
ctx.check_hostname = False
941+
ctx.verify_mode = ssl.CERT_NONE
942+
response = urlopen(request, context=ctx)
943+
return bool(response.status == 200)
944+
return False
945+
except (urllib.error.HTTPError, Exception):
861946
return False
862-
except (urllib.error.HTTPError, Exception):
863-
return False
864-
865947

866-
def check_file_with_range(url: str, auth_headers: dict[str, str] | None = None) -> bool:
867-
"""Check if file exists using Range request (first byte only).
948+
def _check_with_range(self, url: str, auth_headers: dict[str, str] | None) -> bool:
949+
"""Check URL availability using Range request.
868950
869-
Args:
870-
url: URL to check
871-
auth_headers: Optional authentication headers
872-
873-
Returns:
874-
True if file is accessible, False otherwise
875-
"""
876-
try:
877-
request = Request(url)
878-
request.add_header('Range', 'bytes=0-0')
879-
if auth_headers:
880-
for header, value in auth_headers.items():
881-
request.add_header(header, value)
951+
Args:
952+
url: URL to check
953+
auth_headers: Optional authentication headers
882954
955+
Returns:
956+
True if file is accessible, False otherwise
957+
"""
883958
try:
884-
response = urlopen(request)
885-
# Accept both 200 (full content) and 206 (partial content)
886-
return response.status in (200, 206)
887-
except urllib.error.URLError as e:
888-
if 'SSL' in str(e) or 'certificate' in str(e).lower():
889-
# Try with unverified SSL context
890-
ctx = ssl.create_default_context()
891-
ctx.check_hostname = False
892-
ctx.verify_mode = ssl.CERT_NONE
893-
response = urlopen(request, context=ctx)
959+
request = Request(url)
960+
request.add_header('Range', 'bytes=0-0')
961+
if auth_headers:
962+
for header, value in auth_headers.items():
963+
request.add_header(header, value)
964+
965+
try:
966+
response = urlopen(request)
967+
# Accept both 200 (full content) and 206 (partial content)
894968
return response.status in (200, 206)
969+
except urllib.error.URLError as e:
970+
if 'SSL' in str(e) or 'certificate' in str(e).lower():
971+
# Try with unverified SSL context
972+
ctx = ssl.create_default_context()
973+
ctx.check_hostname = False
974+
ctx.verify_mode = ssl.CERT_NONE
975+
response = urlopen(request, context=ctx)
976+
return response.status in (200, 206)
977+
return False
978+
except (urllib.error.HTTPError, Exception):
895979
return False
896-
except (urllib.error.HTTPError, Exception):
897-
return False
898980

981+
@property
982+
def results(self) -> list[tuple[str, str, bool, str]]:
983+
"""Get accumulated validation results."""
984+
return self._validation_results
899985

900-
def validate_file_availability(url: str, auth_headers: dict[str, str] | None = None) -> tuple[bool, str]:
901-
"""Validate file availability using HEAD first, then Range as fallback.
986+
def add_result(self, file_type: str, original_path: str, is_valid: bool, method: str) -> None:
987+
"""Record a validation result.
902988
903-
Args:
904-
url: URL to check
905-
auth_headers: Optional authentication headers
989+
Args:
990+
file_type: Type of file (agent, skill, hook, etc.)
991+
original_path: Original path from config
992+
is_valid: Whether validation passed
993+
method: Validation method used
994+
"""
995+
self._validation_results.append((file_type, original_path, is_valid, method))
906996

907-
Returns:
908-
Tuple of (is_available, method_used)
909-
"""
910-
# Convert GitLab web URLs to API URLs for accurate validation
911-
# (same as done in fetch_url_with_auth during download)
912-
original_url = url
913-
if detect_repo_type(url) == 'gitlab' and '/-/raw/' in url:
914-
url = convert_gitlab_url_to_api(url)
915-
if url != original_url:
916-
info(f'Using API URL for validation: {url}')
917-
918-
# Try HEAD request first
919-
if check_file_with_head(url, auth_headers):
920-
return (True, 'HEAD')
921-
922-
# Fallback to Range request
923-
if check_file_with_range(url, auth_headers):
924-
return (True, 'Range')
925-
926-
return (False, 'None')
997+
def clear_results(self) -> None:
998+
"""Clear accumulated validation results."""
999+
self._validation_results.clear()
9271000

9281001

9291002
def validate_all_config_files(
@@ -945,10 +1018,9 @@ def validate_all_config_files(
9451018
files_to_check: list[tuple[str, str, str, bool]] = []
9461019
results: list[tuple[str, str, bool, str]] = []
9471020

948-
# Get authentication headers if needed
949-
auth_headers = None
950-
if config_source.startswith('http'):
951-
auth_headers = get_auth_headers(config_source, auth_param)
1021+
# Create file validator - generates authentication per-URL for proper
1022+
# handling of mixed repositories (e.g., GitHub + GitLab files)
1023+
validator = FileValidator(auth_param)
9521024

9531025
# Collect all files that need to be validated
9541026
base_url = config.get('base-url')
@@ -1035,26 +1107,21 @@ def validate_all_config_files(
10351107
all_valid = True
10361108

10371109
for file_type, original_path, resolved_path, is_remote in files_to_check:
1038-
if is_remote:
1039-
# Validate remote URL
1040-
is_valid, method = validate_file_availability(resolved_path, auth_headers)
1041-
results.append((file_type, original_path, is_valid, method))
1110+
# Use FileValidator for unified validation with per-URL authentication
1111+
is_valid, method = validator.validate(resolved_path, is_remote)
1112+
results.append((file_type, original_path, is_valid, method))
10421113

1043-
if is_valid:
1114+
if is_valid:
1115+
if is_remote:
10441116
info(f' [OK] {file_type}: {original_path} (remote, validated via {method})')
10451117
else:
1046-
error(f' [FAIL] {file_type}: {original_path} (remote, not accessible)')
1047-
all_valid = False
1048-
else:
1049-
# Validate local file
1050-
local_path = Path(resolved_path)
1051-
if local_path.exists() and local_path.is_file():
1052-
results.append((file_type, original_path, True, 'Local'))
10531118
info(f' [OK] {file_type}: {original_path} (local file exists)')
1119+
else:
1120+
if is_remote:
1121+
error(f' [FAIL] {file_type}: {original_path} (remote, not accessible)')
10541122
else:
1055-
results.append((file_type, original_path, False, 'Local'))
10561123
error(f' [FAIL] {file_type}: {original_path} (local file not found at {resolved_path})')
1057-
all_valid = False
1124+
all_valid = False
10581125

10591126
return all_valid, results
10601127

@@ -2889,64 +2956,6 @@ def process_file_downloads(
28892956
return True
28902957

28912958

2892-
def validate_skill_files(
2893-
skill_config: dict[str, Any],
2894-
config_source: str,
2895-
auth_param: str | None = None,
2896-
) -> tuple[bool, list[tuple[str, bool, str]]]:
2897-
"""Validate all files in a skill configuration before download.
2898-
2899-
Checks that all files specified in the skill configuration are accessible.
2900-
For remote skills, this uses HEAD/Range requests to verify URL accessibility.
2901-
For local skills, this checks that the files exist on disk.
2902-
2903-
Args:
2904-
skill_config: Skill configuration dict with 'name', 'base', and 'files' keys
2905-
config_source: Where the config was loaded from (URL or local path)
2906-
auth_param: Optional authentication parameter for private repos
2907-
2908-
Returns:
2909-
Tuple of (all_valid, validation_results)
2910-
validation_results is a list of (file_path, is_valid, method) tuples
2911-
"""
2912-
skill_name = skill_config.get('name', 'unknown')
2913-
base = skill_config.get('base', '')
2914-
files = skill_config.get('files', [])
2915-
2916-
results: list[tuple[str, bool, str]] = []
2917-
all_valid = True
2918-
2919-
# Check if SKILL.md is in files list (required per Claude documentation)
2920-
if 'SKILL.md' not in files:
2921-
error(f"Skill '{skill_name}': SKILL.md is required but not in files list")
2922-
all_valid = False
2923-
2924-
# Validate each file
2925-
for file_path in files:
2926-
if not isinstance(file_path, str):
2927-
continue
2928-
2929-
# Build full path: base + file_path
2930-
if base.startswith(('http://', 'https://')):
2931-
# Remote base - convert tree/blob URLs to raw URLs
2932-
raw_base = convert_to_raw_url(base)
2933-
full_url = f"{raw_base.rstrip('/')}/{file_path}"
2934-
auth_headers = get_auth_headers(full_url, auth_param)
2935-
is_valid, method = validate_file_availability(full_url, auth_headers)
2936-
else:
2937-
# Local base - resolve path
2938-
resolved_base, _ = resolve_resource_path(base, config_source, None)
2939-
full_path = Path(resolved_base) / file_path
2940-
is_valid = full_path.exists() and full_path.is_file()
2941-
method = 'Local'
2942-
2943-
results.append((file_path, is_valid, method))
2944-
if not is_valid:
2945-
all_valid = False
2946-
2947-
return all_valid, results
2948-
2949-
29502959
def process_skill(
29512960
skill_config: dict[str, Any],
29522961
skills_dir: Path,

0 commit comments

Comments
 (0)