Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions sherlock_project/sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def __init__(
raise ValueError(
f"Problem parsing json contents at '{data_file_path}': {error}."
)

# Validate remote manifest against local schema
self._validate_remote_manifest(site_data, data_file_path)

else:
# Reference is to a file.
Expand Down Expand Up @@ -210,6 +213,22 @@ def __init__(

return

def _validate_remote_manifest(self, manifest_data, data_file_path):
"""Validate remote manifest against local schema to prevent runtime errors from schema drift."""
try:
from jsonschema import validate, ValidationError
import os
schema_path = os.path.join(os.path.dirname(__file__), "resources", "data.schema.json")
with open(schema_path, "r", encoding="utf-8") as f:
schema = json.load(f)
validate(instance=manifest_data, schema=schema)
except ImportError:
print("Warning: jsonschema not available, skipping manifest validation.")
except ValidationError as e:
raise ValueError(f"Remote manifest validation failed: {e.message}\nThis may indicate schema drift.")
except FileNotFoundError:
print("Warning: Local schema file not found, skipping validation.")

def remove_nsfw_sites(self, do_not_remove: list = []):
"""
Remove NSFW sites from the sites, if isNSFW flag is true for site
Expand Down