From 15ef1c2caf846dea640eaf6d145744a1f3c9d92c Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Mon, 6 Oct 2025 00:28:19 +0530 Subject: [PATCH] Add remote manifest validation against local schema - Validate remote manifests against local schema to prevent runtime errors from schema drift - Validation only applies to remote manifests (HTTP URLs) - Local files continue to work without validation - Uses existing jsonschema dependency with graceful fallback - Provides clear error messages when validation fails Addresses issue #2613 --- sherlock_project/sites.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index b7aaf4c58..b5efe1584 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -145,6 +145,9 @@ def __init__( raise ValueError( f"Problem parsing json contents at '{data_file_path}': {error}." ) + + # Validate remote manifest against local schema + self._validate_remote_manifest(site_data, data_file_path) else: # Reference is to a file. @@ -210,6 +213,22 @@ def __init__( return + def _validate_remote_manifest(self, manifest_data, data_file_path): + """Validate remote manifest against local schema to prevent runtime errors from schema drift.""" + try: + from jsonschema import validate, ValidationError + import os + schema_path = os.path.join(os.path.dirname(__file__), "resources", "data.schema.json") + with open(schema_path, "r", encoding="utf-8") as f: + schema = json.load(f) + validate(instance=manifest_data, schema=schema) + except ImportError: + print("Warning: jsonschema not available, skipping manifest validation.") + except ValidationError as e: + raise ValueError(f"Remote manifest validation failed: {e.message}\nThis may indicate schema drift.") + except FileNotFoundError: + print("Warning: Local schema file not found, skipping validation.") + def remove_nsfw_sites(self, do_not_remove: list = []): """ Remove NSFW sites from the sites, if isNSFW flag is true for site