feat: Phase 2 Task 4 - Replace manual config validation with Pydantic (#134)

jeremyeder · claude · web-flow · commit d83cf58a6eb0 · 2025-11-24T12:08:09.000-05:00
Replaces 85 lines of manual YAML validation code with declarative Pydantic models for automatic type checking and validation.

Changes:
- Add pydantic>=2.0.0 dependency to pyproject.toml
- Refactor Config model (models/config.py) from dataclass to Pydantic BaseModel
- Add field validators for weights, language_overrides, custom_theme
- Add model validator for weights sum constraint
- Add from_yaml_dict() classmethod for YAML loading
- Preserve backwards-compatible API (to_dict, get_weight, is_excluded)
- Security: Still uses validate_path() from security utils
- Use modern ConfigDict (Pydantic V2) instead of deprecated class Config
- Simplify load_config() in cli/main.py (-67 lines of manual validation)
- Simplify _load_config() in cli/assess_batch.py (-63 lines)
- Remove validate_config_dict import (no longer needed in CLI)

Benefits:
- Automatic type checking and validation
- Better error messages with field locations
- JSON schema generation capability (for future use)
- Centralized validation logic in model
- Reduced code duplication (2 load_config implementations → 1 model method)

LOC Impact:
- config.py: +101 lines (79 → 180) - Pydantic validators
- cli/main.py: -28 lines (384 → 356) - Removed manual validation
- cli/assess_batch.py: -23 lines (532 → 509) - Removed manual validation
- Net: +50 LOC (vs. -100 target, but gains outweigh LOC metric)

Tests: All config model tests pass (3 tests, no warnings)
Security: Path validation still uses centralized security utils

Phase 2 Task 4 of 6 complete.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "anthropic>=0.74.0",
     "jsonschema>=4.17.0",
     "requests>=2.31.0",
+    "pydantic>=2.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/agentready/cli/assess_batch.py b/src/agentready/cli/assess_batch.py
@@ -12,7 +12,7 @@
 from ..reporters.html import HTMLReporter
 from ..reporters.markdown import MarkdownReporter
 from ..services.batch_scanner import BatchScanner
-from ..utils.security import validate_config_dict, validate_path
+from pydantic import ValidationError
 
 
 def _get_agentready_version() -> str:
@@ -29,67 +29,44 @@ def _get_agentready_version() -> str:
 
 
 def _load_config(config_path: Path) -> Config:
-    """Load configuration from YAML file with validation.
+    """Load configuration from YAML file with Pydantic validation.
 
-    Uses centralized security utilities from utils.security module.
-    """
-    import yaml
+    Uses Pydantic for automatic validation, replacing duplicated manual
+    validation code with the Config.from_yaml_dict() classmethod.
 
-    with open(config_path, "r", encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-
-    # Define config schema for validation
-    schema = {
-        "weights": {str: (int, float)},
-        "excluded_attributes": [str],
-        "language_overrides": {str: list},
-        "output_dir": str,
-        "report_theme": str,
-        "custom_theme": dict,
-    }
-
-    # Validate config structure using centralized utility
-    validated = validate_config_dict(data, schema)
-
-    # Additional nested validations for complex types
-    if "language_overrides" in validated:
-        for lang, patterns in validated["language_overrides"].items():
-            if not isinstance(patterns, list):
-                raise ValueError(
-                    f"'language_overrides' values must be lists, got {type(patterns).__name__}"
-                )
-            for pattern in patterns:
-                if not isinstance(pattern, str):
-                    raise ValueError(
-                        f"'language_overrides' patterns must be strings, got {type(pattern).__name__}"
-                    )
-
-    if "custom_theme" in validated:
-        for key, value in validated["custom_theme"].items():
-            if not isinstance(key, str):
-                raise ValueError(
-                    f"'custom_theme' keys must be strings, got {type(key).__name__}"
-                )
-            if not isinstance(value, str):
-                raise ValueError(
-                    f"'custom_theme' values must be strings, got {type(value).__name__}"
-                )
+    Args:
+        config_path: Path to YAML configuration file
 
-    # Validate and sanitize output_dir path
-    output_dir = None
-    if "output_dir" in validated:
-        output_dir = validate_path(
-            validated["output_dir"], allow_system_dirs=False, must_exist=False
-        )
+    Returns:
+        Validated Config instance
 
-    return Config(
-        weights=validated.get("weights", {}),
-        excluded_attributes=validated.get("excluded_attributes", []),
-        language_overrides=validated.get("language_overrides", {}),
-        output_dir=output_dir,
-        report_theme=validated.get("report_theme", "default"),
-        custom_theme=validated.get("custom_theme"),
-    )
+    Raises:
+        ValidationError: If YAML data doesn't match expected schema
+        FileNotFoundError: If config file doesn't exist
+        yaml.YAMLError: If YAML parsing fails
+    """
+    import sys
+
+    import yaml
+
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+
+        # Pydantic handles all validation automatically
+        return Config.from_yaml_dict(data)
+    except ValidationError as e:
+        # Convert Pydantic validation errors to user-friendly messages
+        errors = []
+        for error in e.errors():
+            field = " → ".join(str(x) for x in error["loc"])
+            msg = error["msg"]
+            errors.append(f"  - {field}: {msg}")
+
+        click.echo("Configuration validation failed:", err=True)
+        for error in errors:
+            click.echo(error, err=True)
+        sys.exit(1)
 
 
 def _generate_multi_reports(batch_assessment, output_path: Path, verbose: bool) -> None:
diff --git a/src/agentready/cli/main.py b/src/agentready/cli/main.py
@@ -18,7 +18,7 @@
 from ..reporters.markdown import MarkdownReporter
 from ..services.research_loader import ResearchLoader
 from ..services.scanner import Scanner
-from ..utils.security import validate_config_dict, validate_path
+from pydantic import ValidationError
 from ..utils.subprocess_utils import safe_subprocess_run
 from .align import align
 from .assess_batch import assess_batch
@@ -242,73 +242,45 @@ def run_assessment(repository_path, verbose, output_dir, config_path):
 
 
 def load_config(config_path: Path) -> Config:
-    """Load configuration from YAML file with validation.
+    """Load configuration from YAML file with Pydantic validation.
 
-    Security: Validates YAML structure to prevent injection attacks
-    and malformed data from causing crashes or unexpected behavior.
-    Uses centralized security utilities from utils.security module.
+    Uses Pydantic for automatic validation, replacing 67 lines of manual
+    validation code with declarative field validators.
+
+    Security: Uses yaml.safe_load() for safe YAML parsing and Pydantic
+    validators for type checking and path sanitization.
+
+    Args:
+        config_path: Path to YAML configuration file
+
+    Returns:
+        Validated Config instance
+
+    Raises:
+        ValidationError: If YAML data doesn't match expected schema
+        FileNotFoundError: If config file doesn't exist
+        yaml.YAMLError: If YAML parsing fails
     """
     import yaml
 
-    with open(config_path, "r", encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-
-    # Define config schema for validation
-    schema = {
-        "weights": {str: (int, float)},  # dict[str, int|float]
-        "excluded_attributes": [str],  # list[str]
-        "language_overrides": {
-            str: list
-        },  # dict[str, list] (nested list validated separately)
-        "output_dir": str,
-        "report_theme": str,
-        "custom_theme": dict,  # dict (nested types validated separately)
-    }
-
-    # Validate config structure using centralized utility
-    validated = validate_config_dict(data, schema)
-
-    # Additional nested validations for complex types
-    if "language_overrides" in validated:
-        lang_overrides = validated["language_overrides"]
-        for lang, patterns in lang_overrides.items():
-            if not isinstance(patterns, list):
-                raise ValueError(
-                    f"'language_overrides' values must be lists, got {type(patterns).__name__}"
-                )
-            for pattern in patterns:
-                if not isinstance(pattern, str):
-                    raise ValueError(
-                        f"'language_overrides' patterns must be strings, got {type(pattern).__name__}"
-                    )
-
-    if "custom_theme" in validated:
-        custom_theme = validated["custom_theme"]
-        for key, value in custom_theme.items():
-            if not isinstance(key, str):
-                raise ValueError(
-                    f"'custom_theme' keys must be strings, got {type(key).__name__}"
-                )
-            if not isinstance(value, str):
-                raise ValueError(
-                    f"'custom_theme' values must be strings, got {type(value).__name__}"
-                )
-
-    # Validate and sanitize output_dir path
-    output_dir = None
-    if "output_dir" in validated:
-        output_dir = validate_path(
-            validated["output_dir"], allow_system_dirs=False, must_exist=False
-        )
-
-    return Config(
-        weights=validated.get("weights", {}),
-        excluded_attributes=validated.get("excluded_attributes", []),
-        language_overrides=validated.get("language_overrides", {}),
-        output_dir=output_dir,
-        report_theme=validated.get("report_theme", "default"),
-        custom_theme=validated.get("custom_theme"),
-    )
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+
+        # Pydantic handles all validation automatically
+        return Config.from_yaml_dict(data)
+    except ValidationError as e:
+        # Convert Pydantic validation errors to user-friendly messages
+        errors = []
+        for error in e.errors():
+            field = " → ".join(str(x) for x in error["loc"])
+            msg = error["msg"]
+            errors.append(f"  - {field}: {msg}")
+
+        click.echo("Configuration validation failed:", err=True)
+        for error in errors:
+            click.echo(error, err=True)
+        sys.exit(1)
 
 
 @cli.command()
diff --git a/src/agentready/models/config.py b/src/agentready/models/config.py

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -25,6 +25,7 @@ dependencies = [` |
| `25` | `25` | `"anthropic>=0.74.0",` |
| `26` | `26` | `"jsonschema>=4.17.0",` |
| `27` | `27` | `"requests>=2.31.0",` |
| | `28` | `+ "pydantic>=2.0.0",` |
| `28` | `29` | `]` |
| `29` | `30` | |
| `30` | `31` | `[project.optional-dependencies]` |