fix: resolve merge conflict - allow weight boosting in Pydantic Config model

jeremyeder · claude · jeremyeder · commit 3994d152bd15 · 2025-11-24T12:10:02.000-05:00
Merged main's Pydantic BaseModel refactor while preserving our weight boosting fix.

Changes from main:
- Migrated Config from dataclass to Pydantic BaseModel
- Added field validators for all config fields
- Added model_dump() for JSON serialization

Changes from our branch (preserved):
- Removed weight <= 1.0 restriction (allows boosting)
- Removed weights sum to 1.0 validation
- Allows weights like claude_md_file: 2.0 for heavy penalties

Result: Pydantic validation with weight boosting support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# [2.4.0](https://github.com/ambient-code/agentready/compare/v2.3.0...v2.4.0) (2025-11-24)
+
+
+### Features
+
+* Phase 2 Task 4 - Replace manual config validation with Pydantic ([#134](https://github.com/ambient-code/agentready/issues/134)) ([d83cf58](https://github.com/ambient-code/agentready/commit/d83cf58a6eb0b1f889a3cfc49a8fb816db2a1f3e))
+
 # [2.3.0](https://github.com/ambient-code/agentready/compare/v2.2.0...v2.3.0) (2025-11-24)
 
 
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -10,7 +10,7 @@
 
 AgentReady is a Python CLI tool that evaluates repositories against 25 carefully researched attributes that make codebases more effective for AI-assisted development. It generates interactive HTML reports, version-control friendly Markdown reports, and machine-readable JSON output.
 
-**Current Status**: v2.3.0 - Core assessment engine complete, 22/31 attributes implemented (9 stubs), LLM-powered learning, research report management
+**Current Status**: v2.4.0 - Core assessment engine complete, 22/31 attributes implemented (9 stubs), LLM-powered learning, research report management
 
 **Self-Assessment Score**: 80.0/100 (Gold) - See `examples/self-assessment/`
 
@@ -622,5 +622,5 @@ Use the @agent-github-pages-docs to revise all documentation in docs/ based on:
 ---
 
 **Last Updated**: 2025-11-24 by Jeremy Eder
-**AgentReady Version**: 2.3.0
+**AgentReady Version**: 2.4.0
 **Self-Assessment**: 80.0/100 (Gold) ✨
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -16,13 +16,19 @@ Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md).
 1. Fork the repository
 2. Clone your fork:
    ```bash
-   git clone https://github.com/YOUR_USERNAME/PROJECT_NAME.git
-   cd PROJECT_NAME
+   git clone https://github.com/YOUR_USERNAME/agentready.git
+   cd agentready
    ```
 
 3. Set up your development environment:
 
-4. Install pre-commit hooks:
+4. Add the upstream remote (to sync with the original repository):
+   ```bash
+   git remote add upstream https://github.com/ambient-code/agentready.git
+   git fetch upstream
+   ```
+
+5. Install pre-commit hooks:
    ```bash
    pre-commit install
    ```
diff --git a/README.md b/README.md
@@ -63,16 +63,16 @@ For one-time analysis without infrastructure changes:
 
 ```bash
 # Assess current repository
-agentready .
+agentready assess .
 
 # Assess another repository
-agentready /path/to/your/repo
+agentready assess /path/to/your/repo
 
 # Specify custom configuration
-agentready /path/to/repo --config my-config.yaml
+agentready assess /path/to/repo --config my-config.yaml
 
 # Custom output directory
-agentready /path/to/repo --output-dir ./reports
+agentready assess /path/to/repo --output-dir ./reports
 ```
 
 ### Example Output
@@ -156,10 +156,10 @@ output_dir: ./custom-reports
 
 ```bash
 # Assessment commands
-agentready PATH                          # Assess repository at PATH
-agentready PATH --verbose                # Show detailed progress
-agentready PATH --config FILE            # Use custom configuration
-agentready PATH --output-dir DIR         # Custom report location
+agentready assess PATH                   # Assess repository at PATH
+agentready assess PATH --verbose         # Show detailed progress
+agentready assess PATH --config FILE     # Use custom configuration
+agentready assess PATH --output-dir DIR  # Custom report location
 
 # Configuration commands
 agentready --validate-config FILE        # Validate configuration
@@ -273,4 +273,4 @@ Contributions welcome! Please ensure:
 
 ---
 
-**Quick Start**: `pip install -e ".[dev]" && agentready .` - Ready in <5 minutes!
+**Quick Start**: `pip install -e ".[dev]" && agentready assess .` - Ready in <5 minutes!
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "agentready"
-version = "2.3.0"
+version = "2.4.0"
 description = "Assess and bootstrap git repositories for AI-assisted development with automated remediation and continuous learning"
 authors = [{name = "Jeremy Eder", email = "jeder@redhat.com"}]
 readme = "README.md"
@@ -25,6 +25,7 @@ dependencies = [
     "anthropic>=0.74.0",
     "jsonschema>=4.17.0",
     "requests>=2.31.0",
+    "pydantic>=2.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/agentready/cli/assess_batch.py b/src/agentready/cli/assess_batch.py
@@ -12,7 +12,7 @@
 from ..reporters.html import HTMLReporter
 from ..reporters.markdown import MarkdownReporter
 from ..services.batch_scanner import BatchScanner
-from ..utils.security import validate_config_dict, validate_path
+from pydantic import ValidationError
 
 
 def _get_agentready_version() -> str:
@@ -29,67 +29,44 @@ def _get_agentready_version() -> str:
 
 
 def _load_config(config_path: Path) -> Config:
-    """Load configuration from YAML file with validation.
+    """Load configuration from YAML file with Pydantic validation.
 
-    Uses centralized security utilities from utils.security module.
-    """
-    import yaml
+    Uses Pydantic for automatic validation, replacing duplicated manual
+    validation code with the Config.from_yaml_dict() classmethod.
 
-    with open(config_path, "r", encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-
-    # Define config schema for validation
-    schema = {
-        "weights": {str: (int, float)},
-        "excluded_attributes": [str],
-        "language_overrides": {str: list},
-        "output_dir": str,
-        "report_theme": str,
-        "custom_theme": dict,
-    }
-
-    # Validate config structure using centralized utility
-    validated = validate_config_dict(data, schema)
-
-    # Additional nested validations for complex types
-    if "language_overrides" in validated:
-        for lang, patterns in validated["language_overrides"].items():
-            if not isinstance(patterns, list):
-                raise ValueError(
-                    f"'language_overrides' values must be lists, got {type(patterns).__name__}"
-                )
-            for pattern in patterns:
-                if not isinstance(pattern, str):
-                    raise ValueError(
-                        f"'language_overrides' patterns must be strings, got {type(pattern).__name__}"
-                    )
-
-    if "custom_theme" in validated:
-        for key, value in validated["custom_theme"].items():
-            if not isinstance(key, str):
-                raise ValueError(
-                    f"'custom_theme' keys must be strings, got {type(key).__name__}"
-                )
-            if not isinstance(value, str):
-                raise ValueError(
-                    f"'custom_theme' values must be strings, got {type(value).__name__}"
-                )
+    Args:
+        config_path: Path to YAML configuration file
 
-    # Validate and sanitize output_dir path
-    output_dir = None
-    if "output_dir" in validated:
-        output_dir = validate_path(
-            validated["output_dir"], allow_system_dirs=False, must_exist=False
-        )
+    Returns:
+        Validated Config instance
 
-    return Config(
-        weights=validated.get("weights", {}),
-        excluded_attributes=validated.get("excluded_attributes", []),
-        language_overrides=validated.get("language_overrides", {}),
-        output_dir=output_dir,
-        report_theme=validated.get("report_theme", "default"),
-        custom_theme=validated.get("custom_theme"),
-    )
+    Raises:
+        SystemExit: If validation fails (errors are echoed to stderr; exit code 1)
+        FileNotFoundError: If config file doesn't exist
+        yaml.YAMLError: If YAML parsing fails
+    """
+    import sys
+
+    import yaml
+
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+
+        # Pydantic handles all validation automatically
+        return Config.from_yaml_dict(data)
+    except ValidationError as e:
+        # Convert Pydantic validation errors to user-friendly messages
+        errors = []
+        for error in e.errors():
+            field = " → ".join(str(x) for x in error["loc"])
+            msg = error["msg"]
+            errors.append(f"  - {field}: {msg}")
+
+        click.echo("Configuration validation failed:", err=True)
+        for error in errors:
+            click.echo(error, err=True)
+        sys.exit(1)
 
 
 def _generate_multi_reports(batch_assessment, output_path: Path, verbose: bool) -> None:
diff --git a/src/agentready/cli/main.py b/src/agentready/cli/main.py
@@ -18,7 +18,7 @@
 from ..reporters.markdown import MarkdownReporter
 from ..services.research_loader import ResearchLoader
 from ..services.scanner import Scanner
-from ..utils.security import validate_config_dict, validate_path
+from pydantic import ValidationError
 from ..utils.subprocess_utils import safe_subprocess_run
 from .align import align
 from .assess_batch import assess_batch
@@ -242,73 +242,45 @@ def run_assessment(repository_path, verbose, output_dir, config_path):
 
 
 def load_config(config_path: Path) -> Config:
-    """Load configuration from YAML file with validation.
+    """Load configuration from YAML file with Pydantic validation.
 
-    Security: Validates YAML structure to prevent injection attacks
-    and malformed data from causing crashes or unexpected behavior.
-    Uses centralized security utilities from utils.security module.
+    Uses Pydantic for automatic validation, replacing 67 lines of manual
+    validation code with declarative field validators.
+
+    Security: Uses yaml.safe_load() for safe YAML parsing and Pydantic
+    validators for type checking and path sanitization.
+
+    Args:
+        config_path: Path to YAML configuration file
+
+    Returns:
+        Validated Config instance
+
+    Raises:
+        SystemExit: If validation fails (errors are echoed to stderr; exit code 1)
+        FileNotFoundError: If config file doesn't exist
+        yaml.YAMLError: If YAML parsing fails
     """
     import yaml
 
-    with open(config_path, "r", encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-
-    # Define config schema for validation
-    schema = {
-        "weights": {str: (int, float)},  # dict[str, int|float]
-        "excluded_attributes": [str],  # list[str]
-        "language_overrides": {
-            str: list
-        },  # dict[str, list] (nested list validated separately)
-        "output_dir": str,
-        "report_theme": str,
-        "custom_theme": dict,  # dict (nested types validated separately)
-    }
-
-    # Validate config structure using centralized utility
-    validated = validate_config_dict(data, schema)
-
-    # Additional nested validations for complex types
-    if "language_overrides" in validated:
-        lang_overrides = validated["language_overrides"]
-        for lang, patterns in lang_overrides.items():
-            if not isinstance(patterns, list):
-                raise ValueError(
-                    f"'language_overrides' values must be lists, got {type(patterns).__name__}"
-                )
-            for pattern in patterns:
-                if not isinstance(pattern, str):
-                    raise ValueError(
-                        f"'language_overrides' patterns must be strings, got {type(pattern).__name__}"
-                    )
-
-    if "custom_theme" in validated:
-        custom_theme = validated["custom_theme"]
-        for key, value in custom_theme.items():
-            if not isinstance(key, str):
-                raise ValueError(
-                    f"'custom_theme' keys must be strings, got {type(key).__name__}"
-                )
-            if not isinstance(value, str):
-                raise ValueError(
-                    f"'custom_theme' values must be strings, got {type(value).__name__}"
-                )
-
-    # Validate and sanitize output_dir path
-    output_dir = None
-    if "output_dir" in validated:
-        output_dir = validate_path(
-            validated["output_dir"], allow_system_dirs=False, must_exist=False
-        )
-
-    return Config(
-        weights=validated.get("weights", {}),
-        excluded_attributes=validated.get("excluded_attributes", []),
-        language_overrides=validated.get("language_overrides", {}),
-        output_dir=output_dir,
-        report_theme=validated.get("report_theme", "default"),
-        custom_theme=validated.get("custom_theme"),
-    )
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+
+        # Pydantic handles all validation automatically
+        return Config.from_yaml_dict(data)
+    except ValidationError as e:
+        # Convert Pydantic validation errors to user-friendly messages
+        errors = []
+        for error in e.errors():
+            field = " → ".join(str(x) for x in error["loc"])
+            msg = error["msg"]
+            errors.append(f"  - {field}: {msg}")
+
+        click.echo("Configuration validation failed:", err=True)
+        for error in errors:
+            click.echo(error, err=True)
+        sys.exit(1)
 
 
 @cli.command()
diff --git a/src/agentready/models/config.py b/src/agentready/models/config.py