Skip to content

Commit befc0d5

Browse files
jeremyeder and claude authored
feat: Implement multi-repository batch assessment (Phase 1 of issue #68) (#74)
* feat: Implement multi-repository batch assessment (Phase 1 of issue #68)

  Implement comprehensive infrastructure for batch repository assessment with secure cloning, persistent caching, and error handling.

  ## New Components

  ### Data Models
  - BatchAssessment: Container for multiple repository assessments
  - RepositoryResult: Individual repository assessment result with error tracking
  - BatchSummary: Aggregated statistics across batch
  - FailureTracker: Detailed failure information with retry logic

  ### Services
  - RepositoryManager: Secure cloning with HTTPS-only validation, shallow clones, disabled Git hooks, and path traversal prevention
  - AssessmentCache: SQLite-backed result caching with 7-day TTL, parameterized queries for SQL injection prevention
  - BatchScanner: Orchestrates batch assessments with progress tracking and individual error handling

  ### CLI
  - assess-batch command: Supports file-based (--repos-file) and inline (--repos) repository input, configurable output directory, caching control

  ### Testing
  - test_batch_assessment.py: Unit tests for data models and validation
  - test_repository_manager.py: URL validation, path traversal prevention
  - test_assessment_cache.py: Database operations and TTL handling
  - test_security_controls.py: Comprehensive security verification
    - HTTPS-only enforcement
    - Path traversal prevention
    - SQL injection prevention
    - Parameterized queries validation

  ## Security Features
  - HTTPS-only URL validation (git:// also allowed)
  - Shallow cloning (depth=1) for efficiency
  - Disabled Git hooks during clone
  - Path traversal prevention with resolve() and relative_to()
  - SQL injection prevention with parameterized queries
  - No shell execution (subprocess list arguments, not shell=True)
  - Input validation and whitespace handling

  ## Summary Statistics
  - Score distribution by certification level
  - Average score across successful assessments
  - Language breakdown aggregation
  - Top 10 failing attributes ranking
  - Success rate calculation

  ## Caching System
  - SQLite database at .agentready/cache/assessments.db
  - TTL-based expiration (default: 7 days)
  - Indexed queries for performance
  - Cleanup of expired entries

  This implementation maintains the existing "library-first architecture" and follows all established patterns and security practices from the codebase.

* fix: Add missing test fixtures and improve URL validation for batch assessment

  - Add sample_repository and sample_assessment fixtures to test_batch_assessment.py
  - Fix Repository fixture to use temporary directory with .git folder
  - Improve validate_url() to properly detect URL protocols vs local paths
  - Add empty URL validation check
  - All 48 batch assessment tests now passing

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3dfce24 commit befc0d5

File tree

11 files changed

+2192
-0
lines changed

11 files changed

+2192
-0
lines changed

src/agentready/cli/assess_batch.py

Lines changed: 433 additions & 0 deletions
Large diffs are not rendered by default.

src/agentready/cli/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
from ..services.scanner import Scanner
5555
from ..utils.subprocess_utils import safe_subprocess_run
5656
from .align import align
57+
from .assess_batch import assess_batch
5758
from .bootstrap import bootstrap
5859
from .demo import demo
5960
from .learn import learn
@@ -489,6 +490,7 @@ def generate_config():
489490

490491
# Register commands
491492
cli.add_command(align)
493+
cli.add_command(assess_batch)
492494
cli.add_command(bootstrap)
493495
cli.add_command(demo)
494496
cli.add_command(learn)

src/agentready/models/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
from agentready.models.assessment import Assessment
44
from agentready.models.attribute import Attribute
5+
from agentready.models.batch_assessment import (
6+
BatchAssessment,
7+
BatchSummary,
8+
FailureTracker,
9+
RepositoryResult,
10+
)
511
from agentready.models.citation import Citation
612
from agentready.models.config import Config
713
from agentready.models.discovered_skill import DiscoveredSkill
@@ -21,16 +27,20 @@
2127
"Assessment",
2228
"AssessmentMetadata",
2329
"Attribute",
30+
"BatchAssessment",
31+
"BatchSummary",
2432
"Citation",
2533
"CommandFix",
2634
"Config",
2735
"DiscoveredSkill",
36+
"FailureTracker",
2837
"FileCreationFix",
2938
"FileModificationFix",
3039
"Finding",
3140
"Fix",
3241
"MultiStepFix",
3342
"Repository",
43+
"RepositoryResult",
3444
"Theme",
3545
"validate_theme_contrast",
3646
]
src/agentready/models/batch_assessment.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
"""Batch assessment models for multi-repository evaluation."""
2+
3+
from dataclasses import dataclass, field
4+
from datetime import datetime
5+
from pathlib import Path
6+
7+
from .assessment import Assessment
8+
9+
10+
@dataclass
class RepositoryResult:
    """Outcome of assessing one repository as part of a batch.

    A result is either a success (``assessment`` is set, ``error`` is not)
    or a failure (``error`` and ``error_type`` are set, ``assessment`` is
    ``None``). ``__post_init__`` rejects any other combination.

    Attributes:
        repository_url: URL or path of the repository assessed
        assessment: The assessment result (None if failed to assess)
        error: Error message if assessment failed
        error_type: Type of error (e.g., "clone_error", "assessment_error",
            "validation_error")
        duration_seconds: Time taken to complete
        cached: Whether this result came from cache
    """

    repository_url: str
    assessment: Assessment | None
    error: str | None = None
    error_type: str | None = None
    duration_seconds: float = 0.0
    cached: bool = False

    def __post_init__(self):
        """Validate that the result is exactly one of success or failure.

        Raises:
            ValueError: If neither an assessment nor an error is given, if
                both are given, or if an error lacks an ``error_type``.
        """
        if self.assessment is None and not self.error:
            raise ValueError("Either assessment or error must be provided")

        if self.assessment is not None and self.error:
            raise ValueError("Cannot have both assessment and error")

        if self.error and not self.error_type:
            raise ValueError("error_type must be provided when error is set")

    def is_success(self) -> bool:
        """Return True when an assessment object is present."""
        return self.assessment is not None

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON serialization.

        Returns:
            A dict with the keys ``repository_url``, ``assessment``,
            ``error``, ``error_type``, ``duration_seconds`` and ``cached``.
            ``assessment`` holds ``self.assessment.to_dict()`` on success
            and ``None`` on failure.
        """
        # Use the same "is not None" test as is_success(), not truthiness:
        # an assessment object that happens to be falsy (e.g. defines
        # __bool__ or __len__) must still be serialized, otherwise a
        # successful result would be written out with "assessment": None.
        serialized = self.assessment.to_dict() if self.is_success() else None
        return {
            "repository_url": self.repository_url,
            "assessment": serialized,
            "error": self.error,
            "error_type": self.error_type,
            "duration_seconds": self.duration_seconds,
            "cached": self.cached,
        }
55+
56+
57+
@dataclass
58+
class BatchSummary:
59+
"""Summary statistics for a batch assessment.
60+
61+
Attributes:
62+
total_repositories: Total repositories processed
63+
successful_assessments: Number of successful assessments
64+
failed_assessments: Number of failed assessments
65+
average_score: Average overall score across successful assessments
66+
score_distribution: Count of repos by certification level
67+
language_breakdown: Aggregated language detection across repos
68+
top_failing_attributes: Most frequently failed attributes
69+
"""
70+
71+
total_repositories: int
72+
successful_assessments: int
73+
failed_assessments: int
74+
average_score: float
75+
score_distribution: dict[str, int] = field(default_factory=dict)
76+
language_breakdown: dict[str, int] = field(default_factory=dict)
77+
top_failing_attributes: list[dict[str, str | int]] = field(default_factory=list)
78+
79+
def to_dict(self) -> dict:
80+
"""Convert to dictionary for JSON serialization."""
81+
return {
82+
"total_repositories": self.total_repositories,
83+
"successful_assessments": self.successful_assessments,
84+
"failed_assessments": self.failed_assessments,
85+
"average_score": self.average_score,
86+
"score_distribution": self.score_distribution,
87+
"language_breakdown": self.language_breakdown,
88+
"top_failing_attributes": self.top_failing_attributes,
89+
}
90+
91+
92+
@dataclass
class BatchAssessment:
    """A full batch run: per-repository results plus aggregated summary.

    Attributes:
        batch_id: Unique identifier for this batch
        timestamp: When batch started
        results: Individual repository results
        summary: Aggregated statistics
        total_duration_seconds: Total time for entire batch
        agentready_version: AgentReady version used
        command: CLI command that triggered this batch
        schema_version: Schema version written into the serialized output
    """

    batch_id: str
    timestamp: datetime
    results: list[RepositoryResult]
    summary: BatchSummary
    total_duration_seconds: float
    agentready_version: str = "unknown"
    command: str = ""
    schema_version: str = "1.0.0"

    # Unannotated on purpose: a class-level constant, not a dataclass field.
    CURRENT_SCHEMA_VERSION = "1.0.0"

    def __post_init__(self):
        """Check that the summary counts agree with the actual results.

        Raises:
            ValueError: If ``results`` is empty, or if the summary's
                successful/failed counts differ from the counts derived
                from ``results``.
        """
        if not self.results:
            raise ValueError("Batch must have at least one result")

        # Derive both counts up front; the checks below only compare them.
        successful = len([r for r in self.results if r.is_success()])
        failed = len(self.results) - successful

        if successful != self.summary.successful_assessments:
            raise ValueError(
                f"Summary successful_assessments ({self.summary.successful_assessments}) "
                f"doesn't match actual successful results ({successful})"
            )

        if failed != self.summary.failed_assessments:
            raise ValueError(
                f"Summary failed_assessments ({self.summary.failed_assessments}) "
                f"doesn't match actual failed results ({failed})"
            )

    def get_success_rate(self) -> float:
        """Return the share of successful results as a percentage.

        Yields 0.0 when ``results`` is empty so the division is never
        attempted with a zero denominator.
        """
        return (
            (self.summary.successful_assessments / len(self.results)) * 100
            if self.results
            else 0.0
        )

    def to_dict(self) -> dict:
        """Return the batch as a plain dict for JSON serialization.

        The timestamp is rendered with ``isoformat()``, each result and the
        summary via their own ``to_dict()``, and a computed ``success_rate``
        entry is included.
        """
        return dict(
            schema_version=self.schema_version,
            batch_id=self.batch_id,
            timestamp=self.timestamp.isoformat(),
            results=[r.to_dict() for r in self.results],
            summary=self.summary.to_dict(),
            total_duration_seconds=self.total_duration_seconds,
            success_rate=self.get_success_rate(),
            agentready_version=self.agentready_version,
            command=self.command,
        )
155+
156+
157+
@dataclass
class FailureTracker:
    """Record of a single repository failure within a batch run.

    Attributes:
        repository_url: URL of failed repository
        error_type: Type of error
        error_message: Detailed error message
        timestamp: When failure occurred (defaults to ``datetime.now()``)
        retry_count: Number of retry attempts
        can_retry: Whether this error is retryable
    """

    repository_url: str
    error_type: str
    error_message: str
    timestamp: datetime = field(default_factory=datetime.now)
    retry_count: int = 0
    can_retry: bool = True

    # Error types for which can_retry is left as supplied; any other
    # error_type forces can_retry to False in __post_init__.
    RETRYABLE_ERRORS = {
        "network_error",
        "timeout",
        "rate_limit",
        "temporary_failure",
    }

    def __post_init__(self):
        """Force ``can_retry`` off for error types that are not retryable."""
        if self.error_type in self.RETRYABLE_ERRORS:
            # Retryable type: keep whatever the caller passed for can_retry.
            return
        self.can_retry = False

    def to_dict(self) -> dict:
        """Return the failure as a plain dict for JSON serialization.

        The timestamp is rendered with ``isoformat()``; all other fields
        are emitted unchanged.
        """
        return dict(
            repository_url=self.repository_url,
            error_type=self.error_type,
            error_message=self.error_message,
            timestamp=self.timestamp.isoformat(),
            retry_count=self.retry_count,
            can_retry=self.can_retry,
        )

0 commit comments

Comments
 (0)