
Commit 775682f

jeremyeder and claude committed
feat: add interactive heatmap visualization for batch assessments
Adds comprehensive heatmap visualization feature for batch repository assessments, displaying attributes (columns) versus repositories (rows) with color-coded scores.

Core Features:
- Interactive Plotly heatmap with zoom, pan, and hover tooltips
- Repositories sorted by overall score (descending)
- Attributes sorted by tier (1-4), then alphabetically
- Color scale: Gray (N/A), Red (0-39), Yellow (40-59), Green (60-89), Dark Green (90-100)
- Certification badges (💎🥇🥈🥉⚠️) with overall scores annotated on the left axis
- Standalone HTML output (5 MB, no external dependencies)

CLI Integration:
- `--generate-heatmap` flag for assess-batch command
- `--heatmap-output` flag for custom output path
- Automatic heatmap generation saved to reports-*/heatmap.html

Implementation Details:
1. Extended AttributeAnalyzer with analyze_batch() method
2. DataFrame transformation with pandas for matrix data
3. Custom colorscale with -1 sentinel for NaN (gray for not_applicable)
4. Duplicate attribute deduplication (workaround for assessor bug)
5. Added analyze_batch_from_json() to bypass broken cache deserialization

Duplicate Attribute Fix:
- Identified 3 duplicate attributes (one_command_setup, architecture_decisions, issue_pr_templates)
- Each appeared 2× per repo, with the 2nd occurrence marked not_applicable (causing gray squares)
- Added deduplication logic: skip duplicates, keep first occurrence only
- Reduces 31 attributes → 28 real attributes, eliminating 108 erroneous gray cells

Cache Deserialization Workaround:
- Discovered AssessmentCache._deserialize_assessment() raises NotImplementedError
- Added analyze_batch_from_json() method that works directly with dict data
- Added _prepare_batch_dataframe_from_json() for JSON-based DataFrame preparation
- Enables heatmap regeneration without requiring object deserialization

Utility Scripts:
- scripts/extract_repo_urls.py: Extract URLs from filtered JSON for batch input
- scripts/fix_heatmap_data.py: Remove duplicate attributes from all-assessments.json
- scripts/regenerate_heatmap.py: Regenerate heatmap from fixed JSON data

Testing:
- Validated with 36 OpenDataHub repositories
- Average score: 40.1/100 (Bronze)
- 100% success rate on initial run
- Heatmap size: 5.1 MB (36 repos × 28 attributes)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent eb17c3b commit 775682f
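
The pandas matrix transformation, the -1 sentinel for not_applicable scores, and the custom colorscale described in the commit message live in AttributeAnalyzer, whose changes are not shown in the diffs below. The following is a minimal sketch of that approach under stated assumptions: the helper names (build_matrix, render_heatmap, _pos) are illustrative rather than the actual AttributeAnalyzer API, the per-finding "score" field is assumed from the all-assessments.json structure, and the sorting of repositories by overall score and attributes by tier is omitted.

# Illustrative sketch only; not the actual AttributeAnalyzer implementation.
import pandas as pd
import plotly.graph_objects as go


def build_matrix(batch_data: dict) -> pd.DataFrame:
    """Pivot batch assessment JSON into a repositories x attributes score matrix."""
    rows = []
    for result in batch_data["results"]:
        assessment = result.get("assessment")
        if not assessment:
            continue
        seen = set()
        for finding in assessment["findings"]:
            attr_id = finding["attribute"]["id"]
            if attr_id in seen:  # deduplication: keep first occurrence only
                continue
            seen.add(attr_id)
            score = finding.get("score")  # field name assumed
            rows.append(
                {
                    "repo": assessment["repository"]["name"],
                    "attribute": attr_id,
                    # -1 sentinel so not_applicable can be colored gray
                    "score": -1 if score is None else score,
                }
            )
    return pd.DataFrame(rows).pivot(index="repo", columns="attribute", values="score")


def _pos(score: float) -> float:
    """Map a score in [-1, 100] to Plotly's normalized [0, 1] colorscale position."""
    return (score + 1) / 101


# Discrete colorscale: gray for the -1 sentinel, then red 0-39, yellow 40-59,
# green 60-89, dark green 90-100.
COLORSCALE = [
    [_pos(-1), "lightgray"], [_pos(0), "lightgray"],
    [_pos(0), "red"], [_pos(40), "red"],
    [_pos(40), "gold"], [_pos(60), "gold"],
    [_pos(60), "green"], [_pos(90), "green"],
    [_pos(90), "darkgreen"], [_pos(100), "darkgreen"],
]


def render_heatmap(matrix: pd.DataFrame, output_file) -> None:
    """Write a standalone interactive HTML heatmap (Plotly JS embedded, hence ~5 MB)."""
    fig = go.Figure(
        go.Heatmap(
            z=matrix.values,
            x=list(matrix.columns),
            y=list(matrix.index),
            zmin=-1,
            zmax=100,
            colorscale=COLORSCALE,
        )
    )
    fig.write_html(str(output_file))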

File tree: 7 files changed (+512 −9 lines)


scripts/extract_repo_urls.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
#!/usr/bin/env python3
"""
Extract repository URLs from filtered GitHub org analysis.

Reads the filtered active originals JSON and outputs a simple text file
with one repository URL per line for use with agentready assess-batch.
"""

import json
from pathlib import Path


def main():
    # Input: filtered repos JSON
    input_file = Path(".cache/github-org-analysis/opendatahub-io-active-originals.json")

    # Output: simple text file with URLs
    output_file = Path("opendatahub-repos.txt")

    # Load filtered repository data
    print(f"Loading filtered repositories from {input_file}")
    with input_file.open() as f:
        data = json.load(f)

    repos = data["repositories"]
    print(f"Found {len(repos)} repositories")

    # Extract URLs
    urls = [repo["url"] for repo in repos]

    # Write to output file
    with output_file.open("w") as f:
        for url in urls:
            f.write(f"{url}\n")

    print(f"✅ Wrote {len(urls)} repository URLs to {output_file}")
    print(f"\nUsage:")
    print(
        f"  agentready assess-batch --repos-file {output_file} --cache-dir ~/repos/cache/github-org --generate-heatmap"
    )


if __name__ == "__main__":
    main()

scripts/fix_heatmap_data.py

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""Fix heatmap by removing duplicate attributes from successful batch assessment."""

import json
from pathlib import Path


def fix_duplicate_attributes():
    """Remove duplicate attribute findings from all-assessments.json."""
    input_file = Path(".agentready/batch/reports-20251124-160049/all-assessments.json")
    output_file = Path(
        ".agentready/batch/reports-20251124-160049/all-assessments-fixed.json"
    )

    print(f"Loading {input_file}...")
    with input_file.open() as f:
        data = json.load(f)

    total_removed = 0
    for result in data["results"]:
        assessment = result.get("assessment")
        if not assessment:
            continue

        # Track seen attributes and filter duplicates (keep first occurrence)
        seen_attrs = set()
        fixed_findings = []

        for finding in assessment["findings"]:
            attr_id = finding["attribute"]["id"]
            if attr_id in seen_attrs:
                total_removed += 1
                print(
                    f"  Removing duplicate: {attr_id} from "
                    f"{assessment['repository']['name']}"
                )
                continue
            seen_attrs.add(attr_id)
            fixed_findings.append(finding)

        # Update findings list
        assessment["findings"] = fixed_findings

        # Update counts
        assessment["attributes_assessed"] = len(fixed_findings)

    print(f"\n✓ Removed {total_removed} duplicate attribute findings")
    print(f"✓ Writing fixed data to {output_file}...")

    with output_file.open("w") as f:
        json.dump(data, f, indent=2)

    print(f"✓ Done! File size: {output_file.stat().st_size / 1024 / 1024:.1f} MB")


if __name__ == "__main__":
    fix_duplicate_attributes()

scripts/regenerate_heatmap.py

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""Regenerate heatmap from existing batch assessment JSON."""

import json
import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from agentready.services.attribute_analyzer import AttributeAnalyzer


def main():
    """Load fixed all-assessments.json and regenerate heatmap with deduplication fix."""
    # Use the fixed JSON with duplicates removed
    input_file = Path(
        ".agentready/batch/reports-20251124-160049/all-assessments-fixed.json"
    )
    output_file = Path(".agentready/batch/reports-20251124-160049/heatmap-fixed.html")

    print(f"Loading batch assessment from {input_file}...")

    with input_file.open() as f:
        batch_data = json.load(f)

    successful = len([r for r in batch_data["results"] if r.get("assessment")])
    print(f"Loaded {len(batch_data['results'])} repositories, {successful} successful")

    # Generate heatmap using the new JSON-based method
    print(f"Generating heatmap to {output_file}...")
    analyzer = AttributeAnalyzer()
    analyzer.analyze_batch_from_json(batch_data, output_file)

    print(f"\n✓ Heatmap regenerated successfully!")
    print(f"  File: {output_file}")
    print(f"  Size: {output_file.stat().st_size / 1024 / 1024:.1f} MB")


if __name__ == "__main__":
    main()

src/agentready/cli/assess_batch.py

Lines changed: 38 additions & 0 deletions
@@ -249,6 +249,18 @@ def _generate_multi_reports(batch_assessment, output_path: Path, verbose: bool)
     default=None,
     help="Custom cache directory (default: .agentready/cache/)",
 )
+@click.option(
+    "--generate-heatmap",
+    is_flag=True,
+    default=False,
+    help="Generate interactive Plotly heatmap visualization",
+)
+@click.option(
+    "--heatmap-output",
+    type=click.Path(),
+    default=None,
+    help="Custom path for heatmap HTML (default: reports-*/heatmap.html)",
+)
 def assess_batch(
     repos_file: Optional[str],
     repos: tuple,
@@ -260,6 +272,8 @@ def assess_batch(
     config: Optional[str],
     use_cache: bool,
     cache_dir: Optional[str],
+    generate_heatmap: bool,
+    heatmap_output: Optional[str],
 ):
     """Assess multiple repositories in a batch operation.

@@ -414,6 +428,30 @@ def show_progress(current: int, total: int):
     # Generate comprehensive Phase 2 reports
     _generate_multi_reports(batch_assessment, output_path, verbose)

+    # Generate heatmap if requested
+    if generate_heatmap:
+        from ..services.attribute_analyzer import AttributeAnalyzer
+
+        timestamp = batch_assessment.timestamp.strftime("%Y%m%d-%H%M%S")
+        reports_dir = output_path / f"reports-{timestamp}"
+        heatmap_path = (
+            Path(heatmap_output) if heatmap_output else (reports_dir / "heatmap.html")
+        )
+
+        if verbose:
+            click.echo(f"\nGenerating heatmap visualization...")
+
+        try:
+            analyzer = AttributeAnalyzer()
+            analyzer.analyze_batch(batch_assessment, heatmap_path)
+            click.echo(f"  ✓ heatmap.html")
+        except Exception as e:
+            click.echo(f"⚠ Warning: Failed to generate heatmap: {e}", err=True)
+            if verbose:
+                import traceback
+
+                traceback.print_exc()
+
     # Print summary
     click.echo("\n" + "=" * 50)
     click.echo("Batch Assessment Summary")