
Commit a8fd88d

Fix rubric20-semantic HTML renderer to match actual JSON structure
ISSUE: v5 evaluation HTML files showed 0 scores and empty content.

ROOT CAUSE: The HTML renderer expected the old JSON structure with evaluation_metadata, category_scores, question_scores, and overall_summary fields, but the actual JSON has a different structure (overall_score, categories with nested questions, and metadata at the root level).

CHANGES:
- Updated render_evaluation_html_rubric20_semantic.py to extract metadata from the root level (project, rubric, d4d_file, method, timestamp, model)
- Fixed overall score extraction to use overall_score.total_points/max_points/percentage
- Updated category iteration to process the categories list with nested questions
- Fixed question field names (id vs question_number, name vs question, quality_note vs justification, score_type vs scoring_type)
- Changed evidence and semantic_analysis from list/dict iteration to string display

RESULTS:
- All 4 v5 evaluation HTML files now show correct scores:
  - AI-READI: 79/84 (94.0%)
  - CHORUS: 71/84 (84.5%)
  - CM4AI: 77/84 (91.7%)
  - VOICE: 81/84 (96.4%)
- Full category breakdowns, question assessments, and semantic analysis are visible
- File sizes increased from 12KB to 45-58KB with complete content
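For reference, a minimal sketch of the JSON shape the updated renderer now expects, assembled from the field names in the change list above. The project name and overall score mirror the AI-READI result reported above; the model, path, category, and question values are placeholders, not actual evaluation output.

# Illustrative only: minimal shape of the v5 rubric20-semantic evaluation JSON
# that the updated renderer reads. Values below are placeholders.
example_eval_data = {
    "project": "AI-READI",
    "rubric": "rubric20-semantic",
    "d4d_file": "path/to/AI_READI_d4d.html",            # placeholder path
    "method": "claudecode_agent",                        # placeholder method label
    "evaluation_timestamp": "2025-01-01T00:00:00Z",      # placeholder timestamp
    "model": {"name": "example-model", "temperature": 0.0},
    "overall_score": {"total_points": 79, "max_points": 84, "percentage": 94.0},
    "categories": [
        {
            "name": "Example Category",
            "questions": [
                {
                    "id": 1,
                    "name": "Example question",
                    "description": "What the question checks (placeholder).",
                    "score": 4,
                    "max_score": 5,
                    "score_type": "0-5 scale",
                    "score_label": "Good",
                    "quality_note": "Why this score was given (placeholder).",
                    "evidence": "Quoted passage from the D4D (placeholder).",
                    "semantic_analysis": "Free-text semantic assessment (placeholder).",
                },
            ],
        },
    ],
    "semantic_analysis": {},  # document-level analysis; structure not shown in this diff
}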
1 parent 765df8d commit a8fd88d

13 files changed (+5776, -446 lines)

data/d4d_html/concatenated/claudecode_agent/AI_READI_evaluation_rubric20.html: 517 additions & 9 deletions (large diff not rendered)
data/d4d_html/concatenated/claudecode_agent/CHORUS_evaluation_rubric20.html: 517 additions & 9 deletions (large diff not rendered)
data/d4d_html/concatenated/claudecode_agent/CM4AI_evaluation_rubric20.html: 617 additions & 9 deletions (large diff not rendered)
data/d4d_html/concatenated/claudecode_agent/VOICE_evaluation_rubric20.html: 252 additions & 103 deletions (large diff not rendered)
docs/html_output/D4D_-_AI-READI_v5_evaluation.html: 517 additions & 9 deletions (large diff not rendered)
docs/html_output/D4D_-_CHORUS_v5_evaluation.html: 517 additions & 9 deletions (large diff not rendered)
docs/html_output/D4D_-_CM4AI_v5_evaluation.html: 617 additions & 9 deletions (large diff not rendered)
docs/html_output/D4D_-_VOICE_v5_evaluation.html: 252 additions & 103 deletions (large diff not rendered)

scripts/render_evaluation_html_rubric20_semantic.py: 67 additions & 56 deletions
@@ -10,25 +10,49 @@
 def generate_evaluation_html(eval_data, output_path):
     """Generate HTML from rubric20 evaluation JSON data"""
 
-    metadata = eval_data.get("evaluation_metadata", {})
-    category_scores = eval_data.get("category_scores", [])
-    question_scores = eval_data.get("question_scores", [])
-    overall = eval_data.get("overall_summary", {})
-    semantic = eval_data.get("semantic_analysis", {})
-
-    # Calculate overall stats
-    total_score = overall.get("total_score", 0)
-    max_score = metadata.get("max_score", 84)
+    # Extract metadata from root level and nested structures
+    project = eval_data.get("project", "Unknown")
+    rubric = eval_data.get("rubric", "rubric20-semantic")
+    d4d_file = eval_data.get("d4d_file", "N/A")
+    method = eval_data.get("method", "N/A")
+    timestamp = eval_data.get("evaluation_timestamp", "N/A")
+    model_info = eval_data.get("model", {})
+
+    # Extract overall score
+    overall = eval_data.get("overall_score", {})
+    total_score = overall.get("total_points", 0)
+    max_score = overall.get("max_points", 84)
     percentage = overall.get("percentage", 0)
-    grade = overall.get("grade", "N/A")
+
+    # Calculate grade
+    if percentage >= 95:
+        grade = "A+"
+    elif percentage >= 90:
+        grade = "A"
+    elif percentage >= 85:
+        grade = "B+"
+    elif percentage >= 80:
+        grade = "B"
+    elif percentage >= 75:
+        grade = "C+"
+    elif percentage >= 70:
+        grade = "C"
+    else:
+        grade = "D"
+
+    # Extract categories (with nested questions)
+    categories = eval_data.get("categories", [])
+
+    # Extract semantic analysis
+    semantic = eval_data.get("semantic_analysis", {})
 
     # Start HTML
     html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Rubric20-Semantic Evaluation: {metadata.get('project', 'Unknown')}</title>
+    <title>Rubric20-Semantic Evaluation: {project}</title>
     <style>
         * {{
             margin: 0;
@@ -376,27 +400,27 @@ def generate_evaluation_html(eval_data, output_path):
             <div class="metadata-grid">
                 <div class="metadata-item">
                     <div class="metadata-label">Project</div>
-                    <div class="metadata-value">{metadata.get('project', 'Unknown')}</div>
+                    <div class="metadata-value">{project}</div>
                 </div>
                 <div class="metadata-item">
                     <div class="metadata-label">D4D File</div>
-                    <div class="metadata-value">{Path(metadata.get('d4d_file', '')).name}</div>
+                    <div class="metadata-value">{Path(d4d_file).name}</div>
                 </div>
                 <div class="metadata-item">
                     <div class="metadata-label">Evaluator Model</div>
-                    <div class="metadata-value">{metadata.get('evaluator_model', 'Unknown')}</div>
+                    <div class="metadata-value">{model_info.get('name', 'Unknown')}</div>
                 </div>
                 <div class="metadata-item">
                     <div class="metadata-label">Rubric Type</div>
-                    <div class="metadata-value">{metadata.get('rubric', 'rubric20-semantic')}</div>
+                    <div class="metadata-value">{rubric}</div>
                 </div>
                 <div class="metadata-item">
                     <div class="metadata-label">Temperature</div>
-                    <div class="metadata-value">{metadata.get('temperature', 'N/A')}</div>
+                    <div class="metadata-value">{model_info.get('temperature', 'N/A')}</div>
                 </div>
                 <div class="metadata-item">
                     <div class="metadata-label">Evaluation Date</div>
-                    <div class="metadata-value">{metadata.get('evaluation_date', 'Unknown')}</div>
+                    <div class="metadata-value">{timestamp}</div>
                 </div>
             </div>
         </div>
@@ -411,11 +435,14 @@ def generate_evaluation_html(eval_data, output_path):
     """
 
     # Add category cards
-    for cat in category_scores:
-        cat_name = cat.get('category', 'Unknown')
-        cat_score = cat.get('score', 0)
-        cat_max = cat.get('max_score', 0)
-        cat_pct = cat.get('percentage', 0)
+    for cat in categories:
+        cat_name = cat.get('name', 'Unknown')
+        questions = cat.get('questions', [])
+
+        # Calculate category totals from questions
+        cat_score = sum(q.get('score', 0) for q in questions)
+        cat_max = sum(q.get('max_score', 5) for q in questions)
+        cat_pct = (cat_score / cat_max * 100) if cat_max > 0 else 0
 
         html += f"""
         <div class="category-card">
@@ -430,35 +457,31 @@ def generate_evaluation_html(eval_data, output_path):
         </div>
         """
 
-    # Group questions by category
-    questions_by_category = {}
-    for q in question_scores:
-        cat = q.get('category', 'Unknown')
-        if cat not in questions_by_category:
-            questions_by_category[cat] = []
-        questions_by_category[cat].append(q)
-
     # Add questions organized by category
     html += """
     <h2>Question-Level Assessment</h2>
    """
 
-    for cat_name, questions in questions_by_category.items():
+    for cat in categories:
+        cat_name = cat.get('name', 'Unknown')
+        questions = cat.get('questions', [])
        html += f"""
        <div class="category-section">
            <h3>{cat_name}</h3>
        """
 
        for q in questions:
-            q_num = q.get('question_number', 0)
-            q_text = q.get('question', 'Unknown')
+            q_num = q.get('id', 0)
+            q_text = q.get('name', 'Unknown')
+            q_desc = q.get('description', '')
            q_score = q.get('score', 0)
            q_max = q.get('max_score', 5)
            q_pct = (q_score / q_max * 100) if q_max > 0 else 0
-            q_type = q.get('scoring_type', '0-5 scale')
-            justification = q.get('justification', '')
-            evidence = q.get('evidence', [])
-            semantic_checks = q.get('semantic_checks', {})
+            q_type = q.get('score_type', '0-5 scale')
+            q_label = q.get('score_label', '')
+            justification = q.get('quality_note', '')
+            evidence = q.get('evidence', '')
+            semantic_analysis = q.get('semantic_analysis', '')
 
            # Determine score class
            if q_pct == 100:
@@ -490,30 +513,18 @@ def generate_evaluation_html(eval_data, output_path):
            """
 
            if evidence:
-                html += """
+                html += f"""
                <div class="detail">
                    <span class="detail-label">Evidence Found</span>
-                    <ul class="evidence-list">
-                """
-                for e in evidence:
-                    html += f"""
-                        <li class="evidence-item">{e}</li>
-                    """
-                html += """
-                    </ul>
+                    <div class="detail-content">{evidence}</div>
                </div>
                """
 
-            if semantic_checks:
-                html += """
-                <div class="semantic-checks">
-                    <strong>Semantic Analysis:</strong>
-                """
-                for check_type, check_value in semantic_checks.items():
-                    html += f"""
-                    <div><strong>{check_type.replace('_', ' ').title()}:</strong> {check_value}</div>
-                    """
-                html += """
+            if semantic_analysis:
+                html += f"""
+                <div class="detail">
+                    <span class="detail-label">Semantic Analysis</span>
+                    <div class="detail-content">{semantic_analysis}</div>
                </div>
                """
 
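For completeness, a rough sketch of how the updated generate_evaluation_html function could be driven end to end. The script's actual command-line handling is not part of this diff, so the import, entry point, and input path below are assumptions for illustration; the output path is one of the files touched by this commit.

import json
from pathlib import Path

# Hypothetical driver; assumes the script is importable and that its real
# argument handling (not shown in this diff) supplies similar paths.
from render_evaluation_html_rubric20_semantic import generate_evaluation_html

eval_json = Path("AI_READI_evaluation_rubric20.json")  # illustrative input path
output_html = "docs/html_output/D4D_-_AI-READI_v5_evaluation.html"

eval_data = json.loads(eval_json.read_text())
generate_evaluation_html(eval_data, output_html)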