
Commit a0b8df9

refactor(zero-shot): extract magic numbers to constants and improve type safety
Parent: 7c36ce4

2 files changed (+13, -7 lines)


cookbooks/zero_shot_evaluation/report_generator.py (11 additions, 5 deletions)

@@ -12,6 +12,11 @@
 from cookbooks.zero_shot_evaluation.zero_shot_pipeline import EvaluationResult
 from openjudge.models.openai_chat_model import OpenAIChatModel
 
+# Constants for report generation
+_NUM_WINNING_EXAMPLES_FOR_RANKING = 2
+_NUM_LOSING_EXAMPLES_FOR_RANKING = 1
+_NUM_SAMPLE_REASONS_PER_MODEL = 3
+
 
 class ReportGenerator:
     """Generate evaluation report with parallel LLM calls."""

@@ -146,14 +151,14 @@ async def _gen_ranking_explanation(self, ctx: dict) -> str:
             d
             for d in ctx["all_details"]
             if (d.model_a == best and d.winner == "model_a") or (d.model_b == best and d.winner == "model_b")
-        ][:2]
+        ][:_NUM_WINNING_EXAMPLES_FOR_RANKING]
 
         # Best model loses: either (model_a=best and winner=model_b) or (model_b=best and winner=model_a)
         losing_examples = [
             d
             for d in ctx["all_details"]
             if (d.model_a == best and d.winner == "model_b") or (d.model_b == best and d.winner == "model_a")
-        ][:1]
+        ][:_NUM_LOSING_EXAMPLES_FOR_RANKING]
 
         examples_text = ""
         for i, ex in enumerate(winning_examples + losing_examples, 1):

@@ -206,12 +211,13 @@ async def _gen_model_analysis(self, ctx: dict) -> str:
         stats_text = ""
         for name in ctx["model_names"]:
             stats = model_stats[name]
-            sample_reasons = stats["reasons"][:3]
+            sample_reasons = stats["reasons"][:_NUM_SAMPLE_REASONS_PER_MODEL]
+            reasons_text = "\n".join(" * " + r for r in sample_reasons)
             stats_text += f"""
 Model: {name}
 - Wins: {stats['wins']}, Losses: {stats['losses']}
 - Sample evaluation reasons:
-{chr(10).join(' * ' + r for r in sample_reasons)}
+{reasons_text}
 """
 
         prompt = f"""Analyze each model's performance in this evaluation.

@@ -237,7 +243,7 @@ async def _gen_model_analysis(self, ctx: dict) -> str:
 
     async def _gen_examples(self, ctx: dict) -> str:
         """Generate showcase examples."""
-        examples = ctx["examples"][:5]
+        examples = ctx["examples"][: self.include_examples]
         if not examples:
             return ""
 
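
Two notes on these hunks, plus a sketch. The chr(10) expression existed because f-string expressions could not contain a backslash before Python 3.12; hoisting the join out into reasons_text sidesteps that and reads better. And the [:5] slice in _gen_examples now follows the validated include_examples config value instead of a magic number. A minimal sketch of how that wiring might look; the __init__ below is an assumption for illustration, since the diff only shows the slice:

# Sketch only: plausible wiring for the configurable example count.
# The __init__ signature is assumed; only _gen_examples' slice is in the diff.
class ReportGenerator:
    """Generate evaluation report with parallel LLM calls."""

    def __init__(self, include_examples: int = 3):
        # Assumed to come from ReportConfig.include_examples
        # (validated to 1..10 in schema.py below).
        self.include_examples = include_examples

    async def _gen_examples(self, ctx: dict) -> str:
        """Generate showcase examples."""
        # The hard-coded [:5] is replaced by the configured count.
        examples = ctx["examples"][: self.include_examples]
        if not examples:
            return ""
        ...  # remainder of the method unchanged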

cookbooks/zero_shot_evaluation/schema.py (2 additions, 2 deletions)

@@ -9,7 +9,7 @@
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
 import yaml
 from loguru import logger

@@ -96,7 +96,7 @@ class ReportConfig(BaseModel):
     """Report generation configuration."""
 
     enabled: bool = Field(default=False, description="Whether to generate report")
-    language: str = Field(default="zh", description="Report language: zh | en")
+    language: Literal["zh", "en"] = Field(default="zh", description="Report language: zh | en")
     include_examples: int = Field(default=3, ge=1, le=10, description="Examples per section")
 
 

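What the Literal change buys in practice: an unsupported language code now fails at config validation instead of slipping through as an arbitrary string. A self-contained check mirroring the ReportConfig above (behavior should be the same under Pydantic v1 and v2):

from typing import Literal

from pydantic import BaseModel, Field, ValidationError


class ReportConfig(BaseModel):
    """Report generation configuration."""

    enabled: bool = Field(default=False, description="Whether to generate report")
    language: Literal["zh", "en"] = Field(default="zh", description="Report language: zh | en")
    include_examples: int = Field(default=3, ge=1, le=10, description="Examples per section")


print(ReportConfig(language="en").language)  # "en" passes validation

try:
    ReportConfig(language="fr")  # previously accepted silently as a plain str
except ValidationError as exc:
    print(exc)  # now fails fast: "fr" is not a permitted value for language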