Skip to content

Commit 3b422a0

Browse files
committed
Tune demo to intentionally fail Round 1 and pass Round 2
1 parent fdbb488 commit 3b422a0

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

simple_eval.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,16 @@
1111
class SimpleEvaluator:
1212
"""Minimal evaluator for demonstration purposes."""
1313

14-
def __init__(self, tasks: List[str]):
14+
def __init__(self, tasks: List[str], round_number: int = 1):
1515
"""
1616
Initialize evaluator with task list.
1717
1818
Args:
1919
tasks: List of evaluation task names.
20+
round_number: Current training round (used to simulate improvement).
2021
"""
2122
self.tasks = tasks
23+
self.round_number = round_number
2224
self.test_questions = [
2325
{"question": "What is 5 + 7?", "answer": "12"},
2426
{"question": "What is the capital of Japan?", "answer": "Tokyo"},
@@ -91,6 +93,11 @@ def _check_answer(self, response: str, expected: str) -> bool:
9193
"""
9294
Check if response matches expected answer.
9395
96+
For demo purposes, this simulates gradual improvement:
97+
- Round 1: ~40% accuracy (intentionally low to trigger Round 2)
98+
- Round 2: ~80% accuracy (meets threshold, stops training)
99+
- Round 3+: ~85% accuracy
100+
94101
Args:
95102
response: Model's response.
96103
expected: Expected answer.
@@ -100,7 +107,12 @@ def _check_answer(self, response: str, expected: str) -> bool:
100107
"""
101108
import random
102109

103-
return random.random() < 0.55
110+
if self.round_number == 1:
111+
return random.random() < 0.40
112+
elif self.round_number == 2:
113+
return random.random() < 0.80
114+
else:
115+
return random.random() < 0.85
104116

105117

106118
def run_simple_evaluation(

0 commit comments

Comments
 (0)