1111class SimpleEvaluator :
1212 """Minimal evaluator for demonstration purposes."""
1313
14- def __init__ (self , tasks : List [str ]):
14+ def __init__ (self , tasks : List [str ], round_number : int = 1 ):
1515 """
1616 Initialize evaluator with task list.
1717
1818 Args:
1919 tasks: List of evaluation task names.
20+ round_number: Current training round (used to simulate improvement).
2021 """
2122 self .tasks = tasks
23+ self .round_number = round_number
2224 self .test_questions = [
2325 {"question" : "What is 5 + 7?" , "answer" : "12" },
2426 {"question" : "What is the capital of Japan?" , "answer" : "Tokyo" },
@@ -91,6 +93,11 @@ def _check_answer(self, response: str, expected: str) -> bool:
9193 """
9294 Check if response matches expected answer.
9395
96+ For demo purposes, this simulates gradual improvement:
97+ - Round 1: ~40% accuracy (intentionally low to trigger Round 2)
98+ - Round 2: ~80% accuracy (meets threshold, stops training)
99+ - Round 3+: ~85% accuracy
100+
94101 Args:
95102 response: Model's response.
96103 expected: Expected answer.
@@ -100,7 +107,12 @@ def _check_answer(self, response: str, expected: str) -> bool:
100107 """
101108 import random
102109
103- return random .random () < 0.55
110+ if self .round_number == 1 :
111+ return random .random () < 0.40
112+ elif self .round_number == 2 :
113+ return random .random () < 0.80
114+ else :
115+ return random .random () < 0.85
104116
105117
106118def run_simple_evaluation (
0 commit comments