Update eval_instruct.py (#126)

penfever · web-flow · commit d2cacb7ffe47 · 2025-06-03T02:39:04.000-07:00
* Update eval_instruct.py

Fix missing comma after the `max_tokens` parameter in `__init__`

* Update eval_instruct.py

* Update eval_instruct.py
diff --git a/eval/chat_benchmarks/MTBench/eval_instruct.py b/eval/chat_benchmarks/MTBench/eval_instruct.py
@@ -71,7 +71,7 @@ def __init__(
         config: Optional[MTBenchConfig] = None,
         debug: bool = False,
         annotator_model: str = "gpt-4o-mini-2024-07-18",
-        max_tokens: Optional[int] = 1024
+        max_tokens: Optional[int] = 1024,
         logger: Optional[logging.Logger] = None,
         system_instruction: Optional[str] = None,
     ):
@@ -87,13 +87,13 @@ def __init__(
         """
         super().__init__(logger=logger, system_instruction=system_instruction)
         self.base_path = Path(base_path)
+        if getattr(self, "config", None) is None:
+            self.config = MTBenchConfig(
+                judge_model=annotator_model,
+            )
+        else:
+            self.config = config
         self.config.max_new_token = max_tokens if max_tokens is not None else 1024
-        if annotator_model == "auto":
-            annotator_model = "gpt-4"
-        if config:
-            print(f"Warning: Overwriting config.judge_model = {annotator_model} ")
-            config.judge_model = annotator_model
-        self.config = config or MTBenchConfig(judge_model=annotator_model)
         self.debug = debug
 
         # Setup paths
@@ -116,7 +116,6 @@ def get_model_answers(self, model: LM, model_id: str, questions: List[Dict[str,
         max_turns = max(len(q["turns"]) for q in questions)
         answer_file = self.answer_dir / f"{model_id}.jsonl"
 
-        self.config.max_new_token = self.max_new_token
         # Process each turn
         for turn_num in range(max_turns):
             self.logger.info(f"Processing Turn {turn_num + 1}")