Skip to content

Commit 62e53c0

Browse files
Commit message: fix num_tokens (the diff below changes how the `max_tokens` setting is handled)
1 parent: 1436a18 · this commit: 62e53c0

File tree

4 files changed: 7 additions (+7) and 6 deletions (-6).

eval/chat_benchmarks/HMMT/eval_instruct.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def __init__(
         self,
         dataset_name: str = "MathArena/hmmt_feb_2025",
         debug: bool = False,
+        max_tokens: Optional[int] = None,
         seed: List[int] = [0, 1234, 1234, 1234],
         logger: Optional[logging.Logger] = None,
         system_instruction: Optional[str] = None,
@@ -46,7 +47,7 @@ def __init__(
         super().__init__(logger=logger, system_instruction=system_instruction)
         self.dataset_name = dataset_name
         self.debug = debug
-        self.max_new_tokens = 32768 # set higher to avoid truncation for reasoning models
+        self.max_new_tokens = max_tokens if max_tokens is not None else 32768 # set higher to avoid truncation for reasoning models
         self.seed = seed
         self.n_repeat = 10
 

eval/chat_benchmarks/HumanEval/eval_instruct.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(
         self,
         languages: List[str] = ["python", "sh"],
         data_dir: str = "eval/chat_benchmarks/HumanEval/data",
-        max_tokens: int = 1024,
+        max_tokens: Optional[int] = 1024,
         num_workers: int = 8,
         timeout: float = 3.0,
         debug: bool = False,
@@ -45,7 +45,7 @@ def __init__(
         super().__init__(logger=logger, system_instruction=system_instruction)
         self.languages = languages
         self.data_dir = data_dir
-        self.max_tokens = max_tokens
+        self.max_tokens = max_tokens if max_tokens is not None else 1024
         self.num_workers = num_workers
         self.timeout = timeout
         self.debug = debug

eval/chat_benchmarks/HumanEvalPlus/eval_instruct.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(
         self,
         languages: List[str] = ["python"],
         data_dir: str = "eval/chat_benchmarks/HumanEvalPlus/data",
-        max_tokens: int = 1024,
+        max_tokens: Optional[int] = 1024,
         num_workers: int = 8,
         timeout: float = 3.0,
         debug: bool = False,
@@ -45,7 +45,7 @@ def __init__(
         super().__init__(logger=logger, system_instruction=system_instruction)
         self.languages = languages
         self.data_dir = data_dir
-        self.max_tokens = max_tokens
+        self.max_tokens = max_tokens if max_tokens is not None else 1024
         self.num_workers = num_workers
         self.timeout = timeout
         self.debug = debug

eval/eval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ def add_results_metadata(results: Dict, batch_sizes_list: List[int], args: argpa
         "use_cache": args.use_cache,
         "limit": args.limit,
         "annotator_model": args.annotator_model,
-        "max_tokens": int(args.max_tokens),
+        "max_tokens": args.max_tokens if args.max_tokens is not None else "default",
         # "bootstrap_iters": args.bootstrap_iters,
         "gen_kwargs": args.gen_kwargs,
         "random_seed": args.seed[0],

0 commit comments

Comments
 (0)