1+ import dataclasses
12import os
2- from collections import namedtuple
3+ from dataclasses import dataclass
34
45HEADER_TEMPLATE = """\
56 #!/bin/bash
4344 "short-baseline" ,
4445]
4546
46- ModelConfig = namedtuple ("ModelConfig" , "model_class large small size extras" )
47+
@dataclasses.dataclass
class ModelConfig:
    """Configuration for one large/small model pairing to generate Slurm jobs for.

    Instances are listed in ``MODELS`` and consumed by ``main()``, which writes
    one script per (model, bench, config) under ``slurm/<model_class>/``.
    """

    # Short family name (e.g. "qwen3"); main() uses it as the output directory
    # name under slurm/.
    model_class: str
    # Identifier of the larger model (e.g. "openai/gpt-oss-120b").
    large: str
    # Identifier of the smaller model (e.g. "openai/gpt-oss-20b").
    small: str
    # main() uses this as the GPU count; the 48GB GPU constraint line is only
    # emitted when it is non-zero.
    size: int
    # Extra command-line flags for the run, e.g. "--engine-timeout 1800".
    extras: str
    # Benchmarks to generate jobs for. Defaults to a *copy* of the module-level
    # BENCHES so that mutating one config's list can never alter BENCHES itself
    # or the benches of any other config that relied on the default.
    benches: list[str] = dataclasses.field(default_factory=lambda: list(BENCHES))
56+
4757
4858MODELS = [
    # model class, large, small, size, extras, benches (optional; defaults to all BENCHES)
7282 size = 8 ,
7383 extras = "--engine-timeout 1800" , # 30 min timeout per trial
7484 ),
85+ # oct25
86+ ModelConfig (
87+ model_class = "qwen3" ,
88+ large = "Qwen/Qwen3-235B-A22B-Thinking-2507" ,
89+ small = "Qwen/Qwen3-4B-Thinking-2507" ,
90+ size = 8 ,
91+ extras = "--engine-timeout 1800" , # 30 min timeout per trial
92+ benches = ["fanoutqa" , "travelplanner" ],
93+ ),
94+ ModelConfig (
95+ model_class = "gpt-oss" ,
96+ large = "openai/gpt-oss-120b" ,
97+ small = "openai/gpt-oss-20b" ,
98+ size = 8 ,
99+ extras = "--engine-timeout 1800" , # 30 min timeout per trial
100+ benches = ["fanoutqa" , "travelplanner" ],
101+ ),
75102]
76103
77104
@@ -82,7 +109,7 @@ def main():
82109 gpus = model .size
83110 gpuconstraint = "#SBATCH --constraint=48GBgpu" if model .size else ""
84111
85- for bench in BENCHES :
112+ for bench in model . benches :
86113 # WA needs extra env vars
87114 if bench == "webarena" :
88115 bench_extras = "bash slurm/webarena-startup.sh\n sleep 600"
@@ -126,7 +153,7 @@ def main():
126153 ).strip ()
127154 all_commands .append (content )
128155 os .makedirs (f"slurm/{ model .model_class } " , exist_ok = True )
129- with open (f"slurm/{ model .model_class } /{ bench } -{ idx + 1 } -{ config } .sh" , "w" ) as f :
156+ with open (f"slurm/{ model .model_class } /{ bench } -{ idx + 1 } -{ config } .sh" , "w" ) as f :
130157 f .write (header )
131158 f .write ("\n " )
132159 f .write (content )
0 commit comments