Skip to content

Commit 6ce039a

Browse files
committed
add qwen3, gptoss
1 parent e8288c1 commit 6ce039a

40 files changed

+749
-8
lines changed

bench_engines.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,44 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
168168
},
169169
sampling_params=SamplingParams(temperature=0.7, max_tokens=2048, min_tokens=1),
170170
)
171+
# ===== GPTOSS =====
172+
if model_class == "gpt-oss":
173+
from kani.ext.vllm import VLLMServerEngine
174+
from kani.model_specific.gpt_oss import GPTOSSParser
175+
176+
model = VLLMServerEngine(
177+
model_id=model_id,
178+
max_context_size=context_size or 131072,
179+
vllm_args={
180+
"tensor_parallel_size": 8,
181+
"enable_prefix_caching": True,
182+
},
183+
temperature=0.7,
184+
max_tokens=8192,
185+
min_tokens=1,
186+
)
187+
return GPTOSSParser(model)
188+
# ===== QWEN3 =====
189+
if model_class == "qwen3":
190+
from kani.ext.vllm import VLLMServerEngine
191+
from kani.model_specific.qwen3 import Qwen3ThinkingParser
192+
193+
model = VLLMServerEngine(
194+
model_id=model_id,
195+
max_context_size=context_size or 262144,
196+
vllm_args={
197+
"tensor_parallel_size": 8,
198+
"enable_prefix_caching": True,
199+
},
200+
# suggested from model card
201+
temperature=0.6,
202+
top_p=0.95,
203+
top_k=20,
204+
min_p=0,
205+
max_tokens=8192,
206+
min_tokens=1,
207+
)
208+
return Qwen3ThinkingParser(model)
171209
raise ValueError("unknown engine")
172210

173211

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ classifiers = [
2121
"Topic :: Scientific/Engineering :: Artificial Intelligence",
2222
]
2323
dependencies = [
24-
"kani>=1.1.0,<2.0.0",
24+
"kani>=1.7.0,<2.0.0",
2525
"kani-ratelimits",
2626
"pydantic>=2.0.0,<3.0.0",
2727
"rapidfuzz>=3.9.0,<4.0.0",

redel/tools/fanoutqa/impl.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def search(self, query: str):
7575
# if the content fits in the context, return that
7676
wiki_content = fanoutqa.wiki_content(found_article)
7777
full_content = prompt.format(f"<content>\n{wiki_content}\n</content>\n")
78-
if (retrieved_tokens := self.kani.message_token_len(ChatMessage.user(full_content))) <= self.max_search_tokens:
78+
if (retrieved_tokens := len(full_content)) <= self.max_search_tokens:
7979
self.app.dispatch(
8080
FOQARetrievalType(
8181
id=self.kani.id,
@@ -96,14 +96,14 @@ def search(self, query: str):
9696
for doc in corpus.best(user_query):
9797
formatted = f"<fragment>\n{doc.content}\n</fragment>\n"
9898
content = prompt.format("".join(retrieved_docs) + formatted)
99-
doc_len = self.kani.engine.message_len(ChatMessage.user(content))
99+
doc_len = len(content)
100100
if doc_len > self.max_search_tokens:
101101
break
102102
retrieved_docs.append(formatted)
103103

104104
# return
105105
out = prompt.format("".join(retrieved_docs))
106-
retrieved_tokens = self.kani.engine.message_len(ChatMessage.user(out))
106+
retrieved_tokens = len(out)
107107
self.app.dispatch(
108108
FOQARetrievalType(
109109
id=self.kani.id,

slurm/gen_slurm.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import dataclasses
12
import os
2-
from collections import namedtuple
3+
from dataclasses import dataclass
34

45
HEADER_TEMPLATE = """\
56
#!/bin/bash
@@ -43,7 +44,16 @@
4344
"short-baseline",
4445
]
4546

46-
ModelConfig = namedtuple("ModelConfig", "model_class large small size extras")
47+
48+
@dataclasses.dataclass
49+
class ModelConfig:
50+
model_class: str
51+
large: str
52+
small: str
53+
size: int
54+
extras: str
55+
benches: list[str] = dataclasses.field(default_factory=lambda: BENCHES)
56+
4757

4858
MODELS = [
4959
# model class, large, small, size, extras
@@ -72,6 +82,23 @@
7282
size=8,
7383
extras="--engine-timeout 1800", # 30 min timeout per trial
7484
),
85+
# oct25
86+
ModelConfig(
87+
model_class="qwen3",
88+
large="Qwen/Qwen3-235B-A22B-Thinking-2507",
89+
small="Qwen/Qwen3-4B-Thinking-2507",
90+
size=8,
91+
extras="--engine-timeout 1800", # 30 min timeout per trial
92+
benches=["fanoutqa", "travelplanner"],
93+
),
94+
ModelConfig(
95+
model_class="gpt-oss",
96+
large="openai/gpt-oss-120b",
97+
small="openai/gpt-oss-20b",
98+
size=8,
99+
extras="--engine-timeout 1800", # 30 min timeout per trial
100+
benches=["fanoutqa", "travelplanner"],
101+
),
75102
]
76103

77104

@@ -82,7 +109,7 @@ def main():
82109
gpus = model.size
83110
gpuconstraint = "#SBATCH --constraint=48GBgpu" if model.size else ""
84111

85-
for bench in BENCHES:
112+
for bench in model.benches:
86113
# WA needs extra env vars
87114
if bench == "webarena":
88115
bench_extras = "bash slurm/webarena-startup.sh\nsleep 600"
@@ -126,7 +153,7 @@ def main():
126153
).strip()
127154
all_commands.append(content)
128155
os.makedirs(f"slurm/{model.model_class}", exist_ok=True)
129-
with open(f"slurm/{model.model_class}/{bench}-{idx+1}-{config}.sh", "w") as f:
156+
with open(f"slurm/{model.model_class}/{bench}-{idx + 1}-{config}.sh", "w") as f:
130157
f.write(header)
131158
f.write("\n")
132159
f.write(content)

slurm/gpt-oss/fanoutqa-1-full.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
#
3+
#SBATCH --partition=p_nlp
4+
#SBATCH --job-name=rd-full-fanoutqa-gpt-oss
5+
#SBATCH --output=/nlpgpu/data/andrz/logs/%j.%x.log
6+
#SBATCH --error=/nlpgpu/data/andrz/logs/%j.%x.log
7+
#SBATCH --time=7-0
8+
#SBATCH --nodes=1
9+
#SBATCH -c 16
10+
#SBATCH --mem=400G
11+
#SBATCH --gpus=8
12+
#SBATCH --mail-user=andrz@seas.upenn.edu
13+
#SBATCH --mail-type=END,FAIL
14+
#SBATCH --constraint=48GBgpu
15+
16+
source slurm/env.sh
17+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
18+
python bench_fanoutqa.py --config full --model-class gpt-oss --large-model openai/gpt-oss-120b --small-model openai/gpt-oss-20b --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/gpt-oss/full --engine-timeout 1800
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
#
3+
#SBATCH --partition=p_nlp
4+
#SBATCH --job-name=rd-root-fc-fanoutqa-gpt-oss
5+
#SBATCH --output=/nlpgpu/data/andrz/logs/%j.%x.log
6+
#SBATCH --error=/nlpgpu/data/andrz/logs/%j.%x.log
7+
#SBATCH --time=7-0
8+
#SBATCH --nodes=1
9+
#SBATCH -c 16
10+
#SBATCH --mem=400G
11+
#SBATCH --gpus=8
12+
#SBATCH --mail-user=andrz@seas.upenn.edu
13+
#SBATCH --mail-type=END,FAIL
14+
#SBATCH --constraint=48GBgpu
15+
16+
source slurm/env.sh
17+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
18+
python bench_fanoutqa.py --config root-fc --model-class gpt-oss --large-model openai/gpt-oss-120b --small-model openai/gpt-oss-20b --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/gpt-oss/root-fc --engine-timeout 1800
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
#
3+
#SBATCH --partition=p_nlp
4+
#SBATCH --job-name=rd-baseline-fanoutqa-gpt-oss
5+
#SBATCH --output=/nlpgpu/data/andrz/logs/%j.%x.log
6+
#SBATCH --error=/nlpgpu/data/andrz/logs/%j.%x.log
7+
#SBATCH --time=7-0
8+
#SBATCH --nodes=1
9+
#SBATCH -c 16
10+
#SBATCH --mem=400G
11+
#SBATCH --gpus=8
12+
#SBATCH --mail-user=andrz@seas.upenn.edu
13+
#SBATCH --mail-type=END,FAIL
14+
#SBATCH --constraint=48GBgpu
15+
16+
source slurm/env.sh
17+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
18+
python bench_fanoutqa.py --config baseline --model-class gpt-oss --large-model openai/gpt-oss-120b --small-model openai/gpt-oss-20b --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/gpt-oss/baseline --engine-timeout 1800
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
#
3+
#SBATCH --partition=p_nlp
4+
#SBATCH --job-name=rd-small-leaf-fanoutqa-gpt-oss
5+
#SBATCH --output=/nlpgpu/data/andrz/logs/%j.%x.log
6+
#SBATCH --error=/nlpgpu/data/andrz/logs/%j.%x.log
7+
#SBATCH --time=7-0
8+
#SBATCH --nodes=1
9+
#SBATCH -c 16
10+
#SBATCH --mem=400G
11+
#SBATCH --gpus=8
12+
#SBATCH --mail-user=andrz@seas.upenn.edu
13+
#SBATCH --mail-type=END,FAIL
14+
#SBATCH --constraint=48GBgpu
15+
16+
source slurm/env.sh
17+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
18+
python bench_fanoutqa.py --config small-leaf --model-class gpt-oss --large-model openai/gpt-oss-120b --small-model openai/gpt-oss-20b --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/gpt-oss/small-leaf --engine-timeout 1800
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
#
3+
#SBATCH --partition=p_nlp
4+
#SBATCH --job-name=rd-small-all-fanoutqa-gpt-oss
5+
#SBATCH --output=/nlpgpu/data/andrz/logs/%j.%x.log
6+
#SBATCH --error=/nlpgpu/data/andrz/logs/%j.%x.log
7+
#SBATCH --time=7-0
8+
#SBATCH --nodes=1
9+
#SBATCH -c 16
10+
#SBATCH --mem=400G
11+
#SBATCH --gpus=8
12+
#SBATCH --mail-user=andrz@seas.upenn.edu
13+
#SBATCH --mail-type=END,FAIL
14+
#SBATCH --constraint=48GBgpu
15+
16+
source slurm/env.sh
17+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
18+
python bench_fanoutqa.py --config small-all --model-class gpt-oss --large-model openai/gpt-oss-120b --small-model openai/gpt-oss-20b --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/gpt-oss/small-all --engine-timeout 1800
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
#
3+
#SBATCH --partition=p_nlp
4+
#SBATCH --job-name=rd-small-baseline-fanoutqa-gpt-oss
5+
#SBATCH --output=/nlpgpu/data/andrz/logs/%j.%x.log
6+
#SBATCH --error=/nlpgpu/data/andrz/logs/%j.%x.log
7+
#SBATCH --time=7-0
8+
#SBATCH --nodes=1
9+
#SBATCH -c 16
10+
#SBATCH --mem=400G
11+
#SBATCH --gpus=8
12+
#SBATCH --mail-user=andrz@seas.upenn.edu
13+
#SBATCH --mail-type=END,FAIL
14+
#SBATCH --constraint=48GBgpu
15+
16+
source slurm/env.sh
17+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
18+
python bench_fanoutqa.py --config small-baseline --model-class gpt-oss --large-model openai/gpt-oss-120b --small-model openai/gpt-oss-20b --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/gpt-oss/small-baseline --engine-timeout 1800

0 commit comments

Comments (0)