Commit 8440831

fix lint
Signed-off-by: Huamin Chen <[email protected]>
1 parent 8952bc9 · commit 8440831

11 files changed: +470 additions, −318 deletions

bench/plot_comprehensive_results.py

Lines changed: 268 additions & 182 deletions
Large diffs are not rendered by default.

bench/vllm_semantic_router_bench/cli.py

Lines changed: 26 additions & 3 deletions
@@ -40,7 +40,15 @@ def main():
     test_parser.add_argument(
         "--dataset",
         required=True,
-        choices=["mmlu", "arc", "arc-challenge", "gpqa", "truthfulqa", "commonsenseqa", "hellaswag"],
+        choices=[
+            "mmlu",
+            "arc",
+            "arc-challenge",
+            "gpqa",
+            "truthfulqa",
+            "commonsenseqa",
+            "hellaswag",
+        ],
         help="Dataset to test",
     )
     test_parser.add_argument(
@@ -68,7 +76,15 @@ def main():
     compare_parser.add_argument(
         "--dataset",
         required=True,
-        choices=["mmlu", "arc", "arc-challenge", "gpqa", "truthfulqa", "commonsenseqa", "hellaswag"],
+        choices=[
+            "mmlu",
+            "arc",
+            "arc-challenge",
+            "gpqa",
+            "truthfulqa",
+            "commonsenseqa",
+            "hellaswag",
+        ],
         help="Dataset to benchmark",
     )
     compare_parser.add_argument(
@@ -119,7 +135,14 @@ def main():
     comprehensive_parser.add_argument(
         "--datasets",
         nargs="+",
-        default=["mmlu", "arc-challenge", "gpqa", "truthfulqa", "commonsenseqa", "hellaswag"],
+        default=[
+            "mmlu",
+            "arc-challenge",
+            "gpqa",
+            "truthfulqa",
+            "commonsenseqa",
+            "hellaswag",
+        ],
         help="Datasets to benchmark",
     )
     comprehensive_parser.add_argument(
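
Note: the cli.py change is purely a formatter reflow; the one-line choices/default lists are split across lines with trailing commas and argparse behaves identically. A minimal standalone sketch of that behavior (the parser and prog name here are illustrative, not the real CLI module):

import argparse

# Minimal sketch: the reflowed multi-line choices list parses exactly like the
# original single-line version; only the layout changed for the linter.
parser = argparse.ArgumentParser(prog="router-bench-sketch")  # hypothetical prog name
parser.add_argument(
    "--dataset",
    required=True,
    choices=[
        "mmlu",
        "arc",
        "arc-challenge",
        "gpqa",
        "truthfulqa",
        "commonsenseqa",
        "hellaswag",
    ],
    help="Dataset to test",
)

args = parser.parse_args(["--dataset", "gpqa"])
print(args.dataset)  # -> gpqa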

bench/vllm_semantic_router_bench/dataset_factory.py

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@
 )
 from .dataset_implementations.gsm8k_dataset import GSM8KDataset
 from .dataset_implementations.hellaswag_dataset import HellaSwagDataset
+
 # from .dataset_implementations.math_dataset import MATHDataset # Disabled - dataset not available
 from .dataset_implementations.mmlu_dataset import MMLUDataset
 from .dataset_implementations.openbookqa_dataset import OpenBookQADataset

bench/vllm_semantic_router_bench/dataset_implementations/aqua_rat_dataset.py

Lines changed: 10 additions & 7 deletions
@@ -84,7 +84,7 @@ def load_dataset(
         if samples_per_category:
             np.random.seed(seed)
             random.seed(seed)
-
+
             sample_size = min(samples_per_category, len(df))
             df = df.sample(n=sample_size, random_state=seed)

@@ -101,7 +101,8 @@ def load_dataset(
             for option in raw_options:
                 # Remove letter prefix like "A)", "B)", etc.
                 import re
-                cleaned = re.sub(r'^[A-E]\)', '', option).strip()
+
+                cleaned = re.sub(r"^[A-E]\)", "", option).strip()
                 options.append(cleaned)

             question = Question(
@@ -115,7 +116,7 @@ def load_dataset(
                     "difficulty": "Moderate",
                     "type": "algebraic_word_problem",
                     "rationale": rationale,
-                }
+                },
             )
             questions.append(question)

@@ -132,8 +133,10 @@ def load_dataset(

     def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
         """Format prompt for AQUA-RAT questions."""
-        options_text = "\n".join([f"{chr(65+i)}) {opt}" for i, opt in enumerate(question.options)])
-
+        options_text = "\n".join(
+            [f"{chr(65+i)}) {opt}" for i, opt in enumerate(question.options)]
+        )
+
         if prompt_style == "plain":
             return f"""Solve this algebraic word problem:

@@ -145,7 +148,7 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 ANSWER: [letter]

 For example: ANSWER: A"""
-
+
         elif prompt_style == "explicit_cot":
             return f"""Solve this algebraic word problem step by step:

@@ -165,6 +168,6 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 ANSWER: [letter]

 For example: ANSWER: A"""
-
+
         else:
             raise ValueError(f"Unknown prompt style: {prompt_style}")
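
The only behavioral code touched in the AQUA-RAT hunks is the option-cleaning regex, which merely switched quote style for the linter. A quick way to exercise that pattern in isolation (the sample options are made up for illustration):

import re

# Strip a leading "A)", "B)", ... prefix from each option, as in the hunk above.
raw_options = ["A) 12 km", "B) 15 km", "C) 20 km"]  # illustrative values only
cleaned = [re.sub(r"^[A-E]\)", "", opt).strip() for opt in raw_options]
print(cleaned)  # ['12 km', '15 km', '20 km']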

bench/vllm_semantic_router_bench/dataset_implementations/drop_dataset.py

Lines changed: 4 additions & 4 deletions
@@ -84,7 +84,7 @@ def load_dataset(
         if samples_per_category:
             np.random.seed(seed)
             random.seed(seed)
-
+
             sample_size = min(samples_per_category, len(df))
             df = df.sample(n=sample_size, random_state=seed)

@@ -115,7 +115,7 @@ def load_dataset(
                     "type": "discrete_reasoning",
                     "passage": passage,
                     "question_only": question_text,
-                }
+                },
             )
             questions.append(question)

@@ -141,7 +141,7 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 ANSWER: [your answer]

 For example: ANSWER: 68.5 or ANSWER: germans or ANSWER: Centenary Medal"""
-
+
         elif prompt_style == "explicit_cot":
             return f"""{question.question}

@@ -156,6 +156,6 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 ANSWER: [your answer]

 For example: ANSWER: 68.5 or ANSWER: germans or ANSWER: Centenary Medal"""
-
+
         else:
             raise ValueError(f"Unknown prompt style: {prompt_style}")

bench/vllm_semantic_router_bench/dataset_implementations/gsm8k_dataset.py

Lines changed: 7 additions & 6 deletions
@@ -84,7 +84,7 @@ def load_dataset(
         if samples_per_category:
             np.random.seed(seed)
             random.seed(seed)
-
+
             sample_size = min(samples_per_category, len(df))
             df = df.sample(n=sample_size, random_state=seed)

@@ -93,11 +93,12 @@ def load_dataset(
         for _, row in df.iterrows():
             question_text = row["question"]
             answer_text = row["answer"]
-
+
             # Extract the final numerical answer from the solution
             import re
+
             # GSM8K answers end with "#### [number]"
-            answer_match = re.search(r'####\s*([0-9,.-]+)', answer_text)
+            answer_match = re.search(r"####\s*([0-9,.-]+)", answer_text)
             correct_answer = answer_match.group(1) if answer_match else "Unknown"

             question = Question(
@@ -111,7 +112,7 @@ def load_dataset(
                     "difficulty": "Elementary",
                     "type": "word_problem",
                     "solution": answer_text,
-                }
+                },
             )
             questions.append(question)

@@ -137,7 +138,7 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 ANSWER: [number]

 For example: ANSWER: 42"""
-
+
         elif prompt_style == "explicit_cot":
             return f"""Solve this math word problem step by step, showing all your work:

@@ -154,6 +155,6 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 ANSWER: [number]

 For example: ANSWER: 42"""
-
+
         else:
             raise ValueError(f"Unknown prompt style: {prompt_style}")
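
The reformatted regex in the GSM8K loader extracts the final answer that GSM8K solutions append after "####"; only the quoting changed. A minimal check of that pattern with a made-up solution string:

import re

# GSM8K solutions end with "#### <number>"; the hunk above only changed quoting.
answer_text = "16 - 3 - 4 = 9 eggs sold.\n9 * 2 = 18 dollars.\n#### 18"  # illustrative
answer_match = re.search(r"####\s*([0-9,.-]+)", answer_text)
correct_answer = answer_match.group(1) if answer_match else "Unknown"
print(correct_answer)  # 18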

bench/vllm_semantic_router_bench/dataset_implementations/math_dataset.py

Lines changed: 6 additions & 5 deletions
@@ -115,10 +115,11 @@ def load_dataset(
             # The solution contains the final answer in \boxed{} format
             question_text = row["problem"]
             solution = row["solution"]
-
+
             # Extract boxed answer as the correct answer
             import re
-            boxed_match = re.search(r'\\boxed\{([^}]+)\}', solution)
+
+            boxed_match = re.search(r"\\boxed\{([^}]+)\}", solution)
             correct_answer = boxed_match.group(1) if boxed_match else "Unknown"

             question = Question(
@@ -132,7 +133,7 @@ def load_dataset(
                     "level": row.get("level", "Unknown"),
                     "subject": row["type"],
                     "solution": solution,
-                }
+                },
             )
             questions.append(question)

@@ -151,7 +152,7 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
         """Format prompt for MATH questions."""
         if prompt_style == "plain":
             return f"Solve this mathematics problem step by step:\n\n{question.question}\n\nProvide your final answer in the format: Answer: [your answer]"
-
+
         elif prompt_style == "explicit_cot":
             return f"""Solve this mathematics problem step by step, showing all your work:

@@ -165,6 +166,6 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 5. Verify your answer

 Provide your final answer in the format: Answer: [your answer]"""
-
+
         else:
             raise ValueError(f"Unknown prompt style: {prompt_style}")
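
Likewise for the MATH loader, the regex pulls the \boxed{...} value out of the solution text; only the string quoting changed. An isolated check with a made-up solution string:

import re

# Extract the final answer from a \boxed{...} expression, as in the hunk above.
solution = r"Adding the two results gives \boxed{42}."  # illustrative solution text
boxed_match = re.search(r"\\boxed\{([^}]+)\}", solution)
print(boxed_match.group(1) if boxed_match else "Unknown")  # 42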

bench/vllm_semantic_router_bench/dataset_implementations/openbookqa_dataset.py

Lines changed: 13 additions & 8 deletions
@@ -84,7 +84,7 @@ def load_dataset(
         if samples_per_category:
             np.random.seed(seed)
             random.seed(seed)
-
+
             sample_size = min(samples_per_category, len(df))
             df = df.sample(n=sample_size, random_state=seed)

@@ -94,13 +94,16 @@ def load_dataset(
             question_stem = row["question_stem"]
             choices = row["choices"]
             answer_key = row["answerKey"]  # A, B, C, D
-
+
             # Extract options from choices
             # Handle different possible structures for choices
             if isinstance(choices, dict) and "text" in choices:
                 options = choices["text"]
             elif isinstance(choices, list):
-                options = [choice["text"] if isinstance(choice, dict) else choice for choice in choices]
+                options = [
+                    choice["text"] if isinstance(choice, dict) else choice
+                    for choice in choices
+                ]
             else:
                 options = [str(choices)]  # Fallback

@@ -115,7 +118,7 @@ def load_dataset(
                     "difficulty": "Elementary",
                     "type": "science_reasoning",
                     "requires_fact_combination": True,
-                }
+                },
             )
             questions.append(question)

@@ -132,8 +135,10 @@ def load_dataset(

     def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
         """Format prompt for OpenBookQA questions."""
-        options_text = "\n".join([f"{chr(65+i)}) {opt}" for i, opt in enumerate(question.options)])
-
+        options_text = "\n".join(
+            [f"{chr(65+i)}) {opt}" for i, opt in enumerate(question.options)]
+        )
+
         if prompt_style == "plain":
             return f"""Question: {question.question}

@@ -142,7 +147,7 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 Think about what scientific facts and principles apply to this question.

 Provide your answer in the format 'Answer: [letter]'."""
-
+
         elif prompt_style == "explicit_cot":
             return f"""Question: {question.question}

@@ -157,6 +162,6 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 5. Select the best answer based on scientific principles

 Show your scientific reasoning step by step, then provide your answer in the format 'Answer: [letter]'."""
-
+
         else:
             raise ValueError(f"Unknown prompt style: {prompt_style}")
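
The reflowed comprehension in the OpenBookQA loader normalizes a "choices" field that may arrive as a dict with a "text" list, a list of dicts, or plain strings, and the join builds the lettered option block. A standalone sketch with fabricated choice data:

# Normalize a "choices" field and render lettered options, as in the hunks above.
choices = [{"text": "gravity"}, {"text": "friction"}, "magnetism"]  # illustrative data
if isinstance(choices, dict) and "text" in choices:
    options = choices["text"]
elif isinstance(choices, list):
    options = [
        choice["text"] if isinstance(choice, dict) else choice
        for choice in choices
    ]
else:
    options = [str(choices)]  # Fallback

options_text = "\n".join(
    [f"{chr(65+i)}) {opt}" for i, opt in enumerate(options)]
)
print(options_text)
# A) gravity
# B) friction
# C) magnetism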

bench/vllm_semantic_router_bench/dataset_implementations/sciq_dataset.py

Lines changed: 15 additions & 11 deletions
@@ -84,7 +84,7 @@ def load_dataset(
         if samples_per_category:
             np.random.seed(seed)
             random.seed(seed)
-
+
             sample_size = min(samples_per_category, len(df))
             df = df.sample(n=sample_size, random_state=seed)

@@ -93,13 +93,13 @@ def load_dataset(
         for _, row in df.iterrows():
             question_text = row["question"]
             correct_answer = row["correct_answer"]
-
+
             # Build options list
             options = [
                 row["correct_answer"],
-                row["distractor1"],
+                row["distractor1"],
                 row["distractor2"],
-                row["distractor3"]
+                row["distractor3"],
             ]
             # Shuffle options and find correct index
             random.seed(42)  # Fixed seed for reproducible option order
@@ -118,8 +118,10 @@ def load_dataset(
                 metadata={
                     "difficulty": "Moderate",
                     "type": "science_multiple_choice",
-                    "support": row.get("support", ""),  # Background passage if available
-                }
+                    "support": row.get(
+                        "support", ""
+                    ),  # Background passage if available
+                },
             )
             questions.append(question)

@@ -136,20 +138,22 @@ def load_dataset(

     def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
         """Format prompt for SciQ questions."""
-        options_text = "\n".join([f"{chr(65+i)}) {opt}" for i, opt in enumerate(question.options)])
-
+        options_text = "\n".join(
+            [f"{chr(65+i)}) {opt}" for i, opt in enumerate(question.options)]
+        )
+
         # Add support passage if available
         support_text = ""
         if question.metadata and question.metadata.get("support"):
             support_text = f"Background: {question.metadata['support']}\n\n"
-
+
         if prompt_style == "plain":
             return f"""{support_text}Question: {question.question}

{options_text}

Provide your answer in the format 'Answer: [letter]'."""
-
+
         elif prompt_style == "explicit_cot":
             return f"""{support_text}Question: {question.question}

@@ -164,6 +168,6 @@ def format_prompt(self, question: Question, prompt_style: str = "plain") -> str:
 5. Select the best answer

 Show your scientific reasoning step by step, then provide your answer in the format 'Answer: [letter]'."""
-
+
         else:
             raise ValueError(f"Unknown prompt style: {prompt_style}")
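
The SciQ hunks only add trailing commas and reflow the row.get("support", ...) call; the option list itself is unchanged. A sketch of that assembly step with placeholder distractors (the shuffle call shown here is an assumption based on the "Shuffle options" comment, not code visible in this diff):

import random

row = {  # illustrative row, not real SciQ data
    "correct_answer": "photosynthesis",
    "distractor1": "respiration",
    "distractor2": "fermentation",
    "distractor3": "transpiration",
}
options = [
    row["correct_answer"],
    row["distractor1"],
    row["distractor2"],
    row["distractor3"],
]
random.seed(42)  # fixed seed for reproducible option order, as in the source
random.shuffle(options)  # assumption: the actual shuffle happens outside this hunk
correct_index = options.index(row["correct_answer"])
print(options, correct_index)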
