Skip to content

Commit a16f9e0

Browse files
authored
[Update] Update LiveMathBench Hard Configs (open-compass#1826)
* support G-Pass@k and livemathbench
* fix bugs
* fix comments of GPassKEvaluator
* update saved details of GPassKEvaluator
* update saved details of GPassKEvaluator
* fix eval api configs & update openai_api for ease of debugging
* update huggingface path
* fix method name of G-Pass@k
* fix default value of eval_model_name
* refactor G-Pass@k evaluator
* log generation params for each backend
* fix evaluation resume
* add notimplementerror
* update livemathbench-hard configs
* remove max_out_len from livemathbench_hard_greedy_gen_9befbf.py
* remove max_out_len from livemathbench_hard_gen_9befbf.py
* rename livemathbench_hard_gen_9befbf.py to livemathbench_hard_gen_353ae7.py
* rename livemathbench_hard_greedy_gen_9befbf.py to livemathbench_hard_greedy_gen_353ae7.py
* update livemathbench_gen_9befbf.py
* remove whitespace
* upload livemathbench hard configs
1 parent e112721 commit a16f9e0

File tree

7 files changed

+103
-54
lines changed

7 files changed

+103
-54
lines changed

opencompass/configs/datasets/livemathbench/livemathbench_gen_9befbf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,4 @@
4848
)
4949
)
5050
)
51-
livemathbench_datasets = [livemathbench_dataset]
51+
livemathbench_datasets = [livemathbench_dataset]

opencompass/configs/datasets/livemathbench/livemathbench_gen_f1c095.py

Lines changed: 0 additions & 49 deletions
This file was deleted.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from mmengine.config import read_base
22

33
with read_base():
4-
from .livemathbench_greedy_gen_efb20d import livemathbench_datasets # noqa: F401, F403
4+
from .livemathbench_greedy_gen_9befbf import livemathbench_datasets # noqa: F401, F403
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from opencompass.openicl.icl_prompt_template import PromptTemplate
2+
from opencompass.openicl.icl_retriever import ZeroRetriever
3+
from opencompass.openicl.icl_inferencer import GenInferencer
4+
5+
from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator
6+
7+
8+
livemathbench_dataset = dict(
9+
type=LiveMathBenchDataset,
10+
path='',
11+
k=16,
12+
replication=3,
13+
dataset_splits=['hard'],
14+
dataset_languages=['cn', 'en'],
15+
cot=True,
16+
version='202412',
17+
abbr='LiveMathBench-v202412-Hard',
18+
reader_cfg=dict(
19+
input_columns=['prompt'],
20+
output_column='answer'
21+
),
22+
infer_cfg=dict(
23+
prompt_template=dict(
24+
type=PromptTemplate,
25+
template=dict(
26+
round=[
27+
dict(role='HUMAN', prompt='{prompt}'),
28+
]
29+
)
30+
),
31+
retriever=dict(type=ZeroRetriever),
32+
inferencer=dict(
33+
type=GenInferencer
34+
),
35+
),
36+
eval_cfg=dict(
37+
evaluator=dict(
38+
type=LiveMathBenchEvaluator,
39+
model_name='',
40+
url=[],
41+
use_extract_model=False,
42+
extract_url=[],
43+
extract_model_name='',
44+
k=[4, 8, 16],
45+
replication=3,
46+
thresholds=[0.0, 0.25, 0.5, 0.75, 1.0]
47+
)
48+
)
49+
)
50+
livemathbench_datasets = [livemathbench_dataset]
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from opencompass.openicl.icl_prompt_template import PromptTemplate
2+
from opencompass.openicl.icl_retriever import ZeroRetriever
3+
from opencompass.openicl.icl_inferencer import GenInferencer
4+
5+
from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator
6+
7+
8+
livemathbench_dataset = dict(
9+
type=LiveMathBenchDataset,
10+
path='',
11+
k=1,
12+
replication=1,
13+
dataset_splits=['hard'],
14+
dataset_languages=['cn', 'en'],
15+
cot=True,
16+
version='202412',
17+
abbr='LiveMathBench-v202412-Hard',
18+
reader_cfg=dict(
19+
input_columns=['prompt'],
20+
output_column='answer'
21+
),
22+
infer_cfg=dict(
23+
prompt_template=dict(
24+
type=PromptTemplate,
25+
template=dict(
26+
round=[
27+
dict(role='HUMAN', prompt='{prompt}'),
28+
]
29+
)
30+
),
31+
retriever=dict(type=ZeroRetriever),
32+
inferencer=dict(
33+
type=GenInferencer
34+
),
35+
),
36+
eval_cfg=dict(
37+
evaluator=dict(
38+
type=LiveMathBenchEvaluator,
39+
model_name='',
40+
url=[],
41+
use_extract_model=False,
42+
extract_url=[],
43+
extract_model_name='',
44+
k=[1],
45+
replication=1,
46+
thresholds=[0.0]
47+
)
48+
)
49+
)
50+
livemathbench_datasets = [livemathbench_dataset]

opencompass/datasets/livemathbench/livemathbench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def load(path: str,
4848
if path != '':
4949
path = get_data_path(path)
5050
path = os.path.join(path, version)
51+
5152
for split, language in product(dataset_splits, dataset_languages):
5253
dataset_info[f'{split}_{language}'] = {
5354
'single-choice': 0,
@@ -64,7 +65,6 @@ def load(path: str,
6465

6566
if path != '':
6667
file_path = os.path.join(path, f'{split}_{language}.jsonl')
67-
6868
if not os.path.exists(file_path):
6969
raise FileNotFoundError(
7070
f'File {file_path} does not exist, please check the '

opencompass/models/turbomind_with_tf_above_v4_33.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,6 @@ def generate(self,
164164
self.logger.info('Generation Config of LMdeploy: ')
165165
self.logger.info(gen_config)
166166

167-
168-
169167
results = []
170168
outputs = self.pipe(messages, gen_config=gen_config, do_preprocess=False)
171169
for output in outputs:

0 commit comments

Comments
 (0)