Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
eeb76eb
support G-Pass@k and livemathbench
jnanliu Dec 20, 2024
b856865
fix bugs
jnanliu Dec 20, 2024
f19b798
fix comments of GPassKEvaluator
jnanliu Dec 20, 2024
66cad93
update saved details of GPassKEvaluator
jnanliu Dec 20, 2024
0a8807f
update saved details of GPassKEvaluator
jnanliu Dec 20, 2024
3fdc500
fix eval api configs & update openai_api for ease of debugging
jnanliu Dec 21, 2024
6ca63ca
Merge branch 'main' into g-passk
jnanliu Dec 21, 2024
f0e2edd
update huggingface path
jnanliu Dec 23, 2024
98983e6
Merge branch 'g-passk' of github.com:jnanliu/opencompass into g-passk
jnanliu Dec 23, 2024
dfbe983
fix method name of G-Pass@k
jnanliu Dec 23, 2024
1dd6b77
fix default value of eval_model_name
jnanliu Dec 23, 2024
8280c11
refactor G-Pass@k evaluator
jnanliu Dec 25, 2024
ab8cb95
log generation params for each backend
jnanliu Dec 25, 2024
3509a26
fix evaluation resume
jnanliu Dec 26, 2024
bcc74fd
add notimplementerror
jnanliu Dec 27, 2024
1f5161c
update livemathbench-hard configs
jnanliu Jan 15, 2025
f7c8cda
Merge branch 'main' into g-passk
jnanliu Jan 15, 2025
63ae127
Merge branch 'main' into g-passk
jnanliu Feb 25, 2025
3213c01
remove max_out_len from livemathbench_hard_greedy_gen_9befbf.py
jnanliu Feb 25, 2025
dd3ce15
remove max_out_len from livemathbench_hard_gen_9befbf.py
jnanliu Feb 25, 2025
7f59be8
rename livemathbench_hard_gen_9befbf.py to livemathbench_hard_gen_353…
jnanliu Feb 25, 2025
2ca63ff
rename livemathbench_hard_greedy_gen_9befbf.py to livemathbench_hard_…
jnanliu Feb 25, 2025
33a9c4a
update livemathbench_gen_9befbf.py
jnanliu Feb 25, 2025
daf4337
remove whitespace
jnanliu Feb 25, 2025
b55cb1a
upload livemathbench hard configs
jnanliu Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@
)
)
)
livemathbench_datasets = [livemathbench_dataset]
livemathbench_datasets = [livemathbench_dataset]

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
from .livemathbench_greedy_gen_efb20d import livemathbench_datasets # noqa: F401, F403
from .livemathbench_greedy_gen_9befbf import livemathbench_datasets # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer

from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator


# Dataset config for the 'hard' split of LiveMathBench, version '202412',
# in the multi-sample variant (k=16, replication=3).
# NOTE(review): k / replication presumably control how many generations are
# drawn per question for G-Pass@k -- confirm against LiveMathBenchDataset.
livemathbench_dataset = dict(
    type=LiveMathBenchDataset,
    # NOTE(review): an empty path presumably means "no local data directory";
    # confirm what the dataset loader falls back to in that case.
    path='',
    k=16,
    replication=3,
    # NOTE(review): presumably one data subset per (split, language) pair,
    # i.e. hard/cn and hard/en here -- verify against the loader.
    dataset_splits=['hard'],
    dataset_languages=['cn', 'en'],
    cot=True,
    version='202412',
    abbr='LiveMathBench-v202412-Hard',
    # The 'prompt' column is the model input; 'answer' is the reference column.
    reader_cfg=dict(
        input_columns=['prompt'],
        output_column='answer'
    ),
    infer_cfg=dict(
        # Single HUMAN turn whose text is the dataset's `prompt` field.
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role='HUMAN', prompt='{prompt}'),
                ]
            )
        ),
        retriever=dict(type=ZeroRetriever),
        # No generation limits are set here (e.g. no max_out_len).
        inferencer=dict(
            type=GenInferencer
        ),
    ),
    eval_cfg=dict(
        evaluator=dict(
            type=LiveMathBenchEvaluator,
            # NOTE(review): model_name / url are left empty; presumably the
            # user must fill in a judge model endpoint before evaluation.
            model_name='',
            url=[],
            # Answer-extraction model is disabled, so its endpoint fields
            # below are left empty as well.
            use_extract_model=False,
            extract_url=[],
            extract_model_name='',
            # NOTE(review): presumably the k values and thresholds at which
            # G-Pass@k is reported; the largest k equals the dataset-level
            # k above -- confirm they must stay in sync.
            k=[4, 8, 16],
            # Same value as the dataset-level `replication` above.
            replication=3,
            thresholds=[0.0, 0.25, 0.5, 0.75, 1.0]
        )
    )
)
# List form of the config above, exported under the `*_datasets` name.
livemathbench_datasets = [livemathbench_dataset]
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer

from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator


# Dataset config for the 'hard' split of LiveMathBench, version '202412',
# in the single-sample variant (k=1, replication=1).
# NOTE(review): presumably this is the "greedy" configuration, with one
# generation per question -- confirm against LiveMathBenchDataset.
livemathbench_dataset = dict(
    type=LiveMathBenchDataset,
    # NOTE(review): an empty path presumably means "no local data directory";
    # confirm what the dataset loader falls back to in that case.
    path='',
    k=1,
    replication=1,
    # NOTE(review): presumably one data subset per (split, language) pair,
    # i.e. hard/cn and hard/en here -- verify against the loader.
    dataset_splits=['hard'],
    dataset_languages=['cn', 'en'],
    cot=True,
    version='202412',
    abbr='LiveMathBench-v202412-Hard',
    # The 'prompt' column is the model input; 'answer' is the reference column.
    reader_cfg=dict(
        input_columns=['prompt'],
        output_column='answer'
    ),
    infer_cfg=dict(
        # Single HUMAN turn whose text is the dataset's `prompt` field.
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role='HUMAN', prompt='{prompt}'),
                ]
            )
        ),
        retriever=dict(type=ZeroRetriever),
        # No generation limits are set here (e.g. no max_out_len).
        inferencer=dict(
            type=GenInferencer
        ),
    ),
    eval_cfg=dict(
        evaluator=dict(
            type=LiveMathBenchEvaluator,
            # NOTE(review): model_name / url are left empty; presumably the
            # user must fill in a judge model endpoint before evaluation.
            model_name='',
            url=[],
            # Answer-extraction model is disabled, so its endpoint fields
            # below are left empty as well.
            use_extract_model=False,
            extract_url=[],
            extract_model_name='',
            # Single k and single threshold, matching the dataset-level
            # k=1 / replication=1 above.
            # NOTE(review): with one sample this presumably reduces to plain
            # accuracy -- confirm against LiveMathBenchEvaluator.
            k=[1],
            replication=1,
            thresholds=[0.0]
        )
    )
)
# List form of the config above, exported under the `*_datasets` name.
livemathbench_datasets = [livemathbench_dataset]
2 changes: 1 addition & 1 deletion opencompass/datasets/livemathbench/livemathbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def load(path: str,
if path != '':
path = get_data_path(path)
path = os.path.join(path, version)

for split, language in product(dataset_splits, dataset_languages):
dataset_info[f'{split}_{language}'] = {
'single-choice': 0,
Expand All @@ -64,7 +65,6 @@ def load(path: str,

if path != '':
file_path = os.path.join(path, f'{split}_{language}.jsonl')

if not os.path.exists(file_path):
raise FileNotFoundError(
f'File {file_path} does not exist, please check the '
Expand Down
2 changes: 0 additions & 2 deletions opencompass/models/turbomind_with_tf_above_v4_33.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,6 @@ def generate(self,
self.logger.info('Generation Config of LMdeploy: ')
self.logger.info(gen_config)



results = []
outputs = self.pipe(messages, gen_config=gen_config, do_preprocess=False)
for output in outputs:
Expand Down