Skip to content

Commit f7d8998

Browse files
authored
[Update] Update mmmlu_lite dataload (#1658)
* update mmmlu_lite dataload from oss
* update mmmlu_lite dataload from oss
1 parent c789ce5 commit f7d8998

File tree

4 files changed, with 24 additions and 12 deletions.

opencompass/configs/datasets/mmmlu_lite/README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,8 @@ MMMLU contains the MMLU test set translated into the following locales:
3131

3232

3333
## How to Use
34-
Download file from [link](https://hf-mirror.com/datasets/openai/MMMLU)
3534

3635
```python
3736
from datasets import load_dataset
38-
ds = load_dataset("openai/MMMLU", "default")
39-
from datasets import load_dataset
40-
ds = load_dataset("openai/MMMLU", "by_language")
37+
ds = load_dataset("opencompass/mmmlu_lite", "AR_XY")
4138
```

opencompass/configs/datasets/mmmlu_lite/mmmlu_lite_gen_c51a84.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@
9595
dict(
9696
abbr=f'openai_m{_name}',
9797
type=MMMLULiteDataset,
98-
# path='opencompass/mmmlu_lite',
99-
path = './data/mmmlu_lite',
98+
path='opencompass/mmmlu_lite',
10099
name=f'openai_m{_name}',
101100
reader_cfg=mmmlu_lite_reader_cfg,
102101
infer_cfg=mmmlu_lite_infer_cfg,

opencompass/datasets/mmmlu.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# yapf: disable
33

44
import json
5-
import os
5+
import os.path as osp
66

77
from datasets import Dataset, DatasetDict, load_dataset
88

@@ -43,10 +43,12 @@ class MMMLULiteDataset(BaseDataset):
4343

4444
@staticmethod
4545
def load(path: str, name: str):
46+
path = get_data_path(path, local_mode=False)
4647
dataset = DatasetDict()
47-
path = os.path.join(path, name + '.jsonl')
48-
dataset_list = []
49-
with open(path, 'r') as f:
50-
dataset_list = [json.loads(line) for line in f.readlines()]
51-
dataset['test'] = Dataset.from_list(dataset_list)
48+
name = name.split('_')[-1]
49+
raw_data = []
50+
filename = osp.join(path, name, 'test.jsonl')
51+
with open(filename, encoding='utf-8') as f:
52+
raw_data = [json.loads(line) for line in f.readlines()]
53+
dataset['test'] = Dataset.from_list(raw_data)
5254
return dataset

opencompass/utils/datasets_info.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,16 @@
316316
"ms_id": "",
317317
"hf_id": "",
318318
"local": "./data/WikiBench/",
319+
},
320+
"opencompass/mmmlu_lite": {
321+
"ms_id": "",
322+
"hf_id": "",
323+
"local": "./data/mmmlu_lite",
324+
},
325+
"opencompass/mmmlu_lite": {
326+
"ms_id": "",
327+
"hf_id": "",
328+
"local": "./data/mmmlu_lite",
319329
}
320330
}
321331

@@ -324,6 +334,10 @@
324334
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
325335
"md5": "761310671509a239e41c4b717f7fab9c",
326336
},
337+
"/mmmlu_lite": {
338+
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmmlu_lite.zip",
339+
"md5": "a776af1220e1826fd0608eda1bc4425e",
340+
},
327341
"/gpqa/": {
328342
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/gpqa.zip",
329343
"md5": "2e9657959030a765916f1f2aca29140d",

0 commit comments

Comments
 (0)