Skip to content

Commit 3d24a42

Browse files
committed
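fix bugs: cast NumPy counts to int before json.dump in the MMLU eval summary, correct the "Adapted from" attributions (and add the missing license header to categories.py), and update the C-Eval README run instructions.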
1 parent f0b7440 commit 3d24a42

File tree

4 files changed

+22
-11
lines changed

4 files changed

+22
-11
lines changed

examples/benchmark/ceval/README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@
1010
wget https://huggingface.co/datasets/ceval/ceval-exam/resolve/main/ceval-exam.zip
1111
unzip ceval-exam.zip -d data
1212
```
13-
将data文件夹放置于本项目的scripts/ceval目录下。
1413

1514
## 运行预测脚本
1615

17-
运行以下脚本
16+
在当前目录运行以下脚本
1817

1918
```
20-
cd scripts/ceval
2119
python eval.py \
2220
--model_name_or_path /path/to/your/model \
2321
--cot False \

examples/benchmark/mmlu/categories.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# Adapted from https://github.com/hendrycks/test
15+
116
subcategories = {
217
"abstract_algebra": ["math"],
318
"anatomy": ["health"],

examples/benchmark/mmlu/eval.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
15-
# Adapted from https://github.com/ymcui/Chinese-LLaMA-Alpaca and https://github.com/SJTU-LIT/ceval
14+
# Adapted from https://github.com/hendrycks/test
1615
import argparse
1716
import json
1817
import os
@@ -65,8 +64,8 @@ def main(args, evaluator):
6564
print("Average accuracy {:.3f} - {}".format(subcat_acc, subcat))
6665
summary[subcat] = {
6766
"acc:": subcat_acc,
68-
"correct:": np.sum(np.concatenate(subcat_cors[subcat])),
69-
"num:": np.concatenate(subcat_cors[subcat]).size,
67+
"correct:": int(np.sum(np.concatenate(subcat_cors[subcat]))),
68+
"num:": int(np.concatenate(subcat_cors[subcat]).size),
7069
}
7170

7271
for cat in cat_cors:
@@ -77,8 +76,8 @@ def main(args, evaluator):
7776
print("Model:", args.model_name_or_path)
7877
summary["All"] = {
7978
"acc:": weighted_acc,
80-
"correct:": np.sum(np.concatenate(all_cors)),
81-
"num:": np.concatenate(all_cors).size,
79+
"correct:": int(np.sum(np.concatenate(all_cors))),
80+
"num:": int(np.concatenate(all_cors).size),
8281
}
8382
json.dump(
8483
summary,

examples/benchmark/mmlu/evaluator.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
# Adapted from https://github.com/hendrycks/test
1415

1516
import numpy as np
16-
17-
# Adapted from https://github.com/ymcui/Chinese-LLaMA-Alpaca and https://github.com/SJTU-LIT/ceval
1817
import paddle
1918
from tqdm import tqdm
2019

0 commit comments

Comments
 (0)