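Fix MMLU eval summary serialization (cast NumPy counts to int before json.dump); update C-Eval README run instructions and MMLU attribution headers.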

zzjjay · zzjjay · commit 3d24a42c32dc · 2023-08-22T10:47:21.000+08:00
diff --git a/examples/benchmark/ceval/README.md b/examples/benchmark/ceval/README.md
@@ -10,14 +10,12 @@
 wget https://huggingface.co/datasets/ceval/ceval-exam/resolve/main/ceval-exam.zip
 unzip ceval-exam.zip -d data
 ```
-将data文件夹放置于本项目的scripts/ceval目录下。
 
 ## 运行预测脚本
 
-运行以下脚本：
+在当前目录运行以下脚本：
 
 ```
-cd scripts/ceval
 python eval.py \
     --model_name_or_path /path/to/your/model \
     --cot False \
diff --git a/examples/benchmark/mmlu/categories.py b/examples/benchmark/mmlu/categories.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Adapted from https://github.com/hendrycks/test
+
 subcategories = {
     "abstract_algebra": ["math"],
     "anatomy": ["health"],
diff --git a/examples/benchmark/mmlu/eval.py b/examples/benchmark/mmlu/eval.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-# Adapted from https://github.com/ymcui/Chinese-LLaMA-Alpaca and https://github.com/SJTU-LIT/ceval
+# Adapted from https://github.com/hendrycks/test
 import argparse
 import json
 import os
@@ -65,8 +64,8 @@ def main(args, evaluator):
         print("Average accuracy {:.3f} - {}".format(subcat_acc, subcat))
         summary[subcat] = {
             "acc:": subcat_acc,
-            "correct:": np.sum(np.concatenate(subcat_cors[subcat])),
-            "num:": np.concatenate(subcat_cors[subcat]).size,
+            "correct:": int(np.sum(np.concatenate(subcat_cors[subcat]))),
+            "num:": int(np.concatenate(subcat_cors[subcat]).size),
         }
 
     for cat in cat_cors:
@@ -77,8 +76,8 @@ def main(args, evaluator):
     print("Model:", args.model_name_or_path)
     summary["All"] = {
         "acc:": weighted_acc,
-        "correct:": np.sum(np.concatenate(all_cors)),
-        "num:": np.concatenate(all_cors).size,
+        "correct:": int(np.sum(np.concatenate(all_cors))),
+        "num:": int(np.concatenate(all_cors).size),
     }
     json.dump(
         summary,
diff --git a/examples/benchmark/mmlu/evaluator.py b/examples/benchmark/mmlu/evaluator.py
@@ -11,10 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# Adapted from https://github.com/hendrycks/test
 
 import numpy as np
-
-# Adapted from https://github.com/ymcui/Chinese-LLaMA-Alpaca and https://github.com/SJTU-LIT/ceval
 import paddle
 from tqdm import tqdm