feat: make gt assertion optional

terryyz · terryyz · commit 7c7886e41073 · 2024-06-04T01:46:36.000+08:00
diff --git a/README.md b/README.md
@@ -189,6 +189,8 @@ You are strongly recommended to use a sandbox such as [docker](https://docs.dock
 docker run -v $(pwd):/bigcodebench terryzho/bigcodebench-evaluate:latest --subset [complete|instruct] --samples samples.jsonl
 # ...Or locally ⚠️
 bigcodebench.evaluate --subset [complete|instruct] --samples samples.jsonl
+# ...Or skip the ground-truth check if it is already known to pass in your environment
+bigcodebench.evaluate --subset [complete|instruct] --samples samples.jsonl --no-gt
 ```
 
 ...Or if you want to try it locally regardless of the risks ⚠️:
diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py
@@ -118,8 +118,11 @@ def evaluate(flags):
         results = compatible_eval_result(results)
     else:
         problems = get_bigcodebench()
-        dataset_hash = get_bigcodebench_hash()       
-        expected_time = get_groundtruth(problems, dataset_hash, flags.check_gt_only)
+        dataset_hash = get_bigcodebench_hash()
+        if flags.no_gt:
+            expected_time = [20]*len(problems)
+        else:
+            expected_time = get_groundtruth(problems, dataset_hash, flags.check_gt_only)
         
         if flags.check_gt_only:
             return
@@ -253,6 +256,9 @@ def main():
     parser.add_argument(
         "--check-gt-only", action="store_true", help="Check the groundtruth"
     )
+    parser.add_argument(
+        "--no-gt", action="store_true", help="Check the groundtruth"
+    )
     args = parser.parse_args()
 
     evaluate(args)