Skip to content

Commit d7cfe54

Browse files
committed
fix: make calibration automatic
1 parent 7c7886e commit d7cfe54

File tree

1 file changed

+3
-15
lines changed

1 file changed

+3
-15
lines changed

bigcodebench/evaluate.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -95,20 +95,11 @@ def evaluate(flags):
9595
# bypass the samples
9696
flags.samples = "__dummy__.jsonl"
9797

98-
if flags.calibrate:
99-
assert "calibrate" in flags.samples, "Calibration is only supported for calibrated samples"
100-
10198
if os.path.isdir(flags.samples):
102-
if flags.calibrate:
103-
result_path = os.path.join(flags.samples, "calibrate_eval_results.json")
104-
else:
105-
result_path = os.path.join(flags.samples, "eval_results.json")
99+
result_path = os.path.join(flags.samples, "eval_results.json")
106100
else:
107101
assert flags.samples.endswith(".jsonl")
108-
if flags.calibrate:
109-
result_path = flags.samples.replace(".jsonl", "_calibrate_eval_results.json")
110-
else:
111-
result_path = flags.samples.replace(".jsonl", "_eval_results.json")
102+
result_path = flags.samples.replace(".jsonl", "_eval_results.json")
112103

113104
if os.path.isfile(result_path):
114105
print(f"Load from previous results from {result_path}")
@@ -153,7 +144,7 @@ def evaluate(flags):
153144
if "solution" in sample
154145
else problems[task_id]["prompt"] + sample["completion"]
155146
)
156-
if flags.calibrate:
147+
if "sanitized-calibrate" in flags.samples:
157148
solution = problems[task_id]["prompt_wo_doc"] + "\n pass\n" + solution
158149
remainings.add(sample["_identifier"])
159150
args = (
@@ -250,9 +241,6 @@ def main():
250241
parser.add_argument("--samples", required=True, type=str)
251242
parser.add_argument("--parallel", default=None, type=int)
252243
parser.add_argument("--min-time-limit", default=1, type=float)
253-
parser.add_argument(
254-
"--calibrate", action="store_true", help="Calibrate the evaluation results"
255-
)
256244
parser.add_argument(
257245
"--check-gt-only", action="store_true", help="Check the groundtruth"
258246
)

0 commit comments

Comments
 (0)