Skip to content

Commit a13bf9e

Browse files
committed
fix: update calibrate option
1 parent 697dabd commit a13bf9e

File tree

2 files changed

+16
-13
lines changed

2 files changed

+16
-13
lines changed

bigcodebench/evaluate.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -96,16 +96,19 @@ def evaluate(flags):
9696
if flags.check_gt_only:
9797
# bypass the samples
9898
flags.samples = "__dummy__.jsonl"
99-
99+
100+
if flags.calibrate:
101+
assert "calibrate" in flags.samples, "Calibration is only supported for calibrated samples"
102+
100103
if os.path.isdir(flags.samples):
101-
if flags.reprompt:
102-
result_path = os.path.join(flags.samples, "reprompt_eval_results.json")
104+
if flags.calibrate:
105+
result_path = os.path.join(flags.samples, "calibrate_eval_results.json")
103106
else:
104107
result_path = os.path.join(flags.samples, "eval_results.json")
105108
else:
106109
assert flags.samples.endswith(".jsonl")
107-
if flags.reprompt:
108-
result_path = flags.samples.replace(".jsonl", "_reprompt_eval_results.json")
110+
if flags.calibrate:
111+
result_path = flags.samples.replace(".jsonl", "_calibrate_eval_results.json")
109112
else:
110113
result_path = flags.samples.replace(".jsonl", "_eval_results.json")
111114

@@ -150,7 +153,7 @@ def evaluate(flags):
150153
if "solution" in sample
151154
else problems[task_id]["prompt"] + sample["completion"]
152155
)
153-
if flags.reprompt:
156+
if flags.calibrate:
154157
solution = problems[task_id]["prompt_wo_doc"] + "\n pass\n" + solution
155158
remainings.add(sample["_identifier"])
156159
args = (
@@ -249,7 +252,7 @@ def main():
249252
parser.add_argument("--parallel", default=None, type=int)
250253
parser.add_argument("--min-time-limit", default=1, type=float)
251254
parser.add_argument(
252-
"--reprompt", action="store_true", help="Prepend the prompt again"
255+
"--calibrate", action="store_true", help="Calibrate the evaluation results"
253256
)
254257
parser.add_argument(
255258
"--check-gt-only", action="store_true", help="Check the groundtruth"

bigcodebench/sanitize.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,7 @@ def sanitize(code: str, entrypoint: Optional[str] = None) -> str:
179179

180180

181181
def script(
182-
samples: str, inplace: bool = False, debug_task: str = None, reprompt: bool = False
182+
samples: str, inplace: bool = False, debug_task: str = None, calibrate: bool = False
183183
):
184184
# task_id -> entry_point
185185
entry_point = {}
@@ -194,13 +194,13 @@ def script(
194194
target_path = pathlib.Path(samples)
195195
if not inplace:
196196
if is_folder:
197-
if reprompt:
198-
new_name = target_path.name + "-sanitized-reprompt"
197+
if calibrate:
198+
new_name = target_path.name + "-sanitized-calibrate"
199199
else:
200200
new_name = target_path.name + "-sanitized"
201201
else:
202-
if reprompt:
203-
new_name = target_path.name.replace(".jsonl", "-sanitized-reprompt.jsonl")
202+
if calibrate:
203+
new_name = target_path.name.replace(".jsonl", "-sanitized-calibrate.jsonl")
204204
else:
205205
new_name = target_path.name.replace(".jsonl", "-sanitized.jsonl")
206206
target_path = target_path.parent / new_name
@@ -227,7 +227,7 @@ def script(
227227
ntotal += 1
228228
if "solution" in solution:
229229
old_code = solution["solution"]
230-
if reprompt:
230+
if calibrate:
231231
old_code = solution["solution"].replace("```python\n ", "```python\n"+dataset[task_id]["prompt"]+" ")
232232
else:
233233
assert "completion" in solution

0 commit comments

Comments
 (0)