Skip to content

Commit fcaa7aa

Browse files
committed
feat: rename subset
1 parent be21a9a commit fcaa7aa

File tree

4 files changed

+27
-29
lines changed

4 files changed

+27
-29
lines changed

bigcodebench/data/bigcodebench.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,16 @@
1616
BIGCODEBENCH_HF = "bigcode/bigcodebench"
1717
BIGCODEBENCH_VERSION = "v0.1.0_hf"
1818

19-
def _ready_bigcodebench_path(hard=False, version="default") -> str:
19+
def _ready_bigcodebench_path(subset="", version="default") -> str:
2020
if BIGCODEBENCH_OVERRIDE_PATH:
2121
return BIGCODEBENCH_OVERRIDE_PATH
2222

2323
version = BIGCODEBENCH_VERSION if version == "default" else version
2424
url, path = get_dataset_metadata(
25-
BIGCODEBENCH_VERSION, hard
25+
BIGCODEBENCH_VERSION, subset
2626
)
2727

28-
extra = "-hard" if hard else ""
28+
extra = "-subset" if subset else ""
2929

3030
try:
3131
dataset = load_dataset(BIGCODEBENCH_HF+extra, split=BIGCODEBENCH_VERSION)
@@ -39,7 +39,7 @@ def _ready_bigcodebench_path(hard=False, version="default") -> str:
3939

4040

4141
def get_bigcodebench(
42-
err_incomplete=True, hard=False, version="default"
42+
err_incomplete=True, subset="full", version="default"
4343
) -> Dict[str, Dict]:
4444
"""Get BigCodeBench from BigCode's github repo and return as a list of parsed dicts.
4545
@@ -56,19 +56,19 @@ def get_bigcodebench(
5656
"""
5757
# Check if open eval file exists in CACHE_DIR
5858
data_path = _ready_bigcodebench_path(
59-
hard=hard, version=version
59+
subset=subset, version=version
6060
)
6161
data = {task["task_id"]: task for task in stream_jsonl(data_path)}
6262
if err_incomplete:
6363
completeness_check("BigCodeBench", data)
6464
return data
6565

66-
def get_bigcodebench_hash(hard=False, version="default") -> str:
66+
def get_bigcodebench_hash(subset="", version="default") -> str:
6767
"""Get the hash of BigCodeBench.
6868
Returns:
6969
str: The hash of BigCodeBench
7070
"""
71-
data_path = _ready_bigcodebench_path(hard, version="default")
71+
data_path = _ready_bigcodebench_path(subset, version="default")
7272
with open(data_path, "rb") as f:
7373
data = f.read()
7474
return hashlib.md5(data).hexdigest()

bigcodebench/data/utils.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,8 @@
1111
CACHE_DIR = user_cache_dir("bigcodebench")
1212

1313

14-
def get_dataset_metadata(version: str, hard: bool = False):
15-
extra = ""
16-
if hard:
17-
extra = "-Hard"
14+
def get_dataset_metadata(version: str, subset: str=""):
15+
extra = "-" + subset.capitalize() if subset else ""
1816
url = f"https://github.com/bigcode-project/bigcodebench-annotation/releases/download/{version}/BigCodeBench{extra}.jsonl.gz"
1917
cache_path = os.path.join(CACHE_DIR, f"BigCodeBench{extra}-{version}.jsonl")
2018
return url, cache_path

bigcodebench/evaluate.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,9 @@ def stucking_checker():
247247
}
248248

249249
mode = "-calibrated" if "sanitized-calibrated" in flags.samples else ""
250-
extra = "Full" if not flags.hard else "Hard"
251-
flags.subset = flags.subset[0].upper() + flags.subset[1:]
252-
cprint(f"BigCodeBench-{flags.subset}{mode} ({extra})", "green")
250+
extra = flags.subset.capitalize()
251+
flags.split = flags.split.capitalize()
252+
cprint(f"BigCodeBench-{flags.split}{mode} ({extra})", "green")
253253

254254
if flags.no_gt:
255255
cprint(f"Groundtruth is not checked", "yellow")
@@ -285,7 +285,7 @@ def stucking_checker():
285285
def main():
286286
parser = argparse.ArgumentParser()
287287
parser.add_argument(
288-
"--subset", required=True, type=str, choices=["complete", "instruct"]
288+
"--split", required=True, type=str, choices=["complete", "instruct"]
289289
)
290290
parser.add_argument("--hard", action="store_true")
291291
parser.add_argument("--samples", required=True, type=str)

bigcodebench/generate.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,26 @@
1616
def codegen(
1717
model: DecoderBase,
1818
save_path: str,
19-
subset: str,
20-
hard=False,
19+
split: str,
20+
subset="full",
2121
greedy=False,
2222
strip_newlines=False,
2323
n_samples=1,
2424
id_range=None,
2525
resume=True,
2626
):
27-
extra = "Full" if not hard else "Hard"
27+
extra = "-" + subset.capitalize() if subset else ""
2828
with Progress(
29-
TextColumn(f"BigCodeBench--{subset} ({extra}) •" + "[progress.percentage]{task.percentage:>3.0f}%"),
29+
TextColumn(f"BigCodeBench--{split} ({extra}) •" + "[progress.percentage]{task.percentage:>3.0f}%"),
3030
BarColumn(),
3131
MofNCompleteColumn(),
3232
TextColumn("•"),
3333
TimeElapsedColumn(),
3434
) as p:
3535

36-
dataset = get_bigcodebench(hard=hard)
36+
dataset = get_bigcodebench(subset=subset)
3737

38-
if model.is_direct_completion() and subset == "instruct":
38+
if model.is_direct_completion() and split == "instruct":
3939
raise Exception("Base model does not support direct completion for instruct tasks")
4040

4141
# create save_path if it doesn't exist, e.g., a/b.jsonl
@@ -72,9 +72,9 @@ def codegen(
7272
sidx = n_samples - nsamples
7373
while sidx < n_samples:
7474
try:
75-
prompt = task[f"{subset}_prompt"]
75+
prompt = task[f"{split}_prompt"]
7676
except:
77-
raise Exception(f"Invalid subset {subset}")
77+
raise Exception(f"Invalid split {split}")
7878
if strip_newlines:
7979
prompt = prompt.strip("\n")
8080
outputs = model.codegen(
@@ -107,8 +107,8 @@ def codegen(
107107
def main():
108108
parser = argparse.ArgumentParser()
109109
parser.add_argument("--model", required=True, type=str)
110-
parser.add_argument("--subset", required=True, type=str)
111-
parser.add_argument("--hard", action="store_true")
110+
parser.add_argument("--split", required=True, type=str)
111+
parser.add_argument("--subset", default="", type=str)
112112
parser.add_argument("--save_path", default=None, type=str)
113113
parser.add_argument("--bs", default=1, type=int)
114114
parser.add_argument("--n_samples", default=1, type=int)
@@ -124,7 +124,7 @@ def main():
124124
args = parser.parse_args()
125125

126126

127-
assert args.subset in ["complete", "instruct"], f"Invalid subset {args.subset}"
127+
assert args.split in ["complete", "instruct"], f"Invalid split {args.split}"
128128
assert args.backend in ["vllm", "hf", "openai", "mistral", "anthropic", "google"]
129129

130130
if args.greedy and (args.temperature != 0 or args.bs != 1 or args.n_samples != 1)\
@@ -151,17 +151,17 @@ def main():
151151
trust_remote_code=args.trust_remote_code
152152
)
153153

154-
extra = "" if not args.hard else "-hard"
154+
extra = "-"+args.subset if args.subset
155155
if not args.save_path:
156-
save_path = args.model.replace("/", "--") + f"--bigcodebench{extra}-{args.subset}--{args.backend}-{args.temperature}-{args.n_samples}.jsonl"
156+
save_path = args.model.replace("/", "--") + f"--bigcodebench{extra}-{args.split}--{args.backend}-{args.temperature}-{args.n_samples}.jsonl"
157157
else:
158158
save_path = args.save_path
159159

160160
codegen(
161161
model=model_runner,
162162
save_path=save_path,
163+
split=args.split,
163164
subset=args.subset,
164-
hard=args.hard,
165165
greedy=args.greedy,
166166
strip_newlines=args.strip_newlines,
167167
n_samples=args.n_samples,

0 commit comments

Comments
 (0)