1616def codegen (
1717 model : DecoderBase ,
1818 save_path : str ,
19- subset : str ,
20- hard = False ,
19+ split : str ,
20+ subset = "full" ,
2121 greedy = False ,
2222 strip_newlines = False ,
2323 n_samples = 1 ,
2424 id_range = None ,
2525 resume = True ,
2626):
27- extra = "Full" if not hard else "Hard "
27+ extra = "-" + subset . capitalize () if subset else ""
2828 with Progress (
29- TextColumn (f"BigCodeBench--{ subset } ({ extra } ) •" + "[progress.percentage]{task.percentage:>3.0f}%" ),
29+ TextColumn (f"BigCodeBench--{ split } ({ extra } ) •" + "[progress.percentage]{task.percentage:>3.0f}%" ),
3030 BarColumn (),
3131 MofNCompleteColumn (),
3232 TextColumn ("•" ),
3333 TimeElapsedColumn (),
3434 ) as p :
3535
36- dataset = get_bigcodebench (hard = hard )
36+ dataset = get_bigcodebench (subset = subset )
3737
38- if model .is_direct_completion () and subset == "instruct" :
38+ if model .is_direct_completion () and split == "instruct" :
3939 raise Exception ("Base model does not support direct completion for instruct tasks" )
4040
4141 # create save_path if it doesn't exist, e.g., a/b.jsonl
@@ -72,9 +72,9 @@ def codegen(
7272 sidx = n_samples - nsamples
7373 while sidx < n_samples :
7474 try :
75- prompt = task [f"{ subset } _prompt" ]
75+ prompt = task [f"{ split } _prompt" ]
7676 except :
77- raise Exception (f"Invalid subset { subset } " )
77+ raise Exception (f"Invalid split { split } " )
7878 if strip_newlines :
7979 prompt = prompt .strip ("\n " )
8080 outputs = model .codegen (
@@ -107,8 +107,8 @@ def codegen(
107107def main ():
108108 parser = argparse .ArgumentParser ()
109109 parser .add_argument ("--model" , required = True , type = str )
110- parser .add_argument ("--subset " , required = True , type = str )
111- parser .add_argument ("--hard " , action = "store_true" )
110+ parser .add_argument ("--split " , required = True , type = str )
111+ parser .add_argument ("--subset " , default = "" , type = str )
112112 parser .add_argument ("--save_path" , default = None , type = str )
113113 parser .add_argument ("--bs" , default = 1 , type = int )
114114 parser .add_argument ("--n_samples" , default = 1 , type = int )
@@ -124,7 +124,7 @@ def main():
124124 args = parser .parse_args ()
125125
126126
127- assert args .subset in ["complete" , "instruct" ], f"Invalid subset { args .subset } "
127+ assert args .split in ["complete" , "instruct" ], f"Invalid split { args .split } "
128128 assert args .backend in ["vllm" , "hf" , "openai" , "mistral" , "anthropic" , "google" ]
129129
130130 if args .greedy and (args .temperature != 0 or args .bs != 1 or args .n_samples != 1 )\
@@ -151,17 +151,17 @@ def main():
151151 trust_remote_code = args .trust_remote_code
152152 )
153153
154- extra = "" if not args .hard else "-hard"
154+ extra = "-" + args . subset if args .subset else ""
155155 if not args .save_path :
156- save_path = args .model .replace ("/" , "--" ) + f"--bigcodebench{ extra } -{ args .subset } --{ args .backend } -{ args .temperature } -{ args .n_samples } .jsonl"
156+ save_path = args .model .replace ("/" , "--" ) + f"--bigcodebench{ extra } -{ args .split } --{ args .backend } -{ args .temperature } -{ args .n_samples } .jsonl"
157157 else :
158158 save_path = args .save_path
159159
160160 codegen (
161161 model = model_runner ,
162162 save_path = save_path ,
163+ split = args .split ,
163164 subset = args .subset ,
164- hard = args .hard ,
165165 greedy = args .greedy ,
166166 strip_newlines = args .strip_newlines ,
167167 n_samples = args .n_samples ,
0 commit comments