Skip to content

Commit 717835f

Browse files
committed
feat: refactor: model provider
1 parent f8897de commit 717835f

File tree

15 files changed

+656
-582
lines changed

15 files changed

+656
-582
lines changed

bigcodebench/evaluate.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def evaluate(
115115
split: str,
116116
subset: str,
117117
samples: Optional[str] = None,
118-
remote_execute: bool = True,
118+
local_execute: bool = False,
119119
remote_execute_api: str = "https://bigcode-bigcodebench-evaluator.hf.space/",
120120
pass_k: str = "1,5,10",
121121
save_pass_rate: bool = True,
@@ -135,16 +135,14 @@ def evaluate(
135135
**model_kwargs,
136136
)
137137
assert samples is not None, "No samples provided"
138-
139-
extra = subset + "_" if subset != "full" else ""
140-
138+
141139
if os.path.isdir(samples):
142-
result_path = os.path.join(samples, f"{extra}eval_results.json")
140+
result_path = os.path.join(samples, "eval_results.json")
143141
else:
144142
assert samples.endswith(".jsonl")
145-
result_path = samples.replace(".jsonl", f"_{extra}eval_results.json")
143+
result_path = samples.replace(".jsonl", "_eval_results.json")
146144

147-
if remote_execute:
145+
if not local_execute:
148146

149147
client = Client(remote_execute_api)
150148
results, pass_at_k = client.predict(
@@ -351,7 +349,7 @@ def stucking_checker():
351349
json.dump(results, f, indent=2)
352350

353351
if save_pass_rate:
354-
pass_at_k_path = result_path.replace("_eval_results.json", "_pass_at_k.json")
352+
pass_at_k_path = result_path.replace("eval_results.json", "pass_at_k.json")
355353

356354
if os.path.isfile(pass_at_k_path):
357355
saved_pass_at_k = json.load(open(pass_at_k_path, "r"))

bigcodebench/gen/util/anthropic_request.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,4 @@ def make_auto_request(client: anthropic.Client, *args, **kwargs) -> Message:
4444
print(e)
4545
signal.alarm(0)
4646
time.sleep(1)
47-
return ret
47+
return ret
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import time
2+
3+
import google.generativeai as genai
4+
from google.api_core.exceptions import GoogleAPICallError, ResourceExhausted
5+
6+
7+
def make_request(
    client: genai.GenerativeModel, temperature, messages, max_new_tokens=2048
) -> str:
    """Send one chat request to a Gemini model and return the generated text.

    Args:
        client: A configured ``genai.GenerativeModel`` instance.
        temperature: Sampling temperature forwarded to the generation config.
        messages: OpenAI-style message dicts with ``role``/``content`` keys;
            converted below to Gemini's ``role``/``parts`` format.
        max_new_tokens: Cap on the number of tokens to generate.

    Returns:
        The text of the single generated candidate.
        (Note: the original annotation said ``GenerateContentResponse``,
        but the function returns ``response.text``, a ``str``.)
    """
    # Gemini expects {"role": ..., "parts": [...]} rather than the
    # OpenAI-style {"role": ..., "content": ...} message shape.
    messages = [{"role": m["role"], "parts": [m["content"]]} for m in messages]
    response = client.generate_content(
        messages,
        generation_config=genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=max_new_tokens,
            temperature=temperature,
        ),
        # Benchmark prompts can trip the default safety filters, so blocking
        # is disabled for every category.
        safety_settings=[
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        ],
    )

    return response.text
27+
28+
29+
def make_auto_request(*args, **kwargs) -> str:
    """Call ``make_request`` until it succeeds, retrying on API errors.

    Retry policy:
        * ``ResourceExhausted`` (rate limit) — back off for 10 seconds.
        * Any other ``GoogleAPICallError`` or unexpected exception — retry
          after 1 second.
    Loops indefinitely until a response is obtained.

    Returns:
        The generated text from the first successful request (``make_request``
        returns ``response.text``, a ``str`` — the original
        ``GenerateContentResponse`` annotation was inaccurate).
    """
    ret = None
    while ret is None:
        try:
            ret = make_request(*args, **kwargs)
        except ResourceExhausted as e:
            # Rate limited: wait substantially longer before retrying.
            print("Rate limit exceeded. Waiting...", e.message)
            time.sleep(10)
        except GoogleAPICallError as e:
            print(e.message)
            time.sleep(1)
        except Exception as e:
            # Best-effort catch-all so transient failures don't kill the run.
            print("Unknown error. Waiting...")
            print(e)
            time.sleep(1)
    return ret
45+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from mistralai.client import MistralClient
2+
from mistralai.models.chat_completion import ChatMessage
3+
4+
def make_auto_request(client: MistralClient, *args, **kwargs) -> ChatMessage:
    """Call ``client.chat`` until it succeeds, retrying on any exception.

    Args:
        client: Mistral API client used to issue the chat request.
        *args: Forwarded verbatim to ``MistralClient.chat``.
        **kwargs: Forwarded verbatim to ``MistralClient.chat``.

    Returns:
        The first successful chat response.
    """
    # Bug fix: the original module used ``time.sleep`` without importing
    # ``time``, so any caught exception raised NameError instead of retrying.
    import time

    ret = None
    while ret is None:
        try:
            ret = client.chat(*args, **kwargs)
        except Exception as e:
            # Best-effort retry loop: log the failure and back off briefly.
            print(e)
            time.sleep(1)
    return ret

bigcodebench/generate.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,21 @@ def run_codegen(
154154
assert id_range[0] < id_range[1], "id_range must be increasing"
155155
id_range = tuple(id_range)
156156

157+
# Make project dir
158+
os.makedirs(root, exist_ok=True)
159+
160+
instruction_prefix = "Please provide a self-contained Python script that solves the following problem in a markdown code block:"
161+
response_prefix = "Below is a Python script with a self-contained function that solves the problem and passes corresponding tests:"
162+
157163
# Make dir for codes generated by each model
158164
model_runner = make_model(
159165
model=model,
160166
backend=backend,
161167
subset=subset,
162168
split=split,
163169
temperature=temperature,
170+
instruction_prefix=instruction_prefix,
171+
response_prefix=response_prefix,
164172
base_url=base_url,
165173
tp=tp,
166174
trust_remote_code=trust_remote_code,

0 commit comments

Comments
 (0)