Skip to content

Commit 39e798c

Browse files
committed
Merge branch 'main' of github.com:WecoAI/weco-cli
2 parents 2487087 + 78c6ccb commit 39e798c

File tree

2 files changed

+15
-25
lines changed

2 files changed

+15
-25
lines changed

examples/prompt/eval.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,29 @@
1111
from concurrent.futures import ThreadPoolExecutor, as_completed
1212

1313
from datasets import load_dataset
14-
import optimize # the file Weco mutates
14+
import optimize # the file Weco mutates
1515

1616
# ---------------------------------------------------------------------
1717
# Configuration
18-
TOTAL_SAMPLES = 20 # how many problems to load
19-
NUM_WORKERS = 20 # concurrent LLM calls
20-
LOG_EVERY = 5 # print progress after this many
18+
TOTAL_SAMPLES = 20 # how many problems to load
19+
NUM_WORKERS = 20 # concurrent LLM calls
20+
LOG_EVERY = 5 # print progress after this many
2121
# ---------------------------------------------------------------------
2222

2323
print(f"[setup] loading {TOTAL_SAMPLES} problems from AIME 2024 …")
24-
DATA = load_dataset(
25-
"Maxwell-Jia/AIME_2024",
26-
split=f"train[:{TOTAL_SAMPLES}]",
27-
cache_dir=".cache"
28-
)
24+
DATA = load_dataset("Maxwell-Jia/AIME_2024", split=f"train[:{TOTAL_SAMPLES}]", cache_dir=".cache")
25+
2926

3027
def extract_number(text: str) -> str:
3128
m = re.search(r"\b(\d{1,3})\b", text)
3229
return m.group(1) if m else ""
3330

31+
3432
def score_one(row) -> bool:
3533
guess = extract_number(optimize.solve(row["Problem"]))
3634
return guess == str(row["Answer"])
3735

36+
3837
def accuracy() -> float:
3938
correct = 0
4039
start = time.time()
@@ -46,13 +45,11 @@ def accuracy() -> float:
4645

4746
if idx % LOG_EVERY == 0 or idx == TOTAL_SAMPLES:
4847
elapsed = time.time() - start
49-
print(
50-
f"[progress] {idx}/{TOTAL_SAMPLES} completed, "
51-
f"elapsed {elapsed:.1f} s"
52-
)
48+
print(f"[progress] {idx}/{TOTAL_SAMPLES} completed, elapsed {elapsed:.1f} s")
5349

5450
return correct / TOTAL_SAMPLES
5551

52+
5653
if __name__ == "__main__":
5754
acc = accuracy()
58-
print(f"accuracy: {acc:.4f}") # Weco parses this line
55+
print(f"accuracy: {acc:.4f}") # Weco parses this line

examples/prompt/optimize.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from openai import OpenAI
88

9-
client = OpenAI() # API key must be in OPENAI_API_KEY
10-
MODEL = "gpt-4o-mini" # change if you have another model
9+
client = OpenAI() # API key must be in OPENAI_API_KEY
10+
MODEL = "gpt-4o-mini" # change if you have another model
1111

1212
PROMPT_TEMPLATE = """You are an expert competition mathematician.
1313
Solve the following AIME problem.
@@ -28,13 +28,6 @@
2828

2929
def solve(problem: str) -> str:
3030
"""Return the model's raw text answer for one problem."""
31-
prompt = PROMPT_TEMPLATE.format(
32-
problem=problem,
33-
extra_instructions=EXTRA_INSTRUCTIONS,
34-
)
35-
response = client.chat.completions.create(
36-
model=MODEL,
37-
messages=[{"role": "user", "content": prompt}],
38-
temperature=0,
39-
)
31+
prompt = PROMPT_TEMPLATE.format(problem=problem, extra_instructions=EXTRA_INSTRUCTIONS)
32+
response = client.chat.completions.create(model=MODEL, messages=[{"role": "user", "content": prompt}], temperature=0)
4033
return response.choices[0].message.content.strip()

0 commit comments

Comments
 (0)