Merge branch 'main' of github.com:WecoAI/weco-cli

ZhengyaoJiang · ZhengyaoJiang · commit 39e798cd24c9 · 2025-04-17T18:51:05.000+01:00
diff --git a/examples/prompt/eval.py b/examples/prompt/eval.py
@@ -11,30 +11,29 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from datasets import load_dataset
-import optimize                       # the file Weco mutates
+import optimize  # the file Weco mutates
 
 # ---------------------------------------------------------------------
 # Configuration
-TOTAL_SAMPLES = 20                    # how many problems to load
-NUM_WORKERS   = 20                    # concurrent LLM calls
-LOG_EVERY     = 5                     # print progress after this many
+TOTAL_SAMPLES = 20  # how many problems to load
+NUM_WORKERS = 20  # concurrent LLM calls
+LOG_EVERY = 5  # print progress after this many
 # ---------------------------------------------------------------------
 
 print(f"[setup] loading {TOTAL_SAMPLES} problems from AIME 2024 …")
-DATA = load_dataset(
-    "Maxwell-Jia/AIME_2024",
-    split=f"train[:{TOTAL_SAMPLES}]",
-    cache_dir=".cache"
-)
+DATA = load_dataset("Maxwell-Jia/AIME_2024", split=f"train[:{TOTAL_SAMPLES}]", cache_dir=".cache")
+
 
 def extract_number(text: str) -> str:
     m = re.search(r"\b(\d{1,3})\b", text)
     return m.group(1) if m else ""
 
+
 def score_one(row) -> bool:
     guess = extract_number(optimize.solve(row["Problem"]))
     return guess == str(row["Answer"])
 
+
 def accuracy() -> float:
     correct = 0
     start = time.time()
@@ -46,13 +45,11 @@ def accuracy() -> float:
 
             if idx % LOG_EVERY == 0 or idx == TOTAL_SAMPLES:
                 elapsed = time.time() - start
-                print(
-                    f"[progress] {idx}/{TOTAL_SAMPLES} completed, "
-                    f"elapsed {elapsed:.1f} s"
-                )
+                print(f"[progress] {idx}/{TOTAL_SAMPLES} completed, elapsed {elapsed:.1f} s")
 
     return correct / TOTAL_SAMPLES
 
+
 if __name__ == "__main__":
     acc = accuracy()
-    print(f"accuracy: {acc:.4f}")     # Weco parses this line
+    print(f"accuracy: {acc:.4f}")  # Weco parses this line
diff --git a/examples/prompt/optimize.py b/examples/prompt/optimize.py
@@ -6,8 +6,8 @@
 
 from openai import OpenAI
 
-client = OpenAI()          # API key must be in OPENAI_API_KEY
-MODEL = "gpt-4o-mini"      # change if you have another model
+client = OpenAI()  # API key must be in OPENAI_API_KEY
+MODEL = "gpt-4o-mini"  # change if you have another model
 
 PROMPT_TEMPLATE = """You are an expert competition mathematician.
 Solve the following AIME problem.
@@ -28,13 +28,6 @@
 
 def solve(problem: str) -> str:
     """Return the model's raw text answer for one problem."""
-    prompt = PROMPT_TEMPLATE.format(
-        problem=problem,
-        extra_instructions=EXTRA_INSTRUCTIONS,
-    )
-    response = client.chat.completions.create(
-        model=MODEL,
-        messages=[{"role": "user", "content": prompt}],
-        temperature=0,
-    )
+    prompt = PROMPT_TEMPLATE.format(problem=problem, extra_instructions=EXTRA_INSTRUCTIONS)
+    response = client.chat.completions.create(model=MODEL, messages=[{"role": "user", "content": prompt}], temperature=0)
     return response.choices[0].message.content.strip()