@@ -8,6 +8,7 @@
 import jsonlines
 from datasets import load_dataset
 from openai import OpenAI
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from lm_eval.api.instance import Instance
 from lm_eval.api.model import LM

@@ -49,6 +50,7 @@ class WildBenchConfig:
     model: str = "gpt-4o-mini-2024-07-18"
     mode: str = "score"
     batch_mode: bool = True
+    api_parallel: int = 32
 
     # Task weights
     task_weights: Dict[str, float] = None
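The new `api_parallel` field caps the number of in-flight judge requests. A minimal sketch of how it might be tuned, assuming `WildBenchConfig` is a dataclass (the typed fields with defaults suggest so, but the decorator is not shown in the diff):

```python
from dataclasses import dataclass
from typing import Dict

# Trimmed, hypothetical view of WildBenchConfig: only the fields visible
# in this diff are shown, and @dataclass is an assumption about the class.
@dataclass
class WildBenchConfig:
    model: str = "gpt-4o-mini-2024-07-18"
    mode: str = "score"
    batch_mode: bool = True
    api_parallel: int = 32  # upper bound on concurrent judge API calls
    task_weights: Dict[str, float] = None

# Lower the ceiling for accounts with tight rate limits.
config = WildBenchConfig(api_parallel=8)
```

The default of 32 trades throughput against provider rate limits; OpenAI-style endpoints throttle per minute, so the right setting depends on the judge model's quota.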
@@ -336,15 +338,24 @@ def _process_evaluator_file(self, eval_file: str, client: OpenAI) -> None:
         with open(eval_file, "r") as file:
             lines = file.readlines()
 
-        results = []
-        for line in lines:
+        def process_line(line):
             payload = json.loads(line)
             payload["body"]["max_tokens"] = 4096
             response = client.chat.completions.create(**payload["body"])
-
             result = payload.copy()
             result["response"] = json.loads(response.json())
-            results.append(result)
+            return result
+
+        results = []
+        # Use ThreadPoolExecutor since API calls are I/O bound
+        with ThreadPoolExecutor(max_workers=self.config.api_parallel) as executor:
+            future_to_line = {executor.submit(process_line, line): line for line in lines}
+            for future in as_completed(future_to_line):
+                try:
+                    result = future.result()
+                    results.append(result)
+                except Exception as e:
+                    self.logger.error(f"Error processing line: {str(e)}")
 
         output_file = eval_file.replace("batch-submit.jsonl", "results.jsonl")
         with open(output_file, "w") as file:
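The rewritten loop fans the per-line judge calls out over a thread pool, a good fit since each call blocks on network I/O rather than the CPU. One consequence of `as_completed` is that results arrive in completion order, so `results.jsonl` no longer mirrors the line order of `batch-submit.jsonl`. The sketch below shows the same bounded-concurrency pattern but preserves input order by iterating the futures in submission order; `call_api` is a hypothetical stand-in for `client.chat.completions.create`, and the error handling mirrors the diff's:

```python
import json
from concurrent.futures import ThreadPoolExecutor

def call_api(body):
    # Hypothetical stand-in for client.chat.completions.create(**body);
    # returns a dummy payload so the sketch runs without network access.
    return {"model": body.get("model"), "choices": []}

def process_line(line):
    payload = json.loads(line)
    payload["body"]["max_tokens"] = 4096
    result = payload.copy()
    result["response"] = call_api(payload["body"])
    return result

def process_all(lines, api_parallel=32):
    # submit() returns immediately; iterating the futures list in
    # submission order (rather than via as_completed) keeps the output
    # aligned with the input while still running calls concurrently.
    with ThreadPoolExecutor(max_workers=api_parallel) as executor:
        futures = [executor.submit(process_line, line) for line in lines]
        results = []
        for future in futures:
            try:
                results.append(future.result())
            except Exception as exc:
                print(f"Error processing line: {exc}")
        return results

if __name__ == "__main__":
    demo = [json.dumps({"custom_id": str(i), "body": {"model": "gpt-4o-mini"}})
            for i in range(3)]
    for row in process_all(demo, api_parallel=2):
        print(row["custom_id"], row["response"])
```

The trade-off: iterating in submission order makes each `future.result()` wait for that specific call, so one slow early request delays visibility of later results, whereas `as_completed` (as in the diff) surfaces each result, and each failure, as soon as it finishes.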