Skip to content

Commit 176e761

Browse files
authored
Merge pull request #12 from MiroMindAI/patch_pengxiang
feat(agent): shuffle the dataset for benchmark processing.
2 parents b68b726 + 930c44b commit 176e761

File tree

1 file changed: 7 additions and 1 deletion

src/utils/task_utils.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
"""Task execution utilities for benchmark evaluation."""
66

77
import asyncio
8+
import random
89
from pathlib import Path
910
from typing import List, Optional
1011

@@ -276,6 +277,11 @@ async def run_tasks(
276277
)
277278
print(f" pass@k={pass_at_k}, max_retry={max_retry}")
278279

280+
# Shuffle tasks to avoid order bias and improve balancing
281+
# This prevents long-tail tasks from accumulating at the end
282+
shuffled_tasks = tasks.copy()
283+
random.shuffle(shuffled_tasks)
284+
279285
semaphore = asyncio.Semaphore(max_concurrent)
280286

281287
async def run_with_semaphore(task: Task) -> TaskResult:
@@ -292,7 +298,7 @@ async def run_with_semaphore(task: Task) -> TaskResult:
292298
)
293299

294300
results = await asyncio.gather(
295-
*[run_with_semaphore(task) for task in tasks],
301+
*[run_with_semaphore(task) for task in shuffled_tasks],
296302
return_exceptions=True,
297303
)
298304

0 commit comments

Comments
 (0)