Skip to content

Commit 9a76b81

Browse files
committed
add --taskset flag to associate jobs with tasksets
1 parent bb8e740 commit 9a76b81

File tree

4 files changed

+14 -0 lines changed

4 files changed

+14 -0 lines changed

hud/cli/eval.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ class EvalConfig(BaseModel):
164164
"auto_respond",
165165
"quiet",
166166
"gateway",
167+
"taskset",
167168
}
168169
# Fields loaded from [agent] section
169170
_AGENT_FIELDS: ClassVar[set[str]] = {"allowed_tools", "disallowed_tools"}
@@ -184,6 +185,7 @@ class EvalConfig(BaseModel):
184185
remote: bool = False
185186
quiet: bool = False # Suppress opening browser for eval links
186187
gateway: bool = False # Use HUD Gateway for LLM API calls
188+
taskset: str | None = None # Taskset slug to associate job with
187189

188190
# Base agent config (these merge with task's agent_config)
189191
allowed_tools: list[str] | None = None
@@ -701,6 +703,7 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
701703
max_concurrent=cfg.max_concurrent,
702704
group_size=cfg.group_size,
703705
quiet=cfg.quiet,
706+
taskset=cfg.taskset,
704707
)
705708

706709
# Show reward for single task
@@ -767,6 +770,9 @@ def eval_command(
767770
gateway: bool = typer.Option(
768771
False, "--gateway", "-g", help="Route LLM API calls through HUD Gateway"
769772
),
773+
taskset: str | None = typer.Option(
774+
None, "--taskset", "-t", help="Taskset slug to associate job with"
775+
),
770776
) -> None:
771777
"""🚀 Run evaluation on datasets or individual tasks with agents.
772778
@@ -801,6 +807,7 @@ def eval_command(
801807
byok=byok,
802808
quiet=quiet,
803809
gateway=gateway,
810+
taskset=taskset,
804811
)
805812

806813
# Find source if not provided

hud/datasets/runner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ async def run_dataset(
2929
max_concurrent: int = 30,
3030
group_size: int = 1,
3131
quiet: bool = True,
32+
taskset: str | None = None,
3233
) -> list[EvalContext]:
3334
"""Run an agent on a dataset of tasks.
3435
@@ -96,6 +97,7 @@ async def run_dataset(
9697
group=group_size,
9798
max_concurrent=max_concurrent,
9899
quiet=quiet,
100+
taskset=taskset,
99101
) as ctx:
100102
# Create agent using AgentType.cls.create()
101103
agent = agent_type.cls.create(**(agent_params or {}))

hud/eval/manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def _send_job_enter(
6262
variants: dict[str, Any] | None,
6363
group: int,
6464
api_key: str | None,
65+
taskset: str | None = None,
6566
) -> None:
6667
"""Send job enter payload (sync request before traces start)."""
6768
import httpx
@@ -77,6 +78,7 @@ def _send_job_enter(
7778
name=name,
7879
variants=variants,
7980
group=group,
81+
taskset=taskset,
8082
)
8183

8284
try:
@@ -105,6 +107,7 @@ async def run_eval(
105107
max_concurrent: int | None = None,
106108
trace: bool = True,
107109
quiet: bool = False,
110+
taskset: str | None = None,
108111
) -> AsyncGenerator[EvalContext, None]:
109112
"""Standalone eval context manager.
110113
@@ -279,6 +282,7 @@ async def run_eval(
279282
variants=variants,
280283
group=group,
281284
api_key=api_key,
285+
taskset=taskset,
282286
)
283287

284288
# Print job URL (not individual trace URLs)

hud/eval/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class JobEnterPayload(BaseModel):
5353
name: str | None = None
5454
variants: dict[str, Any] | None = None # Full variant config
5555
group: int | None = None
56+
taskset: str | None = None # taskset slug to associate job with
5657

5758

5859
__all__ = [

0 commit comments

Comments (0)