@@ -164,6 +164,7 @@ class EvalConfig(BaseModel):
         "auto_respond",
         "quiet",
         "gateway",
+        "taskset",
     }
     # Fields loaded from [agent] section
     _AGENT_FIELDS: ClassVar[set[str]] = {"allowed_tools", "disallowed_tools"}
@@ -184,6 +185,7 @@ class EvalConfig(BaseModel):
     remote: bool = False
     quiet: bool = False  # Suppress opening browser for eval links
     gateway: bool = False  # Use HUD Gateway for LLM API calls
+    taskset: str | None = None  # Taskset slug to associate job with
 
     # Base agent config (these merge with task's agent_config)
     allowed_tools: list[str] | None = None
@@ -701,6 +703,7 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
         max_concurrent=cfg.max_concurrent,
         group_size=cfg.group_size,
         quiet=cfg.quiet,
+        taskset=cfg.taskset,
     )
 
     # Show reward for single task
@@ -767,6 +770,9 @@ def eval_command(
     gateway: bool = typer.Option(
         False, "--gateway", "-g", help="Route LLM API calls through HUD Gateway"
     ),
+    taskset: str | None = typer.Option(
+        None, "--taskset", "-t", help="Taskset slug to associate job with"
+    ),
 ) -> None:
     """🚀 Run evaluation on datasets or individual tasks with agents.
 
@@ -801,6 +807,7 @@ def eval_command(
         byok=byok,
         quiet=quiet,
         gateway=gateway,
+        taskset=taskset,
     )
 
     # Find source if not provided
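Taken together, the hunks above thread the new `taskset` value from the `--taskset`/`-t` CLI option into `EvalConfig` and on to the evaluation run via `taskset=cfg.taskset`. A minimal, self-contained sketch of the pattern being added, using a stripped-down stand-in for `EvalConfig` (the real class has many more fields than shown here, so this is an illustration rather than the actual API):

```python
from pydantic import BaseModel


class EvalConfigSketch(BaseModel):
    """Stand-in for the real EvalConfig; only the fields touched by this diff."""

    quiet: bool = False
    gateway: bool = False
    taskset: str | None = None  # new: taskset slug to associate the job with


# Omitting the flag leaves the field as None, so existing invocations are unchanged.
assert EvalConfigSketch().taskset is None

# A value given via `--taskset my-slug` would land here and be forwarded to the
# evaluation run as `taskset=cfg.taskset` (per the _run_evaluation hunk above).
cfg = EvalConfigSketch(taskset="my-slug")
assert cfg.taskset == "my-slug"
```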