5959
6060
6161def _build_agent (
62- agent_type : Literal [AgentType .CLAUDE , AgentType .OPENAI ],
62+ agent_type : Literal [AgentType .CLAUDE , AgentType .OPERATOR ],
6363 * ,
6464 model : str | None = None ,
6565 allowed_tools : list [str ] | None = None ,
6666) -> ClaudeAgent | OperatorAgent :
6767 """Create and return the requested agent type."""
68- if agent_type == AgentType .OPENAI :
68+ if agent_type == AgentType .OPERATOR :
6969 return OperatorAgent (allowed_tools = allowed_tools , validate_api_key = False )
7070
7171 model = model or "claude-sonnet-4-5-20250929"
@@ -80,7 +80,7 @@ def _build_agent(
8080async def run_single_task (
8181 dataset_name : str ,
8282 * ,
83- agent_type : Literal [AgentType .CLAUDE , AgentType .OPENAI ] = AgentType .CLAUDE ,
83+ agent_type : Literal [AgentType .CLAUDE , AgentType .OPERATOR ] = AgentType .CLAUDE ,
8484 model : str | None = None ,
8585 allowed_tools : list [str ] | None = None ,
8686 max_steps : int = 10 ,
@@ -111,16 +111,18 @@ async def run_single_task(
111111async def run_full_dataset (
112112 dataset_name : str ,
113113 * ,
114- agent_type : Literal [AgentType .CLAUDE , AgentType .OPENAI ] = AgentType .CLAUDE ,
114+ agent_type : Literal [AgentType .CLAUDE , AgentType .OPERATOR ] = AgentType .CLAUDE ,
115115 model : str | None = None ,
116116 allowed_tools : list [str ] | None = None ,
117117 max_concurrent : int = 50 ,
118118 max_steps : int = 10 ,
119119) -> list [Any ]:
120120 """Run evaluation across entire dataset with asyncio concurrency."""
121- if agent_type == AgentType .OPENAI :
121+ if agent_type == AgentType .OPERATOR :
122122 agent_class = OperatorAgent
123- agent_config : dict [str , Any ] = {"validate_api_key" : False }
123+ agent_config = {"validate_api_key" : False }
124+ if model :
125+ agent_config ["model" ] = model
124126 if allowed_tools :
125127 # Only pass allowed tools if they are provided, otherwise all tools are enabled
126128 agent_config ["allowed_tools" ] = allowed_tools
@@ -171,7 +173,7 @@ def parse_args() -> argparse.Namespace: # type: ignore[valid-type]
171173 parser .add_argument ("--full" , action = "store_true" , help = "Run entire dataset" )
172174
173175 # Agent
174- parser .add_argument ("--agent" , choices = ["claude" , "openai " ], default = "claude" )
176+ parser .add_argument ("--agent" , choices = ["claude" , "operator " ], default = "claude" )
175177 parser .add_argument ("--model" , default = None , help = "Model override" )
176178 parser .add_argument (
177179 "--allowed-tools" , dest = "allowed_tools" , help = "Tool allowlist (comma-separated)"
0 commit comments