Skip to content

Commit a43e54d

Browse files
Merge branch 'main' into generic_agent_hinter
2 parents fcf42b3 + 887cce4 commit a43e54d

20 files changed

+2221
-47
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ hint = [
109109
[project.scripts]
110110
agentlab-assistant = "agentlab.ui_assistant:main"
111111
agentlab-xray = "agentlab.analyze.agent_xray:main"
112+
agentlab-mentor = "agentlab.agents.hitl_agent.launch_hint_ui:main"

src/agentlab/agents/agent_utils.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
import copy
2+
13
from PIL import Image, ImageDraw
24
from playwright.sync_api import Page
35

6+
from agentlab.analyze import overlay_utils
7+
from agentlab.llm.llm_utils import img_to_base_64
8+
49

510
def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
611
"""
@@ -128,3 +133,24 @@ def zoom_webpage(page: Page, zoom_factor: float = 1.5):
128133

129134
page.evaluate(f"document.documentElement.style.zoom='{zoom_factor*100}%'")
130135
return page
136+
137+
138+
def overlay_action(obs, action):
    """Return a base64-encoded copy of the screenshot with ``action`` drawn on it.

    The screenshot and the element properties are deep-copied before use, so
    the ``obs`` dict passed in is not modified.

    Args:
        obs: Observation dict. Reads ``obs["screenshot"]`` (handed to
            ``Image.fromarray``) and ``obs["extra_element_properties"]``
            (a mapping whose values may carry a ``"bbox"`` sequence).
        action: Action forwarded to ``overlay_utils.annotate_action``.

    Returns:
        The result of ``img_to_base_64`` applied to the annotated image.
    """
    import os

    act_img = Image.fromarray(copy.deepcopy(obs["screenshot"]))
    new_obs_properties = copy.deepcopy(obs["extra_element_properties"])

    # NOTE(review): any non-empty value enables this branch, including "0"
    # or "false" — confirm that is the intended contract for the variable.
    if os.getenv("AGENTLAB_USE_RETINA"):
        # HACK: divide every bbox coordinate by 2 in the copied properties
        # (presumably to compensate for retina pixel scaling — confirm).
        # TODO: make this more robust by changing the logic in annotate_action
        # directly (or maybe in the obs section?)
        for value in new_obs_properties.values():
            try:
                value["bbox"] = [elem / 2 for elem in value["bbox"]]
            except (KeyError, TypeError):
                # Best effort: entries with no bbox, a None bbox, or
                # non-numeric coordinates are left untouched.
                continue

    # annotate_action's return value is not used; the image object passed in
    # is what gets encoded below.
    overlay_utils.annotate_action(act_img, action, properties=new_obs_properties)
    return img_to_base_64(act_img)

src/agentlab/agents/generic_agent/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@
2222
AGENT_4o_MINI,
2323
AGENT_4o_MINI_VISION,
2424
AGENT_4o_VISION,
25+
AGENT_AZURE_4o_MINI,
26+
AGENT_AZURE_4o,
27+
AGENT_AZURE_4o_VISION,
28+
AGENT_AZURE_4o_MINI_VISION,
29+
AGENT_AZURE_41,
30+
AGENT_AZURE_41_MINI,
31+
AGENT_AZURE_41_NANO,
32+
AGENT_AZURE_41_VISION,
33+
AGENT_AZURE_41_MINI_VISION,
34+
AGENT_AZURE_41_NANO_VISION,
35+
AGENT_AZURE_5,
36+
AGENT_AZURE_5_MINI,
37+
AGENT_AZURE_5_NANO,
38+
AGENT_AZURE_5_VISION,
39+
AGENT_AZURE_5_MINI_VISION,
40+
AGENT_AZURE_5_NANO_VISION,
2541
AGENT_o1_MINI,
2642
AGENT_o3_MINI,
2743
FLAGS_GPT_4o,
@@ -46,6 +62,22 @@
4662
"AGENT_37_SONNET",
4763
"AGENT_4o_VISION",
4864
"AGENT_4o_MINI_VISION",
65+
"AGENT_AZURE_4o_MINI",
66+
"AGENT_AZURE_4o",
67+
"AGENT_AZURE_4o_VISION",
68+
"AGENT_AZURE_4o_MINI_VISION",
69+
"AGENT_AZURE_41",
70+
"AGENT_AZURE_41_MINI",
71+
"AGENT_AZURE_41_NANO",
72+
"AGENT_AZURE_41_VISION",
73+
"AGENT_AZURE_41_MINI_VISION",
74+
"AGENT_AZURE_41_NANO_VISION",
75+
"AGENT_AZURE_5",
76+
"AGENT_AZURE_5_MINI",
77+
"AGENT_AZURE_5_NANO",
78+
"AGENT_AZURE_5_VISION",
79+
"AGENT_AZURE_5_MINI_VISION",
80+
"AGENT_AZURE_5_NANO_VISION",
4981
"AGENT_CLAUDE_SONNET_35_VISION",
5082
"AGENT_GPT5_MINI",
5183
]

src/agentlab/agents/generic_agent/agent_configs.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,43 @@
262262
chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4o-mini-2024-07-18"],
263263
flags=FLAGS_GPT_4o,
264264
)
265+
266+
# Agent presets whose chat model args are looked up under the "azure/" keys
# of CHAT_MODEL_ARGS_DICT. Every preset in this group uses FLAGS_GPT_4o.

# gpt-4o family
AGENT_AZURE_4o_MINI = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4o-mini-2024-07-18"],
    flags=FLAGS_GPT_4o,
)
AGENT_AZURE_4o = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4o-2024-08-06"],
    flags=FLAGS_GPT_4o,
)
# gpt-4.1 family
AGENT_AZURE_41 = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4.1-2025-04-14"],
    flags=FLAGS_GPT_4o,
)
AGENT_AZURE_41_MINI = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4.1-mini-2025-04-14"],
    flags=FLAGS_GPT_4o,
)
AGENT_AZURE_41_NANO = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4.1-nano-2025-04-14"],
    flags=FLAGS_GPT_4o,
)

# gpt-5 family
AGENT_AZURE_5 = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-2025-08-07"],
    flags=FLAGS_GPT_4o,
)

AGENT_AZURE_5_MINI = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-mini-2025-08-07"],
    flags=FLAGS_GPT_4o,
)

AGENT_AZURE_5_NANO = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-nano-2025-08-07"],
    flags=FLAGS_GPT_4o,
)
301+
265302
AGENT_CLAUDE_SONNET_35 = GenericAgentArgs(
266303
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/anthropic/claude-3.5-sonnet:beta"],
267304
flags=FLAGS_GPT_4o,
@@ -298,6 +335,45 @@
298335
flags=FLAGS_GPT_4o_VISION,
299336
)
300337

338+
# Agent presets that reuse the same "azure/" entries of CHAT_MODEL_ARGS_DICT
# but pair them with FLAGS_GPT_4o_VISION instead of FLAGS_GPT_4o.

# gpt-4o family
AGENT_AZURE_4o_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4o-2024-08-06"],
    flags=FLAGS_GPT_4o_VISION,
)

AGENT_AZURE_4o_MINI_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4o-mini-2024-07-18"],
    flags=FLAGS_GPT_4o_VISION,
)

# gpt-4.1 family
AGENT_AZURE_41_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4.1-2025-04-14"],
    flags=FLAGS_GPT_4o_VISION,
)

AGENT_AZURE_41_MINI_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4.1-mini-2025-04-14"],
    flags=FLAGS_GPT_4o_VISION,
)
AGENT_AZURE_41_NANO_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-4.1-nano-2025-04-14"],
    flags=FLAGS_GPT_4o_VISION,
)

# gpt-5 family
AGENT_AZURE_5_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-2025-08-07"],
    flags=FLAGS_GPT_4o_VISION,
)

AGENT_AZURE_5_MINI_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-mini-2025-08-07"],
    flags=FLAGS_GPT_4o_VISION,
)

AGENT_AZURE_5_NANO_VISION = GenericAgentArgs(
    chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-nano-2025-08-07"],
    flags=FLAGS_GPT_4o_VISION,
)
376+
301377
AGENT_CLAUDE_SONNET_35_VISION = GenericAgentArgs(
302378
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/anthropic/claude-3.5-sonnet:beta"],
303379
flags=FLAGS_GPT_4o_VISION,
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing_extensions import Protocol
2+
3+
from agentlab.agents.agent_args import AgentArgs
4+
5+
6+
class MultiCandidateAgent(Protocol):
7+
"""
8+
Protocol for agents that generate multiple candidates for get_action.
9+
10+
This protocol defines the contract for agents that can generate
11+
multiple candidate actions and allow selection of one of them for execution.
12+
"""
13+
14+
def get_candidate_generations(
15+
self, obs: dict, hint: list[str] | None = None, n_candidates: int = 3
16+
) -> "list[dict]":
17+
"""
18+
Generate multiple candidate actions for the given observation.
19+
20+
You can pass extra info in agent_info to update internal state of the
21+
agent based on the selected candidate. Your internal state management
22+
should be robust to multiple calls to the get_candidate_generations method
23+
in a single step.
24+
25+
Args:
26+
obs: The current observation dictionary containing environment state
27+
hint: Optional list of hint strings to guide candidate generation
28+
n_candidates: Number of candidate actions to generate
29+
"""
30+
...
31+
32+
def update_agent_state_from_selected_candidate(self, output: dict):
33+
"""
34+
Update the agent's internal state based on the selected candidate.
35+
This can include any memory or planning updates.
36+
37+
Args:
38+
output: The selected candidate action dictionary
39+
"""
40+
pass
41+
42+
43+
class MultiCandidateAgentArgs(AgentArgs):
    """Agent args whose ``make_agent`` yields a ``MultiCandidateAgent``."""

    def make_agent(self) -> MultiCandidateAgent:
        """Build the multi-candidate agent; left as a stub here."""
        ...

    def __post_init__(self):
        """Run the parent hook, then prefix a non-empty ``agent_name`` with 'MC-'."""
        super().__post_init__()
        # A missing attribute and an empty/None name both skip the prefix.
        current_name = getattr(self, "agent_name", None)
        if current_name:
            self.agent_name = "MC-" + current_name

0 commit comments

Comments
 (0)