Skip to content

Commit 2061b1b

Browse files
committed
fix issues found by review bot
1 parent d827ef1 commit 2061b1b

File tree

1 file changed

+23
-14
lines changed

1 file changed

+23
-14
lines changed

src/agentlab/agents/openai_cua/agent.py

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
1-
from dataclasses import dataclass
21
import logging
2+
import os
3+
from dataclasses import dataclass
34

5+
import openai
46
from bgym import HighLevelActionSetArgs
57
from browsergym.experiments import AbstractAgentArgs, Agent, AgentInfo
6-
from agentlab.llm.llm_utils import image_to_jpg_base64_url
78

8-
import openai
9+
from agentlab.llm.llm_utils import image_to_jpg_base64_url
910

10-
client = openai.OpenAI()
11+
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
1112

1213

1314
@dataclass
@@ -75,9 +76,10 @@ def __init__(
7576

7677
self.action_set = action_set.make_action_set()
7778

78-
assert not self.enable_safety_checks and (
79-
self.action_set.demo_mode is not None or self.action_set.demo_mode != "off"
80-
), "Safety checks are enabled but no demo mode is set. Please set demo_mode to 'all_blue' or 'off'."
79+
assert not (
80+
self.enable_safety_checks
81+
and (self.action_set.demo_mode is None or self.action_set.demo_mode == "off")
82+
), "Safety checks are enabled but no demo mode is set. Please set demo_mode to 'all_blue'."
8183

8284
self.computer_calls = []
8385
self.pending_checks = []
@@ -118,15 +120,21 @@ def parse_action_to_bgym(self, action) -> str:
118120

119121
case "keypress":
120122
keys = action.keys
123+
to_press = ""
121124
for k in keys:
122125
if k.lower() == "enter":
123-
return "keyboard_press('Enter')"
126+
to_press = "Enter"
124127
elif k.lower() == "space":
125-
return "keyboard_press(' ')"
128+
to_press = " "
126129
elif k.lower() == "ctrl":
127-
return "keyboard_press('Ctrl')"
130+
to_press = "Ctrl"
131+
elif k.lower() == "shift":
132+
to_press = "Shift"
133+
elif k.lower() == "alt":
134+
to_press = "Alt"
128135
else:
129-
return f"keyboard_press('{k}')"
136+
to_press += f"+{k}"
137+
return f"keyboard_press('{to_press}')"
130138

131139
case "type":
132140
text = action.text
@@ -150,7 +158,7 @@ def parse_action_to_bgym(self, action) -> str:
150158
return "noop()"
151159

152160
case _:
153-
logging.error(f"No action found for {action_type}. Please check the action type.")
161+
logging.debug(f"No action found for {action_type}. Please check the action type.")
154162
return None
155163

156164
return action
@@ -206,7 +214,7 @@ def get_action(self, obs):
206214
screenshot_base64 = image_to_jpg_base64_url(obs["screenshot"])
207215

208216
if not self.initialized:
209-
print("Initializing OpenAI Computer Use Agent with goal:", goal)
217+
logging.debug("Initializing OpenAI Computer Use Agent with goal:", goal)
210218
response = self.start_session(goal, screenshot_base64)
211219
for item in response.output:
212220
if item.type == "reasoning":
@@ -222,7 +230,6 @@ def get_action(self, obs):
222230
if not self.enable_safety_checks:
223231
# Bypass safety checks
224232
self.pending_checks = computer_call.pending_safety_checks
225-
print(f"Pending safety checks: {self.pending_checks}")
226233
action = self.parse_action_to_bgym(computer_call.action)
227234
self.last_call_id = computer_call.call_id
228235
return action, self.agent_info
@@ -245,7 +252,9 @@ def get_action(self, obs):
245252
self.inputs.append(self.answer_assistant)
246253
self.answer_assistant = None
247254

255+
self.agent_info.chat_messages = str(self.inputs)
248256
response = self.call_api(self.inputs, self.previous_response_id)
257+
self.inputs = [] # Clear inputs for the next call
249258
self.previous_response_id = response.id
250259

251260
self.computer_calls = [item for item in response.output if item.type == "computer_call"]

0 commit comments

Comments
 (0)