Skip to content

Commit 3fcb7e1

Browse files
committed
Fix dict issue for call_gpt_4_v_labeled
1 parent 61584a6 commit 3fcb7e1

File tree

3 files changed

+19
-16
lines changed

3 files changed

+19
-16
lines changed

operate/actions/actions.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import io
77
import asyncio
88
import aiohttp
9+
910
from PIL import Image
1011
from ultralytics import YOLO
1112
import google.generativeai as genai
@@ -329,22 +330,19 @@ async def call_gpt_4_v_labeled(messages, objective):
329330
decision_messages = messages.copy()
330331
decision_messages.append(decision_message)
331332

332-
click_future = await fetch_openai_response_async(click_messages)
333-
decision_future = await fetch_openai_response_async(decision_messages)
334-
335-
loop = asyncio.new_event_loop()
336-
asyncio.set_event_loop(loop)
333+
click_future = fetch_openai_response_async(click_messages)
334+
decision_future = fetch_openai_response_async(decision_messages)
337335

338-
click_response, decision_response = loop.run_until_complete(
339-
asyncio.gather(click_future, decision_future)
336+
click_response, decision_response = await asyncio.gather(
337+
click_future, decision_future
340338
)
341339

342-
loop.close()
343-
344340
# Extracting the message content from the ChatCompletionMessage object
345-
click_content = click_response.choices[0].message.content
341+
click_content = click_response.get("choices")[0].get("message").get("content")
346342

347-
decision_content = decision_response.choices[0].message.content
343+
decision_content = (
344+
decision_response.get("choices")[0].get("message").get("content")
345+
)
348346

349347
if not decision_content.startswith("CLICK"):
350348
return decision_content

operate/utils/action_util.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
import pyautogui
2-
import platform
2+
import platform
33
import time
44
import math
5+
56
from operate.utils.utils import convert_percent_to_decimal
67

8+
79
def keyboard_type(text):
810
"""
911
Types the given text using the keyboard.
@@ -20,6 +22,7 @@ def keyboard_type(text):
2022
pyautogui.press("enter")
2123
return "Type: " + text
2224

25+
2326
def search(text):
2427
"""
2528
Searches for a program or file by typing the given text in the search bar and pressing Enter.
@@ -49,6 +52,7 @@ def search(text):
4952
pyautogui.press("enter")
5053
return "Open program: " + text
5154

55+
5256
def click_at_percentage(
5357
x_percentage, y_percentage, duration=0.2, circle_radius=50, circle_duration=0.5
5458
):
@@ -125,4 +129,3 @@ def get_last_assistant_message(messages):
125129
else:
126130
return messages[index]
127131
return None # Return None if no assistant message is found
128-

operate/utils/utils.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import json
22
import re
3+
4+
35
def convert_percent_to_decimal(percent_str):
46
"""
57
Converts a percentage string to a decimal value.
@@ -26,7 +28,6 @@ def convert_percent_to_decimal(percent_str):
2628
except ValueError as e:
2729
print(f"Error converting percent to decimal: {e}")
2830
return None
29-
3031

3132

3233
def extract_json_from_string(s):
@@ -52,7 +53,8 @@ def extract_json_from_string(s):
5253
except Exception as e:
5354
print(f"Error parsing JSON: {e}")
5455
return None
55-
56+
57+
5658
def parse_response(response):
5759
"""
5860
Parses the given response and returns a dictionary with the type and data.
@@ -97,4 +99,4 @@ def parse_response(response):
9799
search_data = re.search(r"SEARCH (.+)", response).group(1)
98100
return {"type": "SEARCH", "data": search_data}
99101

100-
return {"type": "UNKNOWN", "data": response}
102+
return {"type": "UNKNOWN", "data": response}

0 commit comments

Comments
 (0)