Skip to content

Commit 75596be

Browse files
authored
Fixed tool using bugs
Fixed tool using bugs
2 parents ed22747 + bcaaf52 commit 75596be

File tree

4 files changed

+257
-48
lines changed

4 files changed

+257
-48
lines changed

interpreter/core/async_core.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
pass
3535

3636

37+
complete_message = {"role": "server", "type": "status", "content": "complete"}
38+
39+
3740
class AsyncInterpreter(OpenInterpreter):
3841
def __init__(self, *args, **kwargs):
3942
super().__init__(*args, **kwargs)
@@ -129,9 +132,7 @@ def respond(self, run_code=None):
129132

130133
self.output_queue.sync_q.put(chunk)
131134

132-
self.output_queue.sync_q.put(
133-
{"role": "server", "type": "status", "content": "complete"}
134-
)
135+
self.output_queue.sync_q.put(complete_message)
135136
except Exception as e:
136137
error = traceback.format_exc() + "\n" + str(e)
137138
error_message = {
@@ -140,6 +141,7 @@ def respond(self, run_code=None):
140141
"content": traceback.format_exc() + "\n" + str(e),
141142
}
142143
self.output_queue.sync_q.put(error_message)
144+
self.output_queue.sync_q.put(complete_message)
143145
print("\n\n--- SENT ERROR: ---\n\n")
144146
print(error)
145147
print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
@@ -428,6 +430,7 @@ async def receive_input():
428430
"content": traceback.format_exc() + "\n" + str(e),
429431
}
430432
await websocket.send_text(json.dumps(error_message))
433+
await websocket.send_text(json.dumps(complete_message))
431434
print("\n\n--- SENT ERROR: ---\n\n")
432435
print(error)
433436
print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
@@ -496,6 +499,7 @@ async def send_output():
496499
"content": traceback.format_exc() + "\n" + str(e),
497500
}
498501
await websocket.send_text(json.dumps(error_message))
502+
await websocket.send_text(json.dumps(complete_message))
499503
print("\n\n--- SENT ERROR: ---\n\n")
500504
print(error)
501505
print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
@@ -510,6 +514,7 @@ async def send_output():
510514
"content": traceback.format_exc() + "\n" + str(e),
511515
}
512516
await websocket.send_text(json.dumps(error_message))
517+
await websocket.send_text(json.dumps(complete_message))
513518
print("\n\n--- SENT ERROR: ---\n\n")
514519
print(error)
515520
print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")

interpreter/core/core.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,22 @@ def is_ephemeral(chunk):
322322
if chunk["content"] == "":
323323
continue
324324

325+
# If active_line is None, we finished running code.
326+
if (
327+
chunk.get("format") == "active_line"
328+
and chunk.get("content", "") == None
329+
):
330+
# If output wasn't yet produced, add an empty output
331+
if self.messages[-1]["role"] != "computer":
332+
self.messages.append(
333+
{
334+
"role": "computer",
335+
"type": "console",
336+
"format": "output",
337+
"content": "",
338+
}
339+
)
340+
325341
# Handle the special "confirmation" chunk, which neither triggers a flag nor creates a message
326342
if chunk["type"] == "confirmation":
327343
# Emit an end flag for the last message type, and reset last_flag_base
@@ -334,14 +350,14 @@ def is_ephemeral(chunk):
334350

335351
# We want to append this now, so even if content is never filled, we know that the execution didn't produce output.
336352
# ... rethink this though.
337-
self.messages.append(
338-
{
339-
"role": "computer",
340-
"type": "console",
341-
"format": "output",
342-
"content": "",
343-
}
344-
)
353+
# self.messages.append(
354+
# {
355+
# "role": "computer",
356+
# "type": "console",
357+
# "format": "output",
358+
# "content": "",
359+
# }
360+
# )
345361
continue
346362

347363
# Check if the chunk's role, type, and format (if present) match the last_flag_base
@@ -362,7 +378,19 @@ def is_ephemeral(chunk):
362378
# If they match, append the chunk's content to the current message's content
363379
# (Except active_line, which shouldn't be stored)
364380
if not is_ephemeral(chunk):
365-
self.messages[-1]["content"] += chunk["content"]
381+
if any(
382+
[
383+
(property in self.messages[-1])
384+
and (
385+
self.messages[-1].get(property)
386+
!= chunk.get(property)
387+
)
388+
for property in ["role", "type", "format"]
389+
]
390+
):
391+
self.messages.append(chunk)
392+
else:
393+
self.messages[-1]["content"] += chunk["content"]
366394
else:
367395
# If they don't match, yield an end message for the last message type and a start message for the new one
368396
if last_flag_base:

interpreter/core/llm/run_tool_calling_llm.py

Lines changed: 133 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,74 @@
2727
}
2828

2929

30+
def process_messages(messages):
    """Convert legacy function-calling messages into the tool-calling format.

    Walks ``messages`` in order and produces a new list in which:

    * every message carrying a truthy ``"function_call"`` has it replaced by
      a single-element ``"tool_calls"`` list with a sequential id
      (``toolu_1``, ``toolu_2``, ...);
    * a ``role == "function"`` message directly following such a call becomes
      the matching ``role == "tool"`` response (same ``tool_call_id``);
    * a call with no response directly after it gets an empty tool response;
    * a ``role == "function"`` message that does not directly follow a call
      (an "orphaned" response) is preceded by a synthetic assistant tool call
      so that every tool response has a matching call.

    The input is never mutated: messages that need rewriting are shallow-copied
    first, so calling this function repeatedly on the same list gives the same
    result. Messages that need no rewriting are passed through as the same
    objects.

    Args:
        messages: Sequence of dict-like chat messages.

    Returns:
        A new list of messages in tool-calling format.
    """
    processed_messages = []
    last_tool_id = 0

    i = 0
    while i < len(messages):
        message = messages[i]

        if message.get("function_call"):
            last_tool_id += 1
            tool_id = f"toolu_{last_tool_id}"

            # Copy before rewriting so the caller's message keeps its
            # "function_call" key; otherwise a second pass over the same
            # list would misclassify the following response as orphaned.
            message = message.copy()

            # Convert function_call to tool_calls
            function = message.pop("function_call")
            message["tool_calls"] = [
                {"id": tool_id, "type": "function", "function": function}
            ]
            processed_messages.append(message)

            # Process the next message if it's a function response
            if i + 1 < len(messages) and messages[i + 1].get("role") == "function":
                next_message = messages[i + 1].copy()
                next_message["role"] = "tool"
                next_message["tool_call_id"] = tool_id
                processed_messages.append(next_message)
                i += 1  # Skip the next message as we've already processed it
            else:
                # Add an empty tool response if there isn't one
                processed_messages.append(
                    {"role": "tool", "tool_call_id": tool_id, "content": ""}
                )

        elif message.get("role") == "function":
            # This handles orphaned function responses
            last_tool_id += 1
            tool_id = f"toolu_{last_tool_id}"

            # Add a tool call before this orphaned tool response
            processed_messages.append(
                {
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "id": tool_id,
                            "type": "function",
                            "function": {
                                "name": "execute",
                                "arguments": "# Automated tool call to fetch more output, triggered by the user.",
                            },
                        }
                    ],
                }
            )

            # Process the function response (on a copy, to leave the input intact)
            message = message.copy()
            message["role"] = "tool"
            message["tool_call_id"] = tool_id
            processed_messages.append(message)

        else:
            # For non-tool-related messages, just add them as is
            processed_messages.append(message)

        i += 1

    return processed_messages
96+
97+
3098
def run_tool_calling_llm(llm, request_params):
3199
## Setup
32100

@@ -36,43 +104,72 @@ def run_tool_calling_llm(llm, request_params):
36104
]
37105
request_params["tools"] = [tool_schema]
38106

39-
last_tool_id = 0
40-
for i, message in enumerate(request_params["messages"]):
41-
if "function_call" in message:
42-
last_tool_id += 1
43-
function = message.pop("function_call")
44-
message["tool_calls"] = [
45-
{
46-
"id": "toolu_" + str(last_tool_id),
47-
"type": "function",
48-
"function": function,
49-
}
50-
]
51-
if message["role"] == "function":
52-
if i != 0 and request_params["messages"][i - 1]["role"] == "tool":
53-
request_params["messages"][i]["content"] += message["content"]
54-
message = None
55-
else:
56-
message["role"] = "tool"
57-
message["tool_call_id"] = "toolu_" + str(last_tool_id)
58-
59-
request_params["messages"] = [m for m in request_params["messages"] if m != None]
60-
61-
new_messages = []
62-
for i, message in enumerate(request_params["messages"]):
63-
new_messages.append(message)
64-
if "tool_calls" in message:
65-
tool_call_id = message["tool_calls"][0]["id"]
66-
if not any(
67-
m
68-
for m in request_params["messages"]
69-
if m.get("role") == "tool" and m.get("tool_call_id") == tool_call_id
70-
):
71-
new_messages.append(
72-
{"role": "tool", "tool_call_id": tool_call_id, "content": ""}
73-
)
107+
import pprint
108+
109+
pprint.pprint(
110+
[str(m)[:600] if len(str(m)) > 1000 else m for m in request_params["messages"]]
111+
)
112+
113+
print("PROCESSING")
114+
115+
request_params["messages"] = process_messages(request_params["messages"])
116+
117+
# # This makes any role: tool have the ID of the last tool call
118+
# last_tool_id = 0
119+
# for i, message in enumerate(request_params["messages"]):
120+
# if "function_call" in message:
121+
# last_tool_id += 1
122+
# function = message.pop("function_call")
123+
# message["tool_calls"] = [
124+
# {
125+
# "id": "toolu_" + str(last_tool_id),
126+
# "type": "function",
127+
# "function": function,
128+
# }
129+
# ]
130+
# if message["role"] == "function":
131+
# if i != 0 and request_params["messages"][i - 1]["role"] == "tool":
132+
# request_params["messages"][i]["content"] += message["content"]
133+
# message = None
134+
# else:
135+
# message["role"] = "tool"
136+
# message["tool_call_id"] = "toolu_" + str(last_tool_id)
137+
# request_params["messages"] = [m for m in request_params["messages"] if m != None]
138+
139+
# This adds an empty tool response for any tool call without a tool response
140+
# new_messages = []
141+
# for i, message in enumerate(request_params["messages"]):
142+
# new_messages.append(message)
143+
# if "tool_calls" in message:
144+
# tool_call_id = message["tool_calls"][0]["id"]
145+
# if not any(
146+
# m
147+
# for m in request_params["messages"]
148+
# if m.get("role") == "tool" and m.get("tool_call_id") == tool_call_id
149+
# ):
150+
# new_messages.append(
151+
# {"role": "tool", "tool_call_id": tool_call_id, "content": ""}
152+
# )
153+
# request_params["messages"] = new_messages
154+
155+
# messages = request_params["messages"]
156+
# for i in range(len(messages)):
157+
# if messages[i]["role"] == "user" and isinstance(messages[i]["content"], list):
158+
# # Found an image from the user
159+
# image_message = messages[i]
160+
# j = i + 1
161+
# while j < len(messages) and messages[j]["role"] == "tool":
162+
# # Move the image down until it's after all the role: tools
163+
# j += 1
164+
# messages.insert(j, image_message)
165+
# del messages[i]
166+
# request_params["messages"] = messages
167+
168+
import pprint
74169

75-
request_params["messages"] = new_messages
170+
pprint.pprint(
171+
[str(m)[:600] if len(str(m)) > 1000 else m for m in request_params["messages"]]
172+
)
76173

77174
# Add OpenAI's recommended function message
78175
# request_params["messages"][0][

interpreter/core/llm/utils/convert_to_openai_messages.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,58 @@ def convert_to_openai_messages(
155155
# print("Failed to shrink image. Proceeding with original image size.")
156156
pass
157157

158+
# Must be less than 5mb
159+
# Calculate the size of the original binary data in bytes
160+
content_size_bytes = len(message["content"]) * 3 / 4
161+
162+
# Convert the size to MB
163+
content_size_mb = content_size_bytes / (1024 * 1024)
164+
165+
# If the content size is greater than 5 MB, resize the image
166+
if content_size_mb > 5:
167+
try:
168+
# Decode the base64 image
169+
img_data = base64.b64decode(message["content"])
170+
img = Image.open(io.BytesIO(img_data))
171+
172+
# Calculate the size of the original binary data in bytes
173+
content_size_bytes = len(img_data)
174+
175+
# Convert the size to MB
176+
content_size_mb = content_size_bytes / (1024 * 1024)
177+
178+
# Run in a loop to make SURE it's less than 5mb
179+
while content_size_mb > 5:
180+
# Calculate the scale factor needed to reduce the image size to 5 MB
181+
scale_factor = (5 / content_size_mb) ** 0.5
182+
183+
# Calculate the new dimensions
184+
new_width = int(img.width * scale_factor)
185+
new_height = int(img.height * scale_factor)
186+
187+
# Resize the image
188+
img = img.resize((new_width, new_height))
189+
190+
# Convert the image back to base64
191+
buffered = io.BytesIO()
192+
img.save(buffered, format=extension)
193+
img_str = base64.b64encode(buffered.getvalue()).decode(
194+
"utf-8"
195+
)
196+
197+
# Set the content
198+
content = f"data:image/{extension};base64,{img_str}"
199+
200+
# Recalculate the size of the content in bytes
201+
content_size_bytes = len(content) * 3 / 4
202+
203+
# Convert the size to MB
204+
content_size_mb = content_size_bytes / (1024 * 1024)
205+
except:
206+
# This should be non blocking. It's not required
207+
# print("Failed to shrink image. Proceeding with original image size.")
208+
pass
209+
158210
elif message["format"] == "path":
159211
# Convert to base64
160212
image_path = message["content"]
@@ -198,6 +250,33 @@ def convert_to_openai_messages(
198250
],
199251
}
200252

253+
if message["role"] == "computer":
254+
new_message["content"].append(
255+
{
256+
"type": "text",
257+
"text": "This image is the result of the last tool output. What does it mean / are we done?",
258+
}
259+
)
260+
if message.get("format") == "path":
261+
if any(
262+
content.get("type") == "text"
263+
for content in new_message["content"]
264+
):
265+
for content in new_message["content"]:
266+
if content.get("type") == "text":
267+
content["text"] += (
268+
"\nThis image is at this path: "
269+
+ message["content"]
270+
)
271+
else:
272+
new_message["content"].append(
273+
{
274+
"type": "text",
275+
"text": "This image is at this path: "
276+
+ message["content"],
277+
}
278+
)
279+
201280
elif message["type"] == "file":
202281
new_message = {"role": "user", "content": message["content"]}
203282

0 commit comments

Comments
 (0)