Skip to content

Commit bdfe9aa

Browse files
committed
Non anthropic OS mode
1 parent 7cc404c commit bdfe9aa

File tree

2 files changed

+87
-12
lines changed

2 files changed

+87
-12
lines changed

interpreter_1/interpreter.py

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
import os
55
import platform
66
import sys
7+
import time
78
import traceback
89
import uuid
910
from datetime import datetime
1011
from typing import Any, cast
1112

12-
from prompt_toolkit import PromptSession
1313
from readchar import readchar
1414

1515
from .misc.get_input import get_input
@@ -24,6 +24,7 @@
2424
litellm.suppress_debug_info = True
2525
litellm.REPEATED_STREAMING_CHUNK_LIMIT = 99999999
2626
litellm.modify_params = True
27+
# litellm.drop_params = True
2728

2829
from anthropic import Anthropic
2930
from anthropic.types.beta import (
@@ -245,6 +246,10 @@ async def async_respond(self):
245246
provider = self.provider # Keep existing provider if set
246247
max_tokens = self.max_tokens # Keep existing max_tokens if set
247248

249+
if self.model == "claude-3-5-sonnet-latest":
250+
# LiteLLM cannot find model info for claude-3-5-sonnet-latest, so set the provider explicitly
251+
provider = "anthropic"
252+
248253
# Only try to get model info if we need either provider or max_tokens
249254
if provider is None or max_tokens is None:
250255
try:
@@ -610,8 +615,53 @@ async def async_respond(self):
610615
}
611616
)
612617
if "gui" in self.tools:
613-
print("\nGUI is not supported for non-Anthropic models yet.\n")
614-
pass
618+
tools.append(
619+
{
620+
"type": "function",
621+
"function": {
622+
"name": "computer",
623+
"description": """Control the computer's mouse, keyboard and screen interactions
624+
* Coordinates are scaled to standard resolutions (max 1366x768)
625+
* Screenshots are automatically taken after most actions
626+
* For key commands, use normalized key names (e.g. 'pagedown' -> 'pgdn', 'enter'/'return' are interchangeable)
627+
* On macOS, 'super+' is automatically converted to 'command+'
628+
* Mouse movements use smooth easing for natural motion""",
629+
"parameters": {
630+
"type": "object",
631+
"properties": {
632+
"action": {
633+
"type": "string",
634+
"description": "The action to perform",
635+
"enum": [
636+
"key", # Send keyboard input (hotkeys or single keys)
637+
"type", # Type text with a slight delay between characters
638+
"mouse_move", # Move mouse cursor to coordinates
639+
"left_click", # Perform left mouse click
640+
"left_click_drag", # Click and drag from current pos to coordinates
641+
"right_click", # Perform right mouse click
642+
"middle_click", # Perform middle mouse click
643+
"double_click", # Perform double left click
644+
"screenshot", # Take a screenshot
645+
"cursor_position", # Get current cursor coordinates
646+
],
647+
},
648+
"text": {
649+
"type": "string",
650+
"description": "Text to type or key command to send (required for 'key' and 'type' actions)",
651+
},
652+
"coordinate": {
653+
"type": "array",
654+
"description": "X,Y coordinates for mouse actions (required for 'mouse_move' and 'left_click_drag')",
655+
"items": {"type": "integer"},
656+
"minItems": 2,
657+
"maxItems": 2,
658+
},
659+
},
660+
"required": ["action"],
661+
},
662+
},
663+
}
664+
)
615665

616666
if self.model.startswith("ollama/"):
617667
# Fix ollama
@@ -645,6 +695,7 @@ async def async_respond(self):
645695
"temperature": self.temperature,
646696
"api_key": self.api_key,
647697
"api_version": self.api_version,
698+
"parallel_tool_calls": False,
648699
}
649700

650701
if self.tool_calling:
@@ -658,7 +709,6 @@ async def async_respond(self):
658709

659710
if self.debug:
660711
print("Sending request...", params)
661-
import time
662712

663713
time.sleep(3)
664714

@@ -815,13 +865,36 @@ async def async_respond(self):
815865
result = ToolResult(output="Tool execution cancelled by user")
816866

817867
if self.tool_calling:
818-
self.messages.append(
819-
{
820-
"role": "tool",
821-
"content": json.dumps(dataclasses.asdict(result)),
822-
"tool_call_id": tool_call.id,
823-
}
824-
)
868+
if result.base64_image:
869+
# Add image to tool result
870+
self.messages.append(
871+
{
872+
"role": "tool",
873+
"content": "The user will reply with the image outputted by the tool.",
874+
"tool_call_id": tool_call.id,
875+
}
876+
)
877+
self.messages.append(
878+
{
879+
"role": "user",
880+
"content": [
881+
{
882+
"type": "image_url",
883+
"image_url": {
884+
"url": f"data:image/png;base64,{result.base64_image}",
885+
},
886+
}
887+
],
888+
}
889+
)
890+
else:
891+
self.messages.append(
892+
{
893+
"role": "tool",
894+
"content": json.dumps(dataclasses.asdict(result)),
895+
"tool_call_id": tool_call.id,
896+
}
897+
)
825898
else:
826899
self.messages.append(
827900
{

interpreter_1/profiles.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ def __init__(self):
3333
# Default values if no profile exists
3434
# Model configuration
3535
self.model = "claude-3-5-sonnet-latest" # The LLM model to use
36-
self.provider = "anthropic" # The model provider (e.g. anthropic, openai) None will auto-detect
36+
self.provider = (
37+
None # The model provider (e.g. anthropic, openai); None will auto-detect
38+
)
3739
self.temperature = 0 # Sampling temperature for model outputs (0-1)
3840
self.max_tokens = None # Maximum tokens in a message
3941

0 commit comments

Comments
 (0)