4
4
import os
5
5
import platform
6
6
import sys
7
+ import time
7
8
import traceback
8
9
import uuid
9
10
from datetime import datetime
10
11
from typing import Any , cast
11
12
12
- from prompt_toolkit import PromptSession
13
13
from readchar import readchar
14
14
15
15
from .misc .get_input import get_input
24
24
litellm .suppress_debug_info = True
25
25
litellm .REPEATED_STREAMING_CHUNK_LIMIT = 99999999
26
26
litellm .modify_params = True
27
+ # litellm.drop_params = True
27
28
28
29
from anthropic import Anthropic
29
30
from anthropic .types .beta import (
@@ -245,6 +246,10 @@ async def async_respond(self):
245
246
provider = self .provider # Keep existing provider if set
246
247
max_tokens = self .max_tokens # Keep existing max_tokens if set
247
248
249
+ if self .model == "claude-3-5-sonnet-latest" :
250
+ # LiteLLM can't find the model info for the "claude-3-5-sonnet-latest" alias, so set the provider explicitly
251
+ provider = "anthropic"
252
+
248
253
# Only try to get model info if we need either provider or max_tokens
249
254
if provider is None or max_tokens is None :
250
255
try :
@@ -610,8 +615,53 @@ async def async_respond(self):
610
615
}
611
616
)
612
617
if "gui" in self .tools :
613
- print ("\n GUI is not supported for non-Anthropic models yet.\n " )
614
- pass
618
+ tools .append (
619
+ {
620
+ "type" : "function" ,
621
+ "function" : {
622
+ "name" : "computer" ,
623
+ "description" : """Control the computer's mouse, keyboard and screen interactions
624
+ * Coordinates are scaled to standard resolutions (max 1366x768)
625
+ * Screenshots are automatically taken after most actions
626
+ * For key commands, use normalized key names (e.g. 'pagedown' -> 'pgdn', 'enter'/'return' are interchangeable)
627
+ * On macOS, 'super+' is automatically converted to 'command+'
628
+ * Mouse movements use smooth easing for natural motion""" ,
629
+ "parameters" : {
630
+ "type" : "object" ,
631
+ "properties" : {
632
+ "action" : {
633
+ "type" : "string" ,
634
+ "description" : "The action to perform" ,
635
+ "enum" : [
636
+ "key" , # Send keyboard input (hotkeys or single keys)
637
+ "type" , # Type text with a slight delay between characters
638
+ "mouse_move" , # Move mouse cursor to coordinates
639
+ "left_click" , # Perform left mouse click
640
+ "left_click_drag" , # Click and drag from current pos to coordinates
641
+ "right_click" , # Perform right mouse click
642
+ "middle_click" , # Perform middle mouse click
643
+ "double_click" , # Perform double left click
644
+ "screenshot" , # Take a screenshot
645
+ "cursor_position" , # Get current cursor coordinates
646
+ ],
647
+ },
648
+ "text" : {
649
+ "type" : "string" ,
650
+ "description" : "Text to type or key command to send (required for 'key' and 'type' actions)" ,
651
+ },
652
+ "coordinate" : {
653
+ "type" : "array" ,
654
+ "description" : "X,Y coordinates for mouse actions (required for 'mouse_move' and 'left_click_drag')" ,
655
+ "items" : {"type" : "integer" },
656
+ "minItems" : 2 ,
657
+ "maxItems" : 2 ,
658
+ },
659
+ },
660
+ "required" : ["action" ],
661
+ },
662
+ },
663
+ }
664
+ )
615
665
616
666
if self .model .startswith ("ollama/" ):
617
667
# Fix ollama
@@ -645,6 +695,7 @@ async def async_respond(self):
645
695
"temperature" : self .temperature ,
646
696
"api_key" : self .api_key ,
647
697
"api_version" : self .api_version ,
698
+ "parallel_tool_calls" : False ,
648
699
}
649
700
650
701
if self .tool_calling :
@@ -658,7 +709,6 @@ async def async_respond(self):
658
709
659
710
if self .debug :
660
711
print ("Sending request..." , params )
661
- import time
662
712
663
713
time .sleep (3 )
664
714
@@ -815,13 +865,36 @@ async def async_respond(self):
815
865
result = ToolResult (output = "Tool execution cancelled by user" )
816
866
817
867
if self .tool_calling :
818
- self .messages .append (
819
- {
820
- "role" : "tool" ,
821
- "content" : json .dumps (dataclasses .asdict (result )),
822
- "tool_call_id" : tool_call .id ,
823
- }
824
- )
868
+ if result .base64_image :
869
+ # Send a text placeholder as the tool result, then attach the image in a follow-up user message
870
+ self .messages .append (
871
+ {
872
+ "role" : "tool" ,
873
+ "content" : "The user will reply with the image outputted by the tool." ,
874
+ "tool_call_id" : tool_call .id ,
875
+ }
876
+ )
877
+ self .messages .append (
878
+ {
879
+ "role" : "user" ,
880
+ "content" : [
881
+ {
882
+ "type" : "image_url" ,
883
+ "image_url" : {
884
+ "url" : f"data:image/png;base64,{ result .base64_image } " ,
885
+ },
886
+ }
887
+ ],
888
+ }
889
+ )
890
+ else :
891
+ self .messages .append (
892
+ {
893
+ "role" : "tool" ,
894
+ "content" : json .dumps (dataclasses .asdict (result )),
895
+ "tool_call_id" : tool_call .id ,
896
+ }
897
+ )
825
898
else :
826
899
self .messages .append (
827
900
{
0 commit comments