@@ -110,10 +110,7 @@ def call_gpt_4_vision_preview(messages):
110110
111111 content = response .choices [0 ].message .content
112112
113- if content .startswith ("```json" ):
114- content = content [len ("```json" ) :] # Remove starting ```json
115- if content .endswith ("```" ):
116- content = content [: - len ("```" )] # Remove ending
113+ content = clean_json (content )
117114
118115 assistant_message = {"role" : "assistant" , "content" : content }
119116 if config .verbose :
@@ -234,36 +231,16 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
234231 response = client .chat .completions .create (
235232 model = "gpt-4-vision-preview" ,
236233 messages = messages ,
237- presence_penalty = 1 ,
238- frequency_penalty = 1 ,
239234 temperature = 0.7 ,
240235 max_tokens = 1000 ,
236+ stream = True ,
241237 )
242238
243239 content = response .choices [0 ].message .content
244240
245- # Remove starting and ending backticks
246- if content .startswith ("```json" ):
247- content = content [
248- len ("```json" ) :
249- ].strip () # Remove starting ```json and trim whitespace
250- elif content .startswith ("```" ):
251- content = content [
252- len ("```" ) :
253- ].strip () # Remove starting ``` and trim whitespace
254- if content .endswith ("```" ):
255- content = content [
256- : - len ("```" )
257- ].strip () # Remove ending ``` and trim whitespace
258-
259- # Normalize line breaks and remove any unwanted characters
260- content = "\n " .join (line .strip () for line in content .splitlines ())
261-
262- if config .verbose :
263- print (
264- "\n \n \n [call_gpt_4_vision_preview_ocr] content after cleaning" , content
265- )
241+ content = clean_json (content )
266242
243+ # used later for the messages
267244 content_str = content
268245
269246 content = json .loads (content )
@@ -387,24 +364,26 @@ async def call_gpt_4_vision_preview_labeled(messages, objective, model):
387364
388365 content = response .choices [0 ].message .content
389366
390- if content .startswith ("```json" ):
391- content = content [len ("```json" ) :] # Remove starting ```json
392- if content .endswith ("```" ):
393- content = content [: - len ("```" )] # Remove ending
367+ content = clean_json (content )
394368
395369 assistant_message = {"role" : "assistant" , "content" : content }
370+
371+ messages .append (assistant_message )
372+
373+ content = json .loads (content )
396374 if config .verbose :
397375 print (
398376 "[call_gpt_4_vision_preview_labeled] content" ,
399377 content ,
400378 )
401- messages .append (assistant_message )
402-
403- content = json .loads (content )
404379
405380 processed_content = []
406381
407382 for operation in content :
383+ print (
384+ "[call_gpt_4_vision_preview_labeled] for operation in content" ,
385+ operation ,
386+ )
408387 if operation .get ("operation" ) == "click" :
409388 label = operation .get ("label" )
410389 if config .verbose :
@@ -448,6 +427,12 @@ async def call_gpt_4_vision_preview_labeled(messages, objective, model):
448427 )
449428 processed_content .append (operation )
450429 else :
430+ if config .verbose :
431+ print (
432+ "[Self Operating Computer][call_gpt_4_vision_preview_labeled] .append none click operation" ,
433+ operation ,
434+ )
435+
451436 processed_content .append (operation )
452437
453438 if config .verbose :
@@ -510,10 +495,7 @@ def call_ollama_llava(messages):
510495
511496 content = response ["message" ]["content" ].strip ()
512497
513- if content .startswith ("```json" ):
514- content = content [len ("```json" ) :] # Remove starting ```json
515- if content .endswith ("```" ):
516- content = content [: - len ("```" )] # Remove ending
498+ content = clean_json (content )
517499
518500 assistant_message = {"role" : "assistant" , "content" : content }
519501 if config .verbose :
@@ -599,3 +581,28 @@ def confirm_system_prompt(messages, objective, model):
599581 print ("[confirm_system_prompt][message] role" , m ["role" ])
600582 print ("[confirm_system_prompt][message] content" , m ["content" ])
601583 print ("------------------[end message]------------------" )
584+
585+
def clean_json(content):
    """Strip Markdown code fences from a model reply and normalize its lines.

    The reply is expected to be a string that may be wrapped in a fenced
    block (opening "```json" or "```", closing "```").  Surrounding
    whitespace is trimmed before the fences are looked for, so a reply
    such as "\\n```json\\n[...]\\n```\\n" is unwrapped as well.

    Args:
        content: Raw text of the model reply.

    Returns:
        The text with any enclosing fence removed, every line stripped of
        leading/trailing whitespace, and lines joined with "\\n".
    """
    if config.verbose:
        print("\n\n[clean_json] content before cleaning", content)

    # Trim first: a leading or trailing newline/space around the fence
    # would otherwise defeat the startswith/endswith checks below and
    # leave the fence in place.
    content = content.strip()

    if content.startswith("```json"):
        # Remove starting ```json and trim whitespace
        content = content[len("```json"):].strip()
    elif content.startswith("```"):
        # Remove starting ``` and trim whitespace
        content = content[len("```"):].strip()
    if content.endswith("```"):
        # Remove ending ``` and trim whitespace
        content = content[: -len("```")].strip()

    # Normalize line breaks and drop per-line indentation
    content = "\n".join(line.strip() for line in content.splitlines())

    if config.verbose:
        print("\n\n[clean_json] content after cleaning", content)

    return content
0 commit comments