 from typing import List, Optional
 import requests
 from .base_node import BaseNode
+from ..utils.logging import get_logger

 class GenerateAnswerFromImageNode(BaseNode):
     """
@@ -26,6 +27,8 @@ def execute(self, state: dict) -> dict:
         Processes images from the state, generates answers,
         consolidates the results, and updates the state.
         """
+        self.logger.info(f"--- Executing {self.node_name} Node ---")
+
         images = state.get('screenshots', [])
         analyses = []

@@ -38,51 +41,52 @@ def execute(self, state: dict) -> dict:
                              is not supported. Supported models are:
                              {', '.join(supported_models)}.""")

-        for image_data in images:
-            base64_image = base64.b64encode(image_data).decode('utf-8')
+        if self.node_config["config"]["llm"]["model"].startswith("gpt"):
+            for image_data in images:
+                base64_image = base64.b64encode(image_data).decode('utf-8')

-            headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {api_key}"
-            }
+                headers = {
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {api_key}"
+                }

-            payload = {
-                "model": self.node_config["config"]["llm"]["model"],
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": state.get("user_prompt",
-                                                  "Extract information from the image")
-                            },
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{base64_image}"
+                payload = {
+                    "model": self.node_config["config"]["llm"]["model"],
+                    "messages": [
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": state.get("user_prompt",
+                                                      "Extract information from the image")
+                                },
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{base64_image}"
+                                    }
                                 }
-                            }
-                        ]
-                    }
-                ],
-                "max_tokens": 300
-            }
+                            ]
+                        }
+                    ],
+                    "max_tokens": 300
+                }

-            response = requests.post("https://api.openai.com/v1/chat/completions",
-                                     headers=headers,
-                                     json=payload,
-                                     timeout=10)
-            result = response.json()
+                response = requests.post("https://api.openai.com/v1/chat/completions",
+                                         headers=headers,
+                                         json=payload,
+                                         timeout=10)
+                result = response.json()

-            response_text = result.get('choices',
-                                       [{}])[0].get('message', {}).get('content', 'No response')
-            analyses.append(response_text)
+                response_text = result.get('choices',
+                                           [{}])[0].get('message', {}).get('content', 'No response')
+                analyses.append(response_text)

-        consolidated_analysis = " ".join(analyses)
+            consolidated_analysis = " ".join(analyses)

-        state['answer'] = {
-            "consolidated_analysis": consolidated_analysis
-        }
+            state['answer'] = {
+                "consolidated_analysis": consolidated_analysis
+            }

-        return state
+            return state
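
For reference, a minimal sketch of how the changed execute() path could be exercised on its own. The import path, the constructor arguments, and the location of the API key inside node_config follow the usual BaseNode pattern and are assumptions, not details this commit specifies.

# Hypothetical usage sketch, not part of this commit.
from scrapegraphai.nodes import GenerateAnswerFromImageNode  # assumed export path

node = GenerateAnswerFromImageNode(
    input="screenshots & user_prompt",   # assumed input expression
    output=["answer"],
    node_config={
        "config": {
            "llm": {
                "model": "gpt-4o",       # must start with "gpt" to reach the new branch
                "api_key": "sk-...",     # assumed source of the key used in the Bearer header
            }
        }
    },
)

# execute() expects raw image bytes under state["screenshots"]; each one is
# base64-encoded and posted to the OpenAI chat/completions endpoint.
with open("screenshot.jpg", "rb") as f:
    state = {"screenshots": [f.read()], "user_prompt": "What does this page show?"}

state = node.execute(state)
print(state["answer"]["consolidated_analysis"])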