1313from ... import MinIOStorageClient
1414from ...multi_modal .load_save_object import LoadSaveObjectManager
1515
16- logger = logging .getLogger ("image_understanding_tool " )
16+ logger = logging .getLogger ("analyze_image_tool " )
1717
1818
19- class ImageUnderstandingTool (Tool ):
20- """Tool for extracting text from images stored in S3-compatible storage. """
19+ class AnalyzeImageTool (Tool ):
20+ """Tool for understanding and analyzing images. """
2121
22- name = "image_understanding "
22+ name = "analyze_image "
2323 description = (
24- "Understand an image stored in S3-compatible storage or HTTP and return the text content inside the image. "
25- "Provide the object location via an s3:// URL or http:// URL or https:// URL."
24+ "This tool uses a visual language model to understand images based on your query and then returns a description of the image."
25+ "It's used to understand and analyze images stored in S3 buckets, via HTTP and HTTPS."
26+ "Use this tool when you want to retrieve information contained in an image and provide the image's URL and your query."
2627 )
2728 inputs = {
2829 "image_url" : {
@@ -45,32 +46,29 @@ def __init__(
4546 observer : MessageObserver = Field (description = "Message observer" , default = None , exclude = True ),
4647 vlm_model : OpenAIVLModel = Field (description = "The VLM model to use" , default = None , exclude = True ),
4748 storage_client : MinIOStorageClient = Field (description = "Storage client to use" , default = None , exclude = True ),
48- # todo 这么写对不对
49- system_prompt_template : Template = Field (description = "System prompt template to use" , default = None , exclude = True ),
5049 ):
5150 super ().__init__ ()
5251 self .observer = observer
5352 self .vlm_model = vlm_model
5453 self .storage_client = storage_client
55- self .system_prompt_template = system_prompt_template
5654 # Create LoadSaveObjectManager with the storage client
5755 self .mm = LoadSaveObjectManager (storage_client = self .storage_client )
5856
5957 # Dynamically apply the load_object decorator to forward method
6058 self .forward = self .mm .load_object (input_names = ["image_url" ])(self ._forward_impl )
6159
62- self .running_prompt_zh = "正在理解图片 ..."
63- self .running_prompt_en = "Understanding image..."
60+ self .running_prompt_zh = "正在分析图片 ..."
61+ self .running_prompt_en = "Analyzing image..."
6462
6563 def _forward_impl (self , image_url : bytes , query : str ) -> str :
6664 """
67- Analyze the image specified by the S3 URL and return recognized text.
65+ Analyze an image referenced by an S3, HTTP, or HTTPS URL and return the identified text.
6866
6967 Note: This method is wrapped by load_object decorator which downloads
70- the image from S3 URL and passes bytes to this method.
68+ the image from S3 URL, HTTP URL, or HTTPS URL and passes bytes to this method.
7169
7270 Args:
73- image_url: Image bytes (converted from S3 URL by decorator).
71+ image_url: Image bytes (converted from S3 URL, HTTP URL, or HTTPS URL by decorator).
7472
7573 Returns:
7674 JSON string containing the recognized text.
@@ -85,23 +83,21 @@ def _forward_impl(self, image_url: bytes, query: str) -> str:
8583 if self .observer :
8684 running_prompt = self .running_prompt_zh if self .observer .lang == "zh" else self .running_prompt_en
8785 self .observer .add_message ("" , ProcessType .TOOL , running_prompt )
88- card_content = [{"icon" : "image" , "text" : "Processing image..." }]
86+ card_content = [{"icon" : "image" , "text" : "Analyzing image..." }]
8987 self .observer .add_message ("" , ProcessType .CARD , json .dumps (card_content , ensure_ascii = False ))
9088
9189 # Load prompts from yaml file
92- prompts = get_prompt_template (template_type = 'understand_image' , language = self .observer .lang )
90+ prompts = get_prompt_template (template_type = 'analyze_image' , language = self .observer .lang )
9391
9492 try :
9593
9694 response = self .vlm_model .analyze_image (
9795 image_input = image_stream ,
98- system_prompt = Template (prompts ['system_prompt' ],undefined = StrictUndefined ).render ({'query' : query }))
96+ system_prompt = Template (prompts ['system_prompt' ], undefined = StrictUndefined ).render ({'query' : query }))
9997 except Exception as e :
10098 raise Exception (f"Error understanding image: { str (e )} " )
10199 text = response .content
102100 # Record the detailed content of this search
103- search_results_data = {'text' :text }
104- if self .observer :
105- search_results_data = json .dumps (search_results_data , ensure_ascii = False )
106- self .observer .add_message ("" , ProcessType .SEARCH_CONTENT , search_results_data )
101+ # TODO: what structure should be returned?
102+ search_results_data = {'text' : text }
107103 return json .dumps (search_results_data , ensure_ascii = False )
0 commit comments