ModelEngine-Group
diff --git a/‎backend/agents/create_agent_info.py‎
Lines changed: 8 additions & 0 deletions b/‎backend/agents/create_agent_info.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎backend/services/image_service.py‎
Lines changed: 21 additions & 0 deletions b/‎backend/services/image_service.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎backend/services/tool_configuration_service.py‎
Lines changed: 12 additions & 0 deletions b/‎backend/services/tool_configuration_service.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎doc/docs/zh/opensource-memorial-wall.md‎
Lines changed: 8 additions & 0 deletions b/‎doc/docs/zh/opensource-memorial-wall.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎frontend/app/[locale]/agents/components/PromptManager.tsx‎
Lines changed: 1 addition & 1 deletion b/‎frontend/app/[locale]/agents/components/PromptManager.tsx‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎sdk/nexent/core/agents/nexent_agent.py‎
Lines changed: 5 additions & 0 deletions b/‎sdk/nexent/core/agents/nexent_agent.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎sdk/nexent/core/prompts/analyze_image.yaml‎
Lines changed: 14 additions & 0 deletions b/‎sdk/nexent/core/prompts/analyze_image.yaml‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎sdk/nexent/core/prompts/analyze_image_en.yaml‎
Lines changed: 13 additions & 0 deletions b/‎sdk/nexent/core/prompts/analyze_image_en.yaml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎sdk/nexent/core/tools/__init__.py‎
Lines changed: 3 additions & 1 deletion b/‎sdk/nexent/core/tools/__init__.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎sdk/nexent/core/tools/analyze_image_tool.py‎
Lines changed: 138 additions & 0 deletions b/‎sdk/nexent/core/tools/analyze_image_tool.py‎
Lines changed: 138 additions & 0 deletions
@@ -17,9 +17,11 @@
 from services.tenant_config_service import get_selected_knowledge_list
 from services.remote_mcp_service import get_remote_mcp_server_list
 from services.memory_config_service import build_memory_context
+from services.image_service import get_vlm_model
 from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
 from database.tool_db import search_tools_for_sub_agent
 from database.model_management_db import get_model_records, get_model_by_model_id
+from database.client import minio_client
 from utils.model_name_utils import add_repo_to_name
 from utils.prompt_template_utils import get_agent_prompt_template
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
@@ -236,6 +238,12 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
                 "vdb_core": get_vector_db_core(),
                 "embedding_model": get_embedding_model(tenant_id=tenant_id),
             }
+        elif tool_config.class_name == "AnalyzeImageTool":
+            tool_config.metadata = {
+                "vlm_model": get_vlm_model(tenant_id=tenant_id),
+                "storage_client": minio_client,
+            }
+
         tool_config_list.append(tool_config)
 
     return tool_config_list
 
@@ -4,6 +4,11 @@
 import aiohttp
 
 from consts.const import DATA_PROCESS_SERVICE
+from consts.const import MODEL_CONFIG_MAPPING
+from utils.config_utils import tenant_config_manager, get_model_name_from_config
+
+from nexent import MessageObserver
+from nexent.core.models import OpenAIVLModel
 
 logger = logging.getLogger("image_service")
 
@@ -23,3 +28,19 @@ async def proxy_image_impl(decoded_url: str):
 
             result = await response.json()
             return result
+
+def get_vlm_model(tenant_id: str):
+    # Get the tenant config
+    vlm_model_config = tenant_config_manager.get_model_config(
+        key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
+    return OpenAIVLModel(
+                observer=MessageObserver(),
+                model_id=get_model_name_from_config(
+                    vlm_model_config) if vlm_model_config else "",
+                api_base=vlm_model_config.get("base_url", ""),
+                api_key=vlm_model_config.get("api_key", ""),
+                temperature=0.7,
+                top_p=0.7,
+                frequency_penalty=0.5,
+                max_tokens=512
+            )
@@ -25,6 +25,8 @@
 from database.user_tenant_db import get_all_tenant_ids
 from services.vectordatabase_service import get_embedding_model, get_vector_db_core
 from services.tenant_config_service import get_selected_knowledge_list
+from database.client import minio_client
+from services.image_service import get_vlm_model
 
 logger = logging.getLogger("tool_configuration_service")
 
@@ -613,6 +615,16 @@ def _validate_local_tool(
                 'embedding_model': embedding_model,
             }
             tool_instance = tool_class(**params)
+        elif tool_name == "analyze_image":
+            if not tenant_id or not user_id:
+                raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
+            image_to_text_model = get_vlm_model(tenant_id=tenant_id)
+            params = {
+                **instantiation_params,
+                'vlm_model': image_to_text_model,
+                'storage_client': minio_client
+            }
+            tool_instance = tool_class(**params)
         else:
             tool_instance = tool_class(**instantiation_params)
 
 
@@ -516,3 +516,11 @@ nexent智能体帮助我学到更多的东西，赞！
 ::: info SkyWalker - 2025-11-26
 第一次使用nexent，想借此更快入手ai应用开发呀！
 :::
+
+:::info user - 2025-11-26
+Nexent开发者加油
+:::
+
+:::info NOSN - 2025-11-27
+Nexent越做越强大！
+:::
@@ -615,7 +615,7 @@ export default function PromptManager({
                   overflowY: "auto",
                 }}
                 autoSize={false}
-                disabled={!isEditingMode}
+                disabled={!isEditingMode || isGeneratingAgent}
               />
             </div>
 
 
@@ -83,6 +83,11 @@ def create_local_tool(self, tool_config: ToolConfig):
                     "vdb_core", None) if tool_config.metadata else None
                 tools_obj.embedding_model = tool_config.metadata.get(
                     "embedding_model", None) if tool_config.metadata else None
+            elif class_name == "AnalyzeImageTool":
+                tools_obj = tool_class(observer=self.observer,
+                                       vlm_model=tool_config.metadata.get("vlm_model", []),
+                                       storage_client=tool_config.metadata.get("storage_client", []),
+                                       **params)
             else:
                 tools_obj = tool_class(**params)
                 if hasattr(tools_obj, 'observer'):
 
@@ -0,0 +1,14 @@
+# 图片分析 Prompt 模板
+# 用于图片分析
+
+system_prompt: |-
+  用户提出了一个问题：{{ query }}，请从回答这个问题的角度精简、仔细描述一下这个图片，200字以内。
+  
+  **图片分析要求：**
+  1. 重点关注与用户问题相关的图片内容
+  2. 描述要精简明了，突出关键信息
+  3. 避免无关细节，专注于能帮助回答问题的内容
+  4. 保持客观描述，不要过度解读
+
+user_prompt: |
+  请仔细观察这张图片，并从回答用户问题的角度进行描述。
@@ -0,0 +1,13 @@
+# Image Understanding Prompt Templates
+
+system_prompt: |-
+  The user has asked a question: {{ query }}. Please provide a concise and careful description of this image from the perspective of answering this question, within 200 words.
+  
+  **Image Analysis Requirements:**
+  1. Focus on image content relevant to the user's question
+  2. Keep descriptions concise and clear, highlighting key information
+  3. Avoid irrelevant details, focus on content that helps answer the question
+  4. Maintain objective description, avoid over-interpretation
+
+user_prompt: |
+  Please carefully observe this image and describe it from the perspective of answering the user's question.
@@ -12,6 +12,7 @@
 from .move_item_tool import MoveItemTool
 from .list_directory_tool import ListDirectoryTool
 from .terminal_tool import TerminalTool
+from .analyze_image_tool import AnalyzeImageTool
 
 __all__ = [
     "ExaSearchTool", 
@@ -27,5 +28,6 @@
     "DeleteDirectoryTool",
     "MoveItemTool",
     "ListDirectoryTool",
-    "TerminalTool"
+    "TerminalTool",
+    "AnalyzeImageTool"
 ]
@@ -0,0 +1,138 @@
+"""
+Analyze Image Tool
+
+Analyze images using a visual language model.
+Supports images from S3, HTTP, and HTTPS URLs.
+"""
+
+import json
+import logging
+from io import BytesIO
+from typing import List
+
+from jinja2 import Template, StrictUndefined
+from pydantic import Field
+from smolagents.tools import Tool
+
+from nexent.core.models import OpenAIVLModel
+from nexent.core.utils.observer import MessageObserver, ProcessType
+from nexent.core.utils.prompt_template_utils import get_prompt_template
+from nexent.core.utils.tools_common_message import ToolCategory, ToolSign
+from nexent.storage import MinIOStorageClient
+from nexent.multi_modal.load_save_object import LoadSaveObjectManager
+
+logger = logging.getLogger("analyze_image_tool")
+
+
+class AnalyzeImageTool(Tool):
+    """Tool for understanding and analyzing image using a visual language model"""
+
+    name = "analyze_image"
+    description = (
+        "This tool uses a visual language model to understand images based on your query and then returns a description of the image.\n"
+        "It is used to understand and analyze multiple images, with image sources supporting S3 URLs (s3://bucket/key or /bucket/key), "
+        "HTTP, and HTTPS URLs.\n"
+        "Use this tool when you want to retrieve information contained in an image and provide the image's URL and your query."
+    )
+    inputs = {
+        "image_urls_list": {
+            "type": "array",
+            "description": "List of image URLs (S3, HTTP, or HTTPS). Supports s3://bucket/key, /bucket/key, http://, and https:// URLs.",
+        },
+        "query": {
+            "type": "string",
+            "description": "User's question to guide the analysis"
+        }
+    }
+    output_type = "array"
+    category = ToolCategory.MULTIMODAL.value
+    tool_sign = ToolSign.MULTIMODAL_OPERATION.value
+
+    def __init__(
+            self,
+            observer: MessageObserver = Field(
+                description="Message observer",
+                default=None,
+                exclude=True),
+            vlm_model: OpenAIVLModel = Field(
+                description="The VLM model to use",
+                default=None,
+                exclude=True),
+            storage_client: MinIOStorageClient = Field(
+                description="Storage client for downloading files from S3 URLs、HTTP URLs、HTTPS URLs.",
+                default=None,
+                exclude=True)
+    ):
+        super().__init__()
+        self.observer = observer
+        self.vlm_model = vlm_model
+        self.storage_client = storage_client
+        # Create LoadSaveObjectManager with the storage client
+        self.mm = LoadSaveObjectManager(storage_client=self.storage_client)
+
+        # Dynamically apply the load_object decorator to forward method
+        self.forward = self.mm.load_object(input_names=["image_urls_list"])(self._forward_impl)
+
+        self.running_prompt_zh = "正在分析图片..."
+        self.running_prompt_en = "Analyzing image..."
+
+    def _forward_impl(self, image_urls_list: List[bytes], query: str) -> List[str]:
+        """
+        Analyze images identified by S3 URL, HTTP URL, or HTTPS URL and return the identified text.
+        
+        Note: This method is wrapped by load_object decorator which downloads
+        the image from S3 URL, HTTP URL, or HTTPS URL and passes bytes to this method.
+
+        Args:
+            image_urls_list: List of image bytes converted from URLs by the decorator.
+                             The load_object decorator converts URLs to bytes before calling this method.
+            query: User's question to guide the analysis
+
+        Returns:
+            List[str]: One analysis string per image that aligns with the order
+            of the provided images.
+
+        Raises:
+            Exception: If the image cannot be downloaded or analyzed.
+        """
+        # Send tool run message
+        if self.observer:
+            running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
+            self.observer.add_message("", ProcessType.TOOL, running_prompt)
+            card_content = [{"icon": "image", "text": f"Analyzing images..."}]
+            self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
+
+        if image_urls_list is None:
+            raise ValueError("image_urls cannot be None")
+
+        if not isinstance(image_urls_list, list):
+            raise ValueError("image_urls must be a list of bytes")
+
+        if not image_urls_list:
+            raise ValueError("image_urls must contain at least one image")
+
+        # Load prompts from yaml file
+        language = self.observer.lang if self.observer else "en"
+        prompts = get_prompt_template(template_type='analyze_image', language=language)
+        system_prompt = Template(prompts['system_prompt'], undefined=StrictUndefined).render({'query': query})
+
+        try:
+            analysis_results: List[str] = []
+            for index, image_bytes in enumerate(image_urls_list, start=1):
+                logger.info(f"Extracting image #{index}, query: {query}")
+                image_stream = BytesIO(image_bytes)
+                try:
+                    response = self.vlm_model.analyze_image(
+                        image_input=image_stream,
+                        system_prompt=system_prompt
+                    )
+                except Exception as e:
+                    raise Exception(f"Error understanding image {index}: {str(e)}")
+
+                analysis_results.append(response.content)
+
+            return analysis_results
+        except Exception as e:
+            logger.error(f"Error analyzing image: {str(e)}", exc_info=True)
+            error_msg = f"Error analyzing image: {str(e)}"
+            raise Exception(error_msg)