Skip to content

Commit 82b32b3

Browse files
committed
✨ image to text tool
1 parent ea542c5 commit 82b32b3

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

backend/services/tool_configuration_service.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import Any, List, Optional, Dict
77
from urllib.parse import urljoin
88

9+
from jinja2 import Template, StrictUndefined
910
from pydantic_core import PydanticUndefined
1011
from fastmcp import Client
1112
import jsonref
@@ -26,6 +27,13 @@
2627
from services.elasticsearch_service import get_embedding_model, elastic_core
2728
from services.tenant_config_service import get_selected_knowledge_list
2829

30+
from backend.consts.const import MODEL_CONFIG_MAPPING
31+
from backend.database.client import minio_client, MinioClient
32+
from backend.utils.config_utils import tenant_config_manager, get_model_name_from_config
33+
from backend.utils.prompt_template_utils import get_analyze_file_prompt_template
34+
from sdk.nexent import MessageObserver
35+
from sdk.nexent.core.models import OpenAIVLModel
36+
2937
logger = logging.getLogger("tool_configuration_service")
3038

3139

@@ -612,6 +620,35 @@ def _validate_local_tool(
612620
'embedding_model': embedding_model
613621
}
614622
tool_instance = tool_class(**params)
623+
elif tool_name == "image_text_understanding_tool":
624+
if not tenant_id or not user_id:
625+
raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
626+
vlm_model_config = tenant_config_manager.get_model_config(
627+
key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
628+
image_to_text_model = OpenAIVLModel(
629+
observer=MessageObserver(),
630+
model_id=get_model_name_from_config(
631+
vlm_model_config) if vlm_model_config else "",
632+
api_base=vlm_model_config.get("base_url", ""),
633+
api_key=vlm_model_config.get("api_key", ""),
634+
temperature=0.7,
635+
top_p=0.7,
636+
frequency_penalty=0.5,
637+
max_tokens=512
638+
)
639+
# Load prompts from yaml file
640+
language = 'zh'
641+
prompts = get_analyze_file_prompt_template(language)
642+
system_prompt_template = Template(prompts['image_analysis']['system_prompt'],
643+
undefined=StrictUndefined)
644+
645+
params = {
646+
**instantiation_params,
647+
'vlm_model': image_to_text_model,
648+
'storage_client': minio_client,
649+
'system_prompt_template': system_prompt_template
650+
}
651+
tool_instance = tool_class(**params)
615652
else:
616653
tool_instance = tool_class(**instantiation_params)
617654

sdk/nexent/core/tools/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .move_item_tool import MoveItemTool
1313
from .list_directory_tool import ListDirectoryTool
1414
from .terminal_tool import TerminalTool
15+
from .image_text_understanding_tool import ImageTextUnderstandingTool
1516

1617
__all__ = [
1718
"ExaSearchTool",
@@ -27,5 +28,6 @@
2728
"DeleteDirectoryTool",
2829
"MoveItemTool",
2930
"ListDirectoryTool",
30-
"TerminalTool"
31+
"TerminalTool",
32+
"ImageTextUnderstandingTool"
3133
]

0 commit comments

Comments
 (0)