♻️ All models should use nexent.core.models instead of smolagents #1971

Phinease · web-flow · commit 3aad19d8d577 · 2025-12-08T16:07:04.000+08:00
diff --git a/backend/services/conversation_management_service.py b/backend/services/conversation_management_service.py
@@ -5,7 +5,6 @@
 from typing import Any, Dict, List, Optional
 
 from jinja2 import StrictUndefined, Template
-from smolagents import OpenAIServerModel
 
 from consts.const import LANGUAGE, MODEL_CONFIG_MAPPING, MESSAGE_ROLE, DEFAULT_EN_TITLE, DEFAULT_ZH_TITLE
 from consts.model import AgentRequest, ConversationResponse, MessageRequest, MessageUnit
@@ -27,7 +26,8 @@
     rename_conversation,
     update_message_opinion
 )
-from nexent.core.utils.observer import ProcessType
+from nexent.core.utils.observer import MessageObserver, ProcessType
+from nexent.core.models import OpenAIModel
 from utils.config_utils import get_model_name_from_config, tenant_config_manager
 from utils.prompt_template_utils import get_generate_title_prompt_template
 from utils.str_utils import remove_think_blocks
@@ -262,8 +262,8 @@ def call_llm_for_title(content: str, tenant_id: str, language: str = LANGUAGE["Z
     model_config = tenant_config_manager.get_model_config(
         key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
 
-    # Create OpenAIServerModel instance
-    llm = OpenAIServerModel(
+    # Create OpenAIModel instance
+    llm = OpenAIModel(
         model_id=get_model_name_from_config(model_config) if model_config.get("model_name") else "",
         api_base=model_config.get("base_url", ""),
         api_key=model_config.get("api_key", ""),
diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py
@@ -760,16 +760,16 @@ async def summary_index_name(self,
             StreamingResponse containing the generated summary
         """
         try:
+            if not tenant_id:
+                raise Exception("Tenant ID is required for summary generation.")
+            
             from utils.document_vector_utils import (
                 process_documents_for_clustering,
                 kmeans_cluster_documents,
                 summarize_clusters_map_reduce,
                 merge_cluster_summaries
             )
             
-            if not tenant_id:
-                raise Exception("Tenant ID is required for summary generation.")
-            
             # Use new Map-Reduce approach
             sample_count = min(batch_size // 5, 200)  # Sample reasonable number of documents
             
diff --git a/backend/utils/document_vector_utils.py b/backend/utils/document_vector_utils.py
@@ -14,12 +14,16 @@
 
 import numpy as np
 from jinja2 import Template, StrictUndefined
-from nexent.vector_database.base import VectorDatabaseCore
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
 from sklearn.metrics.pairwise import cosine_similarity
 
 from consts.const import LANGUAGE
+from database.model_management_db import get_model_by_model_id
+from nexent.core.utils.observer import MessageObserver
+from nexent.core.models import OpenAIModel
+from nexent.vector_database.base import VectorDatabaseCore
+from utils.llm_utils import call_llm_for_system_prompt
 from utils.prompt_template_utils import (
     get_document_summary_prompt_template,
     get_cluster_summary_reduce_prompt_template,
@@ -568,37 +572,22 @@ def summarize_document(document_content: str, filename: str, language: str = LAN
         
         # Call LLM if model_id and tenant_id are provided
         if model_id and tenant_id:
-            from smolagents import OpenAIServerModel
-            from database.model_management_db import get_model_by_model_id
-            from utils.config_utils import get_model_name_from_config
-            from consts.const import MESSAGE_ROLE
-            
+
             # Get model configuration
             llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
             if not llm_model_config:
                 logger.warning(f"No model configuration found for model_id: {model_id}, tenant_id: {tenant_id}")
                 return f"[Document Summary: {filename}] (max {max_words} words) - Content: {document_content[:200]}..."
-            
-            # Create LLM instance
-            llm = OpenAIServerModel(
-                model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
-                api_base=llm_model_config.get("base_url", ""),
-                api_key=llm_model_config.get("api_key", ""),
-                temperature=0.3,
-                top_p=0.95
+
+            document_summary = call_llm_for_system_prompt(
+                model_id=model_id,
+                user_prompt=user_prompt,
+                system_prompt=system_prompt,
+                callback=None,
+                tenant_id=tenant_id
             )
-            
-            # Build messages
-            messages = [
-                {"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt},
-                {"role": MESSAGE_ROLE["USER"], "content": user_prompt}
-            ]
-            
-            # Call LLM, allow more tokens for generation
-            response = llm(messages, max_tokens=max_words * 2)
-            if not response or not response.content:
-                return ""
-            return response.content.strip()
+
+            return (document_summary or "").strip()
         else:
             # Fallback to placeholder if no model configuration
             logger.warning("No model_id or tenant_id provided, using placeholder summary")
@@ -642,10 +631,6 @@ def summarize_cluster(document_summaries: List[str], language: str = LANGUAGE["Z
         
         # Call LLM if model_id and tenant_id are provided
         if model_id and tenant_id:
-            from smolagents import OpenAIServerModel
-            from database.model_management_db import get_model_by_model_id
-            from utils.config_utils import get_model_name_from_config
-            from consts.const import MESSAGE_ROLE
             
             # Get model configuration
             llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
@@ -654,25 +639,15 @@ def summarize_cluster(document_summaries: List[str], language: str = LANGUAGE["Z
                 return f"[Cluster Summary] (max {max_words} words) - Based on {len(document_summaries)} documents"
             
             # Create LLM instance
-            llm = OpenAIServerModel(
-                model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
-                api_base=llm_model_config.get("base_url", ""),
-                api_key=llm_model_config.get("api_key", ""),
-                temperature=0.3,
-                top_p=0.95
+            cluster_summary = call_llm_for_system_prompt(
+                model_id=model_id,
+                user_prompt=user_prompt,
+                system_prompt=system_prompt,
+                callback=None,
+                tenant_id=tenant_id
             )
-            
-            # Build messages
-            messages = [
-                {"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt},
-                {"role": MESSAGE_ROLE["USER"], "content": user_prompt}
-            ]
-            
-            # Call LLM
-            response = llm(messages, max_tokens=max_words * 2)  # Allow more tokens for generation
-            if not response or not response.content:
-                return ""
-            return response.content.strip()
+
+            return (cluster_summary or "").strip()
         else:
             # Fallback to placeholder if no model configuration
             logger.warning("No model_id or tenant_id provided, using placeholder summary")
diff --git a/backend/utils/llm_utils.py b/backend/utils/llm_utils.py
@@ -1,10 +1,10 @@
 import logging
 from typing import Callable, List, Optional
 
-from smolagents import OpenAIServerModel
-
 from consts.const import MESSAGE_ROLE, THINK_END_PATTERN, THINK_START_PATTERN
 from database.model_management_db import get_model_by_model_id
+from nexent.core.utils.observer import MessageObserver
+from nexent.core.models import OpenAIModel
 from utils.config_utils import get_model_name_from_config
 
 logger = logging.getLogger("llm_utils")
@@ -44,7 +44,7 @@ def call_llm_for_system_prompt(
     """
     llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
 
-    llm = OpenAIServerModel(
+    llm = OpenAIModel(
         model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
         api_base=llm_model_config.get("base_url", ""),
         api_key=llm_model_config.get("api_key", ""),
diff --git a/sdk/nexent/core/models/openai_llm.py b/sdk/nexent/core/models/openai_llm.py
@@ -14,7 +14,7 @@
 logger = logging.getLogger("openai_llm")
 
 class OpenAIModel(OpenAIServerModel):
-    def __init__(self, observer: MessageObserver, temperature=0.2, top_p=0.95, 
+    def __init__(self, observer: MessageObserver = MessageObserver, temperature=0.2, top_p=0.95,
                  ssl_verify=True, *args, **kwargs):
         """
         Initialize OpenAI Model with observer and SSL verification option.
diff --git a/test/backend/services/test_conversation_management_service.py b/test/backend/services/test_conversation_management_service.py
@@ -327,7 +327,7 @@ def test_extract_user_messages(self):
         self.assertIn("Give me examples of AI applications", result)
         self.assertIn("AI stands for Artificial Intelligence.", result)
 
-    @patch('backend.services.conversation_management_service.OpenAIServerModel')
+    @patch('backend.services.conversation_management_service.OpenAIModel')
     @patch('backend.services.conversation_management_service.get_generate_title_prompt_template')
     @patch('backend.services.conversation_management_service.tenant_config_manager.get_model_config')
     def test_call_llm_for_title(self, mock_get_model_config, mock_get_prompt_template, mock_openai):
@@ -360,7 +360,7 @@ def test_call_llm_for_title(self, mock_get_model_config, mock_get_prompt_templat
         mock_llm_instance.generate.assert_called_once()
         mock_get_prompt_template.assert_called_once_with(language='zh')
 
-    @patch('backend.services.conversation_management_service.OpenAIServerModel')
+    @patch('backend.services.conversation_management_service.OpenAIModel')
     @patch('backend.services.conversation_management_service.get_generate_title_prompt_template')
     @patch('backend.services.conversation_management_service.tenant_config_manager.get_model_config')
     def test_call_llm_for_title_response_none_zh(self, mock_get_model_config, mock_get_prompt_template, mock_openai):
@@ -392,7 +392,7 @@ def test_call_llm_for_title_response_none_zh(self, mock_get_model_config, mock_g
         mock_llm_instance.generate.assert_called_once()
         mock_get_prompt_template.assert_called_once_with(language='zh')
 
-    @patch('backend.services.conversation_management_service.OpenAIServerModel')
+    @patch('backend.services.conversation_management_service.OpenAIModel')
     @patch('backend.services.conversation_management_service.get_generate_title_prompt_template')
     @patch('backend.services.conversation_management_service.tenant_config_manager.get_model_config')
     def test_call_llm_for_title_response_none_en(self, mock_get_model_config, mock_get_prompt_template, mock_openai):
diff --git a/test/backend/services/test_vectordatabase_service.py b/test/backend/services/test_vectordatabase_service.py
@@ -35,11 +35,19 @@ def _create_package_mock(name: str) -> MagicMock:
 sys.modules['nexent.core'] = _create_package_mock('nexent.core')
 sys.modules['nexent.core.agents'] = _create_package_mock('nexent.core.agents')
 sys.modules['nexent.core.agents.agent_model'] = MagicMock()
-sys.modules['nexent.core.models'] = _create_package_mock('nexent.core.models')
+# Mock nexent.core.models with OpenAIModel
+openai_model_module = ModuleType('nexent.core.models')
+openai_model_module.OpenAIModel = MagicMock
+sys.modules['nexent.core.models'] = openai_model_module
 sys.modules['nexent.core.models.embedding_model'] = MagicMock()
 sys.modules['nexent.core.models.stt_model'] = MagicMock()
 sys.modules['nexent.core.nlp'] = _create_package_mock('nexent.core.nlp')
 sys.modules['nexent.core.nlp.tokenizer'] = MagicMock()
+# Mock nexent.core.utils and observer module
+sys.modules['nexent.core.utils'] = _create_package_mock('nexent.core.utils')
+observer_module = ModuleType('nexent.core.utils.observer')
+observer_module.MessageObserver = MagicMock
+sys.modules['nexent.core.utils.observer'] = observer_module
 sys.modules['nexent.vector_database'] = _create_package_mock('nexent.vector_database')
 vector_db_base_module = ModuleType('nexent.vector_database.base')
 
@@ -96,6 +104,8 @@ class _VectorDatabaseCore:
 # Apply the patches before importing the module being tested
 with patch('botocore.client.BaseClient._make_api_call'), \
         patch('elasticsearch.Elasticsearch', return_value=MagicMock()):
+    # Import utils.document_vector_utils to ensure it's available for patching
+    import utils.document_vector_utils
     from backend.services.vectordatabase_service import ElasticSearchService, check_knowledge_base_exist_impl
 
 
diff --git a/test/backend/test_cluster_summarization.py b/test/backend/test_cluster_summarization.py
@@ -10,11 +10,39 @@
 import numpy as np
 import pytest
 
-# Add backend to path
+# Mock consts module before patching backend.database.client to avoid ImportError
+# backend.database.client imports from consts.const, so we need to mock it first
+consts_mock = MagicMock()
+consts_const_mock = MagicMock()
+# Set required constants that backend.database.client might use
+consts_const_mock.MINIO_ENDPOINT = "http://localhost:9000"
+consts_const_mock.MINIO_ACCESS_KEY = "test_access_key"
+consts_const_mock.MINIO_SECRET_KEY = "test_secret_key"
+consts_const_mock.MINIO_REGION = "us-east-1"
+consts_const_mock.MINIO_DEFAULT_BUCKET = "test-bucket"
+consts_const_mock.POSTGRES_HOST = "localhost"
+consts_const_mock.POSTGRES_USER = "test_user"
+consts_const_mock.NEXENT_POSTGRES_PASSWORD = "test_password"
+consts_const_mock.POSTGRES_DB = "test_db"
+consts_const_mock.POSTGRES_PORT = 5432
+consts_const_mock.LANGUAGE = {"ZH": "zh", "EN": "en"}
+consts_mock.const = consts_const_mock
+sys.modules['consts'] = consts_mock
+sys.modules['consts.const'] = consts_const_mock
+
+# Add backend to path before patching backend modules
 current_dir = os.path.dirname(os.path.abspath(__file__))
 backend_dir = os.path.abspath(os.path.join(current_dir, "../../backend"))
 sys.path.insert(0, backend_dir)
 
+# Patch storage factory and MinIO config validation to avoid errors during initialization
+# These patches must be started before any imports that use MinioClient
+storage_client_mock = MagicMock()
+minio_client_mock = MagicMock()
+patch('nexent.storage.storage_client_factory.create_storage_client_from_config', return_value=storage_client_mock).start()
+patch('nexent.storage.minio_config.MinIOStorageConfig.validate', lambda self: None).start()
+patch('backend.database.client.MinioClient', return_value=minio_client_mock).start()
+
 from backend.utils.document_vector_utils import (
     extract_cluster_content,
     summarize_cluster,
diff --git a/test/backend/test_document_vector_integration.py b/test/backend/test_document_vector_integration.py
@@ -11,11 +11,38 @@
 import numpy as np
 import pytest
 
-# Add backend to path
+# Mock consts module before patching backend.database.client to avoid ImportError
+# backend.database.client imports from consts.const, so we need to mock it first
+consts_mock = MagicMock()
+consts_const_mock = MagicMock()
+# Set required constants that backend.database.client might use
+consts_const_mock.MINIO_ENDPOINT = "http://localhost:9000"
+consts_const_mock.MINIO_ACCESS_KEY = "test_access_key"
+consts_const_mock.MINIO_SECRET_KEY = "test_secret_key"
+consts_const_mock.MINIO_REGION = "us-east-1"
+consts_const_mock.MINIO_DEFAULT_BUCKET = "test-bucket"
+consts_const_mock.POSTGRES_HOST = "localhost"
+consts_const_mock.POSTGRES_USER = "test_user"
+consts_const_mock.NEXENT_POSTGRES_PASSWORD = "test_password"
+consts_const_mock.POSTGRES_DB = "test_db"
+consts_const_mock.POSTGRES_PORT = 5432
+consts_mock.const = consts_const_mock
+sys.modules['consts'] = consts_mock
+sys.modules['consts.const'] = consts_const_mock
+
+# Add backend to path before patching backend modules
 current_dir = os.path.dirname(os.path.abspath(__file__))
 backend_dir = os.path.abspath(os.path.join(current_dir, "../../backend"))
 sys.path.insert(0, backend_dir)
 
+# Patch storage factory and MinIO config validation to avoid errors during initialization
+# These patches must be started before any imports that use MinioClient
+storage_client_mock = MagicMock()
+minio_client_mock = MagicMock()
+patch('nexent.storage.storage_client_factory.create_storage_client_from_config', return_value=storage_client_mock).start()
+patch('nexent.storage.minio_config.MinIOStorageConfig.validate', lambda self: None).start()
+patch('backend.database.client.MinioClient', return_value=minio_client_mock).start()
+
 from backend.utils.document_vector_utils import (
     calculate_document_embedding,
     auto_determine_k,
diff --git a/test/backend/test_document_vector_utils.py b/test/backend/test_document_vector_utils.py
diff --git a/test/backend/test_document_vector_utils_coverage.py b/test/backend/test_document_vector_utils_coverage.py
diff --git a/test/backend/test_summary_formatting.py b/test/backend/test_summary_formatting.py
diff --git a/test/backend/utils/test_llm_utils.py b/test/backend/utils/test_llm_utils.py