ModelEngine-Group
diff --git a/‎backend/apps/file_management_app.py‎
Lines changed: 33 additions & 1 deletion b/‎backend/apps/file_management_app.py‎
Lines changed: 33 additions & 1 deletion
diff --git a/‎backend/services/conversation_management_service.py‎
Lines changed: 4 additions & 4 deletions b/‎backend/services/conversation_management_service.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎backend/services/vectordatabase_service.py‎
Lines changed: 3 additions & 3 deletions b/‎backend/services/vectordatabase_service.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backend/utils/document_vector_utils.py‎
Lines changed: 23 additions & 48 deletions b/‎backend/utils/document_vector_utils.py‎
Lines changed: 23 additions & 48 deletions
diff --git a/‎backend/utils/llm_utils.py‎
Lines changed: 3 additions & 3 deletions b/‎backend/utils/llm_utils.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/docs/en/deployment/upgrade-guide.md‎
Lines changed: 58 additions & 42 deletions b/‎doc/docs/en/deployment/upgrade-guide.md‎
Lines changed: 58 additions & 42 deletions
@@ -1,5 +1,6 @@
 import logging
 import re
+import base64
 from http import HTTPStatus
 from typing import List, Optional
 from urllib.parse import urlparse, urlunparse, unquote, quote
@@ -149,7 +150,16 @@ async def process_files(
 @file_management_config_router.get("/download/{object_name:path}")
 async def get_storage_file(
     object_name: str = PathParam(..., description="File object name"),
-    download: str = Query("ignore", description="How to get the file"),
+    download: str = Query(
+        "ignore",
+        description=(
+            "How to get the file: "
+            "'ignore' (default, return file info), "
+            "'stream' (return file stream), "
+            "'redirect' (redirect to download URL), "
+            "'base64' (return base64-encoded content for images)."
+        ),
+    ),
     expires: int = Query(3600, description="URL validity period (seconds)"),
     filename: Optional[str] = Query(None, description="Original filename for download (optional)")
 ):
@@ -192,6 +202,28 @@ async def get_storage_file(
                     "ETag": f'"{object_name}"',
                 }
             )
+        elif download == "base64":
+            # Return base64 encoded file content (primarily for images)
+            file_stream, content_type = await get_file_stream_impl(object_name=object_name)
+            try:
+                data = file_stream.read()
+            except Exception as exc:
+                logger.error("Failed to read file stream for base64: %s", str(exc))
+                raise HTTPException(
+                    status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                    detail="Failed to read file content for base64 encoding",
+                )
+
+            base64_content = base64.b64encode(data).decode("utf-8")
+            return JSONResponse(
+                status_code=HTTPStatus.OK,
+                content={
+                    "success": True,
+                    "base64": base64_content,
+                    "content_type": content_type,
+                    "object_name": object_name,
+                },
+            )
         else:
             # return file metadata
             return await get_file_url_impl(object_name=object_name, expires=expires)
 
@@ -5,7 +5,6 @@
 from typing import Any, Dict, List, Optional
 
 from jinja2 import StrictUndefined, Template
-from smolagents import OpenAIServerModel
 
 from consts.const import LANGUAGE, MODEL_CONFIG_MAPPING, MESSAGE_ROLE, DEFAULT_EN_TITLE, DEFAULT_ZH_TITLE
 from consts.model import AgentRequest, ConversationResponse, MessageRequest, MessageUnit
@@ -27,7 +26,8 @@
     rename_conversation,
     update_message_opinion
 )
-from nexent.core.utils.observer import ProcessType
+from nexent.core.utils.observer import MessageObserver, ProcessType
+from nexent.core.models import OpenAIModel
 from utils.config_utils import get_model_name_from_config, tenant_config_manager
 from utils.prompt_template_utils import get_generate_title_prompt_template
 from utils.str_utils import remove_think_blocks
@@ -262,8 +262,8 @@ def call_llm_for_title(content: str, tenant_id: str, language: str = LANGUAGE["Z
     model_config = tenant_config_manager.get_model_config(
         key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
 
-    # Create OpenAIServerModel instance
-    llm = OpenAIServerModel(
+    # Create OpenAIModel instance
+    llm = OpenAIModel(
         model_id=get_model_name_from_config(model_config) if model_config.get("model_name") else "",
         api_base=model_config.get("base_url", ""),
         api_key=model_config.get("api_key", ""),
 
@@ -1001,16 +1001,16 @@ async def summary_index_name(self,
             StreamingResponse containing the generated summary
         """
         try:
+            if not tenant_id:
+                raise Exception("Tenant ID is required for summary generation.")
+            
             from utils.document_vector_utils import (
                 process_documents_for_clustering,
                 kmeans_cluster_documents,
                 summarize_clusters_map_reduce,
                 merge_cluster_summaries
             )
 
-            if not tenant_id:
-                raise Exception("Tenant ID is required for summary generation.")
-            
             # Use new Map-Reduce approach
             sample_count = min(batch_size // 5, 200)  # Sample reasonable number of documents
 
 
@@ -14,12 +14,16 @@
 
 import numpy as np
 from jinja2 import Template, StrictUndefined
-from nexent.vector_database.base import VectorDatabaseCore
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
 from sklearn.metrics.pairwise import cosine_similarity
 
 from consts.const import LANGUAGE
+from database.model_management_db import get_model_by_model_id
+from nexent.core.utils.observer import MessageObserver
+from nexent.core.models import OpenAIModel
+from nexent.vector_database.base import VectorDatabaseCore
+from utils.llm_utils import call_llm_for_system_prompt
 from utils.prompt_template_utils import (
     get_document_summary_prompt_template,
     get_cluster_summary_reduce_prompt_template,
@@ -568,37 +572,22 @@ def summarize_document(document_content: str, filename: str, language: str = LAN
 
         # Call LLM if model_id and tenant_id are provided
         if model_id and tenant_id:
-            from smolagents import OpenAIServerModel
-            from database.model_management_db import get_model_by_model_id
-            from utils.config_utils import get_model_name_from_config
-            from consts.const import MESSAGE_ROLE
-            
+
             # Get model configuration
             llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
             if not llm_model_config:
                 logger.warning(f"No model configuration found for model_id: {model_id}, tenant_id: {tenant_id}")
                 return f"[Document Summary: {filename}] (max {max_words} words) - Content: {document_content[:200]}..."
-            
-            # Create LLM instance
-            llm = OpenAIServerModel(
-                model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
-                api_base=llm_model_config.get("base_url", ""),
-                api_key=llm_model_config.get("api_key", ""),
-                temperature=0.3,
-                top_p=0.95
+
+            document_summary = call_llm_for_system_prompt(
+                model_id=model_id,
+                user_prompt=user_prompt,
+                system_prompt=system_prompt,
+                callback=None,
+                tenant_id=tenant_id
             )
-            
-            # Build messages
-            messages = [
-                {"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt},
-                {"role": MESSAGE_ROLE["USER"], "content": user_prompt}
-            ]
-            
-            # Call LLM, allow more tokens for generation
-            response = llm(messages, max_tokens=max_words * 2)
-            if not response or not response.content:
-                return ""
-            return response.content.strip()
+
+            return (document_summary or "").strip()
         else:
             # Fallback to placeholder if no model configuration
             logger.warning("No model_id or tenant_id provided, using placeholder summary")
@@ -642,10 +631,6 @@ def summarize_cluster(document_summaries: List[str], language: str = LANGUAGE["Z
 
         # Call LLM if model_id and tenant_id are provided
         if model_id and tenant_id:
-            from smolagents import OpenAIServerModel
-            from database.model_management_db import get_model_by_model_id
-            from utils.config_utils import get_model_name_from_config
-            from consts.const import MESSAGE_ROLE
 
             # Get model configuration
             llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
@@ -654,25 +639,15 @@ def summarize_cluster(document_summaries: List[str], language: str = LANGUAGE["Z
                 return f"[Cluster Summary] (max {max_words} words) - Based on {len(document_summaries)} documents"
 
             # Create LLM instance
-            llm = OpenAIServerModel(
-                model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
-                api_base=llm_model_config.get("base_url", ""),
-                api_key=llm_model_config.get("api_key", ""),
-                temperature=0.3,
-                top_p=0.95
+            cluster_summary = call_llm_for_system_prompt(
+                model_id=model_id,
+                user_prompt=user_prompt,
+                system_prompt=system_prompt,
+                callback=None,
+                tenant_id=tenant_id
             )
-            
-            # Build messages
-            messages = [
-                {"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt},
-                {"role": MESSAGE_ROLE["USER"], "content": user_prompt}
-            ]
-            
-            # Call LLM
-            response = llm(messages, max_tokens=max_words * 2)  # Allow more tokens for generation
-            if not response or not response.content:
-                return ""
-            return response.content.strip()
+
+            return (cluster_summary or "").strip()
         else:
             # Fallback to placeholder if no model configuration
             logger.warning("No model_id or tenant_id provided, using placeholder summary")
 
@@ -1,10 +1,10 @@
 import logging
 from typing import Callable, List, Optional
 
-from smolagents import OpenAIServerModel
-
 from consts.const import MESSAGE_ROLE, THINK_END_PATTERN, THINK_START_PATTERN
 from database.model_management_db import get_model_by_model_id
+from nexent.core.utils.observer import MessageObserver
+from nexent.core.models import OpenAIModel
 from utils.config_utils import get_model_name_from_config
 
 logger = logging.getLogger("llm_utils")
@@ -44,7 +44,7 @@ def call_llm_for_system_prompt(
     """
     llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
 
-    llm = OpenAIServerModel(
+    llm = OpenAIModel(
         model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
         api_base=llm_model_config.get("base_url", ""),
         api_key=llm_model_config.get("api_key", ""),
 
@@ -2,18 +2,62 @@
 
 ## 🚀 Upgrade Overview
 
-Follow these four steps to upgrade Nexent safely:
+Follow these steps to upgrade Nexent safely:
 
-1. Clean up existing containers and images
-2. Pull the latest code and run the deployment script
-3. Apply database migrations
-4. Verify the deployment in your browser
+1. Pull the latest code
+2. Execute the upgrade script
+3. Open the site to confirm service availability
 
 ---
 
-## 🧹 Step 1: Clean up old images
+## 🔄 Step 1: Update Code
 
-Remove cached resources to avoid conflicts when redeploying:
+Before updating, record the current deployment version and the data directory; the upgrade script may ask you for them.
+
+- Current deployment version: `APP_VERSION` in `backend/consts/const.py`
+- Data directory: `ROOT_DIR` in `docker/.env`
+
+**Code downloaded via git**
+
+Update the code using git commands:
+
+```bash
+git pull
+```
+
+**Code downloaded via ZIP package or other means**
+
+1. Re-download the latest code from GitHub and extract it.
+2. If a `deploy.options` file exists in the `docker` directory of your previous deployment, copy it into the `docker` directory of the newly downloaded code. (If the file doesn't exist, skip this step.)
+
+## 🔄 Step 2: Execute the Upgrade
+
+Navigate to the docker directory of the updated code and run the upgrade script:
+
+```bash
+bash upgrade.sh
+```
+
+If deploy.options is missing, the script will prompt you to manually enter configuration details from the previous deployment, such as the current version and data directory. Enter the information you recorded earlier.
+
+> 💡 Tip
+> - The default scenario is quick deployment, which uses `.env.example`.
+> - If you need to configure voice models (STT/TTS), add the relevant variables to `.env.example` in advance. A front-end configuration interface will be provided as soon as possible.
+
+
+## 🌐 Step 3: Verify the deployment
+
+After deployment:
+
+1. Open `http://localhost:3000` in your browser.
+2. Review the [User Guide](https://doc.nexent.tech/en/user-guide/home-page) to validate agent functionality.
+
+
+## Optional Operations
+
+### 🧹 Clean Up Old Version Images
+
+If the images were not updated correctly, clean up the old containers and images before upgrading again:
 
 ```bash
 # Stop and remove existing containers
@@ -38,24 +82,9 @@ docker system prune -af
 
 ---
 
-## 🔄 Step 2: Update code and redeploy
-
-```bash
-git pull
-cd nexent/docker
-cp .env.example .env
-bash deploy.sh
-```
-
-> 💡 Tip
-> - `.env.example` works for default deployments.
-> - Configure speech models (STT/TTS) in `.env` when needed. A frontend configuration flow is coming soon.
-
----
-
-## 🗄️ Step 3: Apply database migrations
+## 🗄️ Manual Database Update
 
-Run the SQL scripts shipped with each release to keep your schema up to date.
+If some SQL files fail to execute during the upgrade, you can perform the update manually.
 
 ### ✅ Method A: Use a SQL editor (recommended)
 
@@ -68,8 +97,8 @@ Run the SQL scripts shipped with each release to keep your schema up to date.
    - Password
 3. Test the connection. When successful, you should see tables under the `nexent` schema.
 4. Open a new query window.
-5. Navigate to `/nexent/docker/sql`. Each file contains one migration script with its release date in the filename.
-6. Execute every script dated after your previous deployment, in chronological order.
+5. Navigate to the `/nexent/docker/sql` directory and open the SQL file(s) that failed to view the script.
+6. Execute the failed SQL file(s), followed by the SQL files of any later versions, in version order.
 
 > ⚠️ Important
 > - Always back up the database first, especially in production.
@@ -97,14 +126,12 @@ Run the SQL scripts shipped with each release to keep your schema up to date.
 3. Execute SQL files sequentially (host machine example):
 
    ```bash
-   # Example: If today is November 6th and your last update was on October 20th, 
-   # and there are two new files 1030-update.sql and 1105-update.sql, 
    # execute the following commands (please replace the placeholders with your actual values)
-   docker exec -i nexent-postgresql psql -U [YOUR_POSTGRES_USER] -d [YOUR_POSTGRES_DB] < ./sql/1030-update.sql
-   docker exec -i nexent-postgresql psql -U [YOUR_POSTGRES_USER] -d [YOUR_POSTGRES_DB] < ./sql/1105-update.sql
+   docker exec -i nexent-postgresql psql -U [YOUR_POSTGRES_USER] -d [YOUR_POSTGRES_DB] < ./sql/v1.1.1_1030-update.sql
+   docker exec -i nexent-postgresql psql -U [YOUR_POSTGRES_USER] -d [YOUR_POSTGRES_DB] < ./sql/v1.1.2_1105-update.sql
    ```
 
-   Execute the scripts in chronological order based on your deployment date.
+   Execute the corresponding scripts for your deployment versions in version order.
 
 > 💡 Tips
 > - Load environment variables first if they are defined in `.env`:
@@ -126,14 +153,3 @@ Run the SQL scripts shipped with each release to keep your schema up to date.
 >   ```bash
 >   docker exec -i nexent-postgres pg_dump -U [YOUR_POSTGRES_USER] [YOUR_POSTGRES_DB] > backup_$(date +%F).sql
 >   ```
-
----
-
-## 🌐 Step 4: Verify the deployment
-
-After deployment:
-
-1. Open `http://localhost:3000` in your browser.
-2. Review the [User Guide](https://doc.nexent.tech/en/user-guide/home-page) to validate agent functionality.
-
-