ModelEngine-Group
diff --git a/‎backend/agents/create_agent_info.py‎
Lines changed: 2 additions & 1 deletion b/‎backend/agents/create_agent_info.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎backend/apps/vectordatabase_app.py‎
Lines changed: 15 additions & 8 deletions b/‎backend/apps/vectordatabase_app.py‎
Lines changed: 15 additions & 8 deletions
diff --git a/‎backend/data_process/tasks.py‎
Lines changed: 76 additions & 56 deletions b/‎backend/data_process/tasks.py‎
Lines changed: 76 additions & 56 deletions
diff --git a/‎backend/database/knowledge_db.py‎
Lines changed: 1 addition & 0 deletions b/‎backend/database/knowledge_db.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/services/model_management_service.py‎
Lines changed: 0 additions & 27 deletions b/‎backend/services/model_management_service.py‎
Lines changed: 0 additions & 27 deletions
diff --git a/‎backend/services/model_provider_service.py‎
Lines changed: 9 additions & 2 deletions b/‎backend/services/model_provider_service.py‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎backend/services/redis_service.py‎
Lines changed: 8 additions & 8 deletions b/‎backend/services/redis_service.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎backend/services/tenant_config_service.py‎
Lines changed: 16 additions & 0 deletions b/‎backend/services/tenant_config_service.py‎
Lines changed: 16 additions & 0 deletions
@@ -15,7 +15,7 @@
     get_vector_db_core,
     get_embedding_model,
 )
-from services.tenant_config_service import get_selected_knowledge_list
+from services.tenant_config_service import get_selected_knowledge_list, build_knowledge_name_mapping
 from services.remote_mcp_service import get_remote_mcp_server_list
 from services.memory_config_service import build_memory_context
 from services.image_service import get_vlm_model
@@ -241,6 +241,7 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
                 "index_names": index_names,
                 "vdb_core": get_vector_db_core(),
                 "embedding_model": get_embedding_model(tenant_id=tenant_id),
+                "name_resolver": build_knowledge_name_mapping(tenant_id=tenant_id, user_id=user_id),
             }
         elif tool_config.class_name == "AnalyzeTextFileTool":
             tool_config.metadata = {
 
@@ -5,6 +5,7 @@
 
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
 from fastapi.responses import JSONResponse
+import re
 
 from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse
 from nexent.vector_database.base import VectorDatabaseCore
@@ -124,8 +125,11 @@ def create_index_documents(
     except Exception as e:
         error_msg = str(e)
         logger.error(f"Error indexing documents: {error_msg}")
+
         raise HTTPException(
-            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error indexing documents: {error_msg}")
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error indexing documents: {error_msg}"
+        )
 
 
 @router.get("/{index_name}/files")
@@ -229,15 +233,18 @@ async def get_document_error_info(
         error_code = None
 
         if raw_error:
-            text = raw_error
-
             # Try to parse JSON (new format with error_code only)
-            if isinstance(text, str) and text.strip().startswith("{"):
+            try:
+                parsed = json.loads(raw_error)
+                if isinstance(parsed, dict) and "error_code" in parsed:
+                    error_code = parsed.get("error_code")
+            except Exception:
+                # Fallback: regex extraction if JSON parsing fails
                 try:
-                    parsed = json.loads(text)
-                    if isinstance(parsed, dict):
-                        if "error_code" in parsed:
-                            error_code = parsed.get("error_code")
+                    match = re.search(
+                        r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', raw_error)
+                    if match:
+                        error_code = match.group(1)
                 except Exception:
                     pass
 
 
@@ -10,6 +10,7 @@
 from typing import Any, Dict, Optional
 
 import aiohttp
+import re
 import ray
 from celery import Task, chain, states
 from celery.exceptions import Retry
@@ -41,11 +42,33 @@ def extract_error_code(reason: str, parsed_error: Optional[Dict] = None) -> Opti
     Extract error code from error message or parsed error dict.
     Returns error code if matched, None otherwise.
     """
-    # First check if error_code is already in parsed_error
+    # 1) parsed_error dict
     if parsed_error and isinstance(parsed_error, dict):
-        error_code = parsed_error.get("error_code")
-        if error_code:
-            return error_code
+        code = parsed_error.get("error_code")
+        if code:
+            return code
+
+    # 2) try parse reason as JSON
+    try:
+        parsed = json.loads(reason)
+        if isinstance(parsed, dict):
+            code = parsed.get("error_code")
+            if code:
+                return code
+            detail = parsed.get("detail")
+            if isinstance(detail, dict) and detail.get("error_code"):
+                return detail.get("error_code")
+    except Exception:
+        pass
+
+    # 3) regex from raw string (supports single/double quotes)
+    try:
+        match = re.search(
+            r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', reason)
+        if match:
+            return match.group(1)
+    except Exception:
+        pass
 
     return "unknown_error"
 
@@ -688,68 +711,61 @@ async def index_documents():
 
             try:
                 connector = aiohttp.TCPConnector(verify_ssl=False)
-                # Increased timeout for large documents and slow ES bulk operations
-                # Use generous total timeout to avoid marking long-running but successful
-                # indexing as failed.
                 timeout = aiohttp.ClientTimeout(total=600)
 
                 async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
                     async with session.post(
                         full_url,
                         headers=headers,
                         json=formatted_chunks,
-                        raise_for_status=True
+                        raise_for_status=False
                     ) as response:
-                        result = await response.json()
+                        text = await response.text()
+                        status = response.status
+                        # Try parse JSON body for structured error_code/message
+                        parsed_body = None
+                        try:
+                            parsed_body = json.loads(text)
+                        except Exception:
+                            parsed_body = None
+
+                        if status >= 400:
+                            error_code = None
+                            if isinstance(parsed_body, dict):
+                                error_code = parsed_body.get("error_code")
+                                detail = parsed_body.get("detail")
+                                if isinstance(detail, dict) and detail.get("error_code"):
+                                    error_code = detail.get("error_code")
+                                elif isinstance(detail, str):
+                                    try:
+                                        parsed_detail = json.loads(detail)
+                                        if isinstance(parsed_detail, dict):
+                                            error_code = parsed_detail.get(
+                                                "error_code", error_code)
+                                    except Exception:
+                                        pass
+
+                            if not error_code:
+                                try:
+                                    match = re.search(
+                                        r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text)
+                                    if match:
+                                        error_code = match.group(1)
+                                except Exception:
+                                    pass
+
+                            if error_code:
+                                # Raise flat payload to avoid nested JSON and preserve error_code
+                                raise Exception(json.dumps({
+                                    "error_code": error_code
+                                }, ensure_ascii=False))
+
+                            raise Exception(
+                                f"ElasticSearch service returned HTTP {status}")
+
+                        result = parsed_body if isinstance(parsed_body, dict) else await response.json()
                         return result
 
-            except aiohttp.ClientResponseError as e:
-                # 400: embedding model reports chunk count exceeds concurrency
-                if e.status == 400:
-                    raise Exception(json.dumps({
-                        "message": f"ElasticSearch service returned 400 Bad Request: {str(e)}",
-                        "index_name": original_index_name,
-                        "task_name": "forward",
-                        "source": original_source,
-                        "original_filename": original_filename,
-                        "error_code": "embedding_chunks_exceed_limit"
-                    }, ensure_ascii=False))
-
-                # Timeout from Elasticsearch refresh / bulk operations: stop retrying and treat as es_bulk_failed
-                timeout_markers = [
-                    "Connection timeout caused by",
-                    "Read timed out",
-                    "ReadTimeoutError"
-                ]
-                if any(marker in str(e) for marker in timeout_markers):
-                    raise Exception(json.dumps({
-                        "message": f"ElasticSearch operation timed out: {str(e)}",
-                        "index_name": original_index_name,
-                        "task_name": "forward",
-                        "source": original_source,
-                        "original_filename": original_filename,
-                        "error_code": "es_bulk_failed"
-                    }, ensure_ascii=False))
-
-                # 503: vector service busy: bubble up immediately, let caller decide
-                if e.status == 503:
-                    raise Exception(json.dumps({
-                        "message": f"ElasticSearch service unavailable: {str(e)}",
-                        "index_name": original_index_name,
-                        "task_name": "forward",
-                        "source": original_source,
-                        "original_filename": original_filename,
-                        "error_code": "vector_service_busy"
-                    }, ensure_ascii=False))
-
-                # Other client response errors: bubble up
-                raise Exception(json.dumps({
-                    "message": f"ElasticSearch service unavailable: {str(e)}",
-                    "index_name": original_index_name,
-                    "task_name": "forward",
-                    "source": original_source,
-                    "original_filename": original_filename
-                }, ensure_ascii=False))
             except aiohttp.ClientConnectorError as e:
                 logger.error(
                     f"[{self.request.id}] FORWARD TASK: Connection error to {full_url}: {str(e)}")
@@ -879,6 +895,10 @@ async def index_documents():
         }
     except Exception as e:
         # If it's an Exception, all go here (including our custom JSON message)
+        # Important: if this is a Celery Retry, re-raise immediately without recording error_code
+        if isinstance(e, Retry):
+            raise
+
         task_id = self.request.id
         try:
             error_info = json.loads(str(e))
 
@@ -197,6 +197,7 @@ def get_knowledge_info_by_knowledge_ids(knowledge_ids: List[str]) -> List[Dict[s
                 knowledge_info.append({
                     "knowledge_id": item.knowledge_id,
                     "index_name": item.index_name,
+                    "knowledge_name": item.knowledge_name,
                     "knowledge_sources": item.knowledge_sources,
                     "embedding_model_name": item.embedding_model_name
                 })
 
@@ -241,37 +241,11 @@ async def update_single_model_for_tenant(
             m.get("model_type") == "multi_embedding" for m in existing_models
         )
 
-        async def _try_update_embedding_dimension(model: Dict[str, Any], update_payload: Dict[str, Any]):
-            """Run embedding dimension check when updating embedding models so stored max_tokens stays accurate."""
-            model_type = model.get("model_type")
-            if model_type not in ("embedding", "multi_embedding"):
-                return
-
-            base_url = update_payload.get(
-                "base_url", model.get("base_url", ""))
-            api_key = update_payload.get("api_key", model.get("api_key", ""))
-
-            if not base_url or not api_key:
-                return
-
-            combined_config = {
-                "model_type": model_type,
-                "model_repo": model.get("model_repo", ""),
-                "model_name": add_repo_to_name(model.get("model_repo", ""), model.get("model_name", "")),
-                "base_url": base_url,
-                "api_key": api_key,
-            }
-
-            dimension = await embedding_dimension_check(combined_config)
-            if dimension:
-                update_payload["max_tokens"] = dimension
-
         if has_multi_embedding:
             # Update both embedding and multi_embedding records
             for model in existing_models:
                 # Prepare update data, excluding model_type to preserve original type
                 update_data = {k: v for k, v in model_data.items() if k not in ["model_id", "model_type"]}
-                await _try_update_embedding_dimension(model, update_data)
                 update_model_record(model["model_id"], update_data, user_id)
             logging.debug(
                 f"Model {current_display_name} (embedding + multi_embedding) updated successfully")
@@ -280,7 +254,6 @@ async def _try_update_embedding_dimension(model: Dict[str, Any], update_payload:
             current_model = existing_models[0]
             current_model_id = current_model["model_id"]
             update_data = {k: v for k, v in model_data.items() if k != "model_id"}
-            await _try_update_embedding_dimension(current_model, update_data)
             update_model_record(current_model_id, update_data, user_id)
             logging.debug(f"Model {current_display_name} updated successfully")
     except LookupError:
 
@@ -187,12 +187,19 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
 
     # Build the canonical representation using the existing Pydantic schema for
     # consistency of validation and default handling.
+    # For embedding/multi_embedding models, max_tokens is always stored as 0 here as a placeholder
+    # (it will be set via connectivity check later); other model types fall back to 0 only when it is missing
+    model_type = model["model_type"]
+    is_embedding_type = model_type in ["embedding", "multi_embedding"]
+    max_tokens_value = model.get(
+        "max_tokens", 0) if not is_embedding_type else 0
+
     model_obj = ModelRequest(
         model_factory=provider,
         model_name=model_name,
-        model_type=model["model_type"],
+        model_type=model_type,
         api_key=model_api_key,
-        max_tokens=model["max_tokens"],
+        max_tokens=max_tokens_value,
         display_name=model_display_name,
         expected_chunk_size=expected_chunk_size,
         maximum_chunk_size=maximum_chunk_size,
 
@@ -22,7 +22,12 @@ def client(self) -> redis.Redis:
         if self._client is None:
             if not REDIS_URL:
                 raise ValueError("REDIS_URL environment variable is not set")
-            self._client = redis.from_url(REDIS_URL, socket_timeout=5, socket_connect_timeout=5)
+            self._client = redis.from_url(
+                REDIS_URL, 
+                socket_timeout=5, 
+                socket_connect_timeout=5,
+                decode_responses=True
+            )
         return self._client
 
     @property
@@ -673,8 +678,6 @@ def save_error_info(self, task_id: str, error_reason: str, ttl_days: int = 30) -
                 # Verify the save by reading it back
                 verify = self.client.get(reason_key)
                 if verify:
-                    if isinstance(verify, bytes):
-                        verify = verify.decode('utf-8')
                     logger.debug(f"Verified error info saved for task {task_id}: {verify[:100]}...")
                 else:
                     logger.warning(f"Failed to verify error info save for task {task_id}")
@@ -760,11 +763,8 @@ def get_error_info(self, task_id: str) -> Optional[str]:
         try:
             reason_key = f"error:reason:{task_id}"
             reason = self.client.get(reason_key)
-            if reason:
-                if isinstance(reason, bytes):
-                    return reason.decode('utf-8')
-                return reason
-            return None
+            # With decode_responses=True, reason is already a string
+            return reason if reason else None
         except Exception as e:
             logger.error(
                 f"Failed to get error info for task {task_id}: {str(e)}")
 
@@ -66,3 +66,19 @@ def delete_selected_knowledge_by_index_name(tenant_id: str, user_id: str, index_
                 return False
 
     return True
+
+
+def build_knowledge_name_mapping(tenant_id: str, user_id: str):
+    """
+    Return a dict that maps each selected knowledge base's knowledge_name to its index_name.
+    Entries with no knowledge_name are keyed by index_name; entries with no index_name are skipped.
+    """
+    selected_entries = get_selected_knowledge_list(
+        tenant_id=tenant_id, user_id=user_id)
+    name_to_index = {}
+    for entry in selected_entries:
+        index_name = entry.get("index_name")
+        lookup_name = entry.get("knowledge_name") or index_name
+        if lookup_name and index_name:
+            name_to_index[lookup_name] = index_name
+    return name_to_index