ModelEngine-Group
diff --git a/‎backend/agents/create_agent_info.py‎
Lines changed: 29 additions & 15 deletions b/‎backend/agents/create_agent_info.py‎
Lines changed: 29 additions & 15 deletions
diff --git a/‎backend/apps/knowledge_summary_app.py‎
Lines changed: 4 additions & 1 deletion b/‎backend/apps/knowledge_summary_app.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎backend/apps/model_managment_app.py‎
Lines changed: 17 additions & 0 deletions b/‎backend/apps/model_managment_app.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎backend/apps/prompt_app.py‎
Lines changed: 1 addition & 0 deletions b/‎backend/apps/prompt_app.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/consts/const.py‎
Lines changed: 0 additions & 1 deletion b/‎backend/consts/const.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎backend/consts/model.py‎
Lines changed: 1 addition & 1 deletion b/‎backend/consts/model.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/database/agent_db.py‎
Lines changed: 0 additions & 1 deletion b/‎backend/database/agent_db.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎backend/services/agent_service.py‎
Lines changed: 1 addition & 1 deletion b/‎backend/services/agent_service.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/services/config_sync_service.py‎
Lines changed: 2 additions & 1 deletion b/‎backend/services/config_sync_service.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎backend/services/elasticsearch_service.py‎
Lines changed: 66 additions & 26 deletions b/‎backend/services/elasticsearch_service.py‎
Lines changed: 66 additions & 26 deletions
@@ -15,6 +15,8 @@
 from services.memory_config_service import build_memory_context
 from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
 from database.tool_db import search_tools_for_sub_agent
+from database.model_management_db import get_model_records
+from utils.model_name_utils import add_repo_to_name
 from utils.prompt_template_utils import get_agent_prompt_template
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
 from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE
@@ -24,21 +26,34 @@
 
 
 async def create_model_config_list(tenant_id):
+    records = get_model_records({"model_type": "llm"}, tenant_id)
+    model_list = []
+    for record in records:
+        model_list.append(
+            ModelConfig(cite_name=record["display_name"],
+                        api_key=record.get("api_key", ""),
+                        model_name=add_repo_to_name(
+                                model_repo=record["model_repo"],
+                                model_name=record["model_name"],
+                            ),
+                        url=record["base_url"]))
+    # Backward compatibility: keep the legacy "main_model" and "sub_model" cite names, both backed by the tenant's default LLM config
     main_model_config = tenant_config_manager.get_model_config(
         key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
-    sub_model_config = tenant_config_manager.get_model_config(
-        key=MODEL_CONFIG_MAPPING["llmSecondary"], tenant_id=tenant_id)
-
-    return [ModelConfig(cite_name="main_model",
-                        api_key=main_model_config.get("api_key", ""),
-                        model_name=get_model_name_from_config(main_model_config) if main_model_config.get(
-                            "model_name") else "",
-                        url=main_model_config.get("base_url", "")),
-            ModelConfig(cite_name="sub_model",
-                        api_key=sub_model_config.get("api_key", ""),
-                        model_name=get_model_name_from_config(sub_model_config) if sub_model_config.get(
-                            "model_name") else "",
-                        url=sub_model_config.get("base_url", ""))]
+    model_list.append(
+        ModelConfig(cite_name="main_model",
+                    api_key=main_model_config.get("api_key", ""),
+                    model_name=get_model_name_from_config(main_model_config) if main_model_config.get(
+                        "model_name") else "",
+                    url=main_model_config.get("base_url", "")))
+    model_list.append(
+        ModelConfig(cite_name="sub_model",
+                    api_key=main_model_config.get("api_key", ""),
+                    model_name=get_model_name_from_config(main_model_config) if main_model_config.get(
+                        "model_name") else "",
+                    url=main_model_config.get("base_url", "")))
+
+    return model_list
 
 
 async def create_agent_config(
@@ -336,8 +351,7 @@ async def create_agent_run_info(
         "remote_mcp_server": default_mcp_url,
         "status": True
     })
-    remote_mcp_dict = {record["remote_mcp_server_name"]
-        : record for record in remote_mcp_list if record["status"]}
+    remote_mcp_dict = {record["remote_mcp_server_name"]: record for record in remote_mcp_list if record["status"]}
 
     # Filter MCP servers and tools
     mcp_host = filter_mcp_servers_and_tools(agent_config, remote_mcp_dict)
 
@@ -20,6 +20,8 @@ async def auto_summary(
                                description="Name of the index to get documents from"),
         batch_size: int = Query(
             1000, description="Number of documents to retrieve per batch"),
+        model_id: Optional[int] = Query(
+            None, description="Model ID to use for summary generation"),
         es_core: ElasticSearchCore = Depends(get_es_core),
         authorization: Optional[str] = Header(None)
 ):
@@ -34,7 +36,8 @@ async def auto_summary(
             batch_size=batch_size,
             es_core=es_core,
             tenant_id=tenant_id,
-            language=language
+            language=language,
+            model_id=model_id
         )
     except Exception as e:
         logger.error("Knowledge base summary generation failed", exc_info=True)
 
@@ -38,6 +38,7 @@
     batch_update_models_for_tenant,
     delete_model_for_tenant,
     list_models_for_tenant,
+    list_llm_models_for_tenant,
 )
 from utils.auth_utils import get_current_user_id
 
@@ -258,6 +259,22 @@ async def get_model_list(authorization: Optional[str] = Header(None)):
                             detail="Failed to retrieve model list")
 
 
+@router.get("/llm_list")
+async def get_llm_model_list(authorization: Optional[str] = Header(None)):
+    """Get list of LLM models for the current tenant."""
+    try:
+        _, tenant_id = get_current_user_id(authorization)
+        llm_list = await list_llm_models_for_tenant(tenant_id)
+        return JSONResponse(status_code=HTTPStatus.OK, content={
+            "message": "Successfully retrieved LLM list",
+            "data": jsonable_encoder(llm_list)
+        })
+    except Exception as e:
+        logging.error(f"Failed to retrieve LLM list: {str(e)}")
+        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                            detail="Failed to retrieve LLM list")
+
+
 @router.post("/healthcheck")
 async def check_model_health(
         display_name: str = Query(..., description="Display name to check"),
 
@@ -23,6 +23,7 @@ async def generate_and_save_system_prompt_api(
             authorization, http_request)
         return StreamingResponse(gen_system_prompt_streamable(
             agent_id=prompt_request.agent_id,
+            model_id=prompt_request.model_id,
             task_description=prompt_request.task_description,
             user_id=user_id,
             tenant_id=tenant_id,
 
@@ -214,7 +214,6 @@
 
 MODEL_CONFIG_MAPPING = {
     "llm": "LLM_ID",
-    "llmSecondary": "LLM_SECONDARY_ID",
     "embedding": "EMBEDDING_ID",
     "multiEmbedding": "MULTI_EMBEDDING_ID",
     "rerank": "RERANK_ID",
 
@@ -87,7 +87,6 @@ class SingleModelConfig(BaseModel):
 
 class ModelConfig(BaseModel):
     llm: SingleModelConfig
-    llmSecondary: SingleModelConfig
     embedding: SingleModelConfig
     multiEmbedding: SingleModelConfig
     rerank: SingleModelConfig
@@ -189,6 +188,7 @@ class OpinionRequest(BaseModel):
 class GeneratePromptRequest(BaseModel):
     task_description: str
     agent_id: int
+    model_id: int
 
 
 class GenerateTitleRequest(BaseModel):
 
@@ -79,7 +79,6 @@ def create_agent(agent_info, tenant_id: str, user_id: str):
         "tenant_id": tenant_id,
         "created_by": user_id,
         "updated_by": user_id,
-        "model_name": "main_model",
         "max_steps": 5
     })
     with get_db_session() as session:
 
@@ -1049,4 +1049,4 @@ def get_sub_agents_recursive(parent_agent_id: int, depth: int = 0, max_depth: in
     except Exception as e:
         logger.exception(
             f"Failed to get agent call relationship for agent {agent_id}: {str(e)}")
-        raise ValueError(f"Failed to get agent call relationship: {str(e)}")
+        raise ValueError(f"Failed to get agent call relationship: {str(e)}")
@@ -130,7 +130,8 @@ async def load_config_impl(language: str, tenant_id: str):
 
 def build_app_config(language: str, tenant_id: str) -> dict:
     default_app_name = DEFAULT_APP_NAME_ZH if language == LANGUAGE["ZH"] else DEFAULT_APP_NAME_EN
-    default_app_description = DEFAULT_APP_DESCRIPTION_ZH if language == LANGUAGE["ZH"] else DEFAULT_APP_DESCRIPTION_EN
+    default_app_description = DEFAULT_APP_DESCRIPTION_ZH if language == LANGUAGE[
+        "ZH"] else DEFAULT_APP_DESCRIPTION_EN
 
     return {
         "name": tenant_config_manager.get_app_config(APP_NAME, tenant_id=tenant_id) or default_app_name,
 
@@ -16,7 +16,6 @@
 from datetime import datetime, timezone
 from typing import Any, Dict, Generator, List, Optional
 
-from dotenv import load_dotenv
 from fastapi import Body, Depends, Path, Query
 from fastapi.responses import StreamingResponse
 from jinja2 import Template, StrictUndefined
@@ -43,7 +42,9 @@
 logger = logging.getLogger("elasticsearch_service")
 
 
-def generate_knowledge_summary_stream(keywords: str, language: str, tenant_id: str) -> Generator:
+
+
+def generate_knowledge_summary_stream(keywords: str, language: str, tenant_id: str, model_id: Optional[int] = None) -> Generator:
     """
     Generate a knowledge base summary based on keywords
 
@@ -55,9 +56,6 @@ def generate_knowledge_summary_stream(keywords: str, language: str, tenant_id: s
     Returns:
         str:  Generate a knowledge base summary
     """
-    # Load environment variables
-    load_dotenv()
-
     # Load prompt words based on language
     prompts = get_knowledge_summary_prompt_template(language)
 
@@ -73,20 +71,47 @@ def generate_knowledge_summary_stream(keywords: str, language: str, tenant_id: s
         {"role": MESSAGE_ROLE["USER"], "content": user_prompt}
     ]
 
-    # Get model configuration from tenant config manager
-    model_config = tenant_config_manager.get_model_config(
-        key=MODEL_CONFIG_MAPPING["llmSecondary"], tenant_id=tenant_id)
+    # Resolve the model configuration: the model selected by model_id if given, otherwise the tenant's default LLM
+    if model_id:
+        try:
+            from database.model_management_db import get_model_by_model_id
+            model_info = get_model_by_model_id(model_id, tenant_id)
+            if model_info:
+                model_config = {
+                    'api_key': model_info.get('api_key', ''),
+                    'base_url': model_info.get('base_url', ''),
+                    'model_name': model_info.get('model_name', ''),
+                    'model_repo': model_info.get('model_repo', '')
+                }
+            else:
+                # Fall back to the tenant's default LLM if the specified model is not found
+                logger.warning(f"Specified model {model_id} not found, falling back to default LLM.")
+                model_config = tenant_config_manager.get_model_config(
+                    key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
+        except Exception as e:
+            logger.warning(f"Failed to get model {model_id}, using default model: {e}")
+            model_config = tenant_config_manager.get_model_config(
+                key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
+    else:
+        # Use default model configuration
+        model_config = tenant_config_manager.get_model_config(
+            key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
 
     # initialize OpenAI client
     client = OpenAI(api_key=model_config.get('api_key', ""),
                     base_url=model_config.get('base_url', ""))
 
     try:
         # Create stream chat completion request
-        max_tokens = KNOWLEDGE_SUMMARY_MAX_TOKENS_ZH if language == LANGUAGE["ZH"] else KNOWLEDGE_SUMMARY_MAX_TOKENS_EN
+        max_tokens = KNOWLEDGE_SUMMARY_MAX_TOKENS_ZH if language == LANGUAGE[
+            "ZH"] else KNOWLEDGE_SUMMARY_MAX_TOKENS_EN
+        # Build the model identifier for the request: "<model_repo>/<model_name>" when a repo is set, otherwise just the model name
+        model_name_for_request = model_config.get("model_name", "")
+        if model_config.get("model_repo"):
+            model_name_for_request = f"{model_config['model_repo']}/{model_name_for_request}"
+
         stream = client.chat.completions.create(
-            model=get_model_name_from_config(model_config) if model_config.get(
-                "model_name") else "",  # use model name from config
+            model=model_name_for_request,
             messages=messages,
             max_tokens=max_tokens,  # add max_tokens limit
             stream=True  # enable stream output
@@ -385,7 +410,8 @@ async def delete_index(
             }
             success = delete_knowledge_record(update_data)
             if not success:
-                raise Exception(f"Error deleting knowledge record for index {index_name}")
+                raise Exception(
+                    f"Error deleting knowledge record for index {index_name}")
 
             return {"status": "success", "message": f"Index {index_name} and associated files deleted successfully"}
         except Exception as e:
@@ -397,8 +423,10 @@ def list_indices(
                 "*", description="Pattern to match index names"),
             include_stats: bool = Query(
                 False, description="Whether to include index stats"),
-            tenant_id: str = Body(description="ID of the tenant listing the knowledge base"),
-            user_id: str = Body(description="ID of the user listing the knowledge base"),
+            tenant_id: str = Body(
+                description="ID of the tenant listing the knowledge base"),
+            user_id: str = Body(
+                description="ID of the user listing the knowledge base"),
             es_core: ElasticSearchCore = Depends(get_es_core)
     ):
         """
@@ -424,7 +452,8 @@ def list_indices(
         for record in db_record:
             # async PG database to sync ES, remove the data that is not in ES
             if record["index_name"] not in all_indices_list:
-                delete_knowledge_record({"index_name": record["index_name"], "user_id": user_id})
+                delete_knowledge_record(
+                    {"index_name": record["index_name"], "user_id": user_id})
                 continue
             if record["embedding_model_name"] is None:
                 model_name_is_none_list.append(record["index_name"])
@@ -449,8 +478,9 @@ def list_indices(
                         "stats": index_stats
                     })
                     if index_name in model_name_is_none_list:
-                        update_model_name_by_index_name(index_name, 
-                                                        index_stats.get("base_info", {}).get("embedding_model", ""), 
+                        update_model_name_by_index_name(index_name,
+                                                        index_stats.get("base_info", {}).get(
+                                                            "embedding_model", ""),
                                                         tenant_id, user_id)
             response["indices_info"] = stats_info
 
@@ -514,11 +544,14 @@ def get_index_name(
             error_msg = str(e)
             # Check if it's an ElasticSearch connection issue
             if "503" in error_msg or "search_phase_execution_exception" in error_msg:
-                raise Exception(f"ElasticSearch service unavailable for index {index_name}: {error_msg}")
+                raise Exception(
+                    f"ElasticSearch service unavailable for index {index_name}: {error_msg}")
             elif "ApiError" in error_msg:
-                raise Exception(f"ElasticSearch API error for index {index_name}: {error_msg}")
+                raise Exception(
+                    f"ElasticSearch API error for index {index_name}: {error_msg}")
             else:
-                raise Exception(f"Error getting info for index {index_name}: {error_msg}")
+                raise Exception(
+                    f"Error getting info for index {index_name}: {error_msg}")
 
     @staticmethod
     def index_documents(
@@ -551,7 +584,8 @@ def index_documents(
                         index_name, es_core=es_core)
                     logger.info(f"Created new index {index_name}")
                 except Exception as create_error:
-                    raise Exception(f"Failed to create index {index_name}: {str(create_error)}")
+                    raise Exception(
+                        f"Failed to create index {index_name}: {str(create_error)}")
 
             # Transform indexing request results to documents
             documents = []
@@ -783,7 +817,8 @@ async def list_files(
             return {"files": files}
 
         except Exception as e:
-            raise Exception(f"Error getting file list for index {index_name}: {str(e)}")
+            raise Exception(
+                f"Error getting file list for index {index_name}: {str(e)}")
 
     @staticmethod
     def delete_documents(
@@ -828,9 +863,12 @@ async def summary_index_name(self,
                                      1000, description="Number of documents to retrieve per batch"),
                                  es_core: ElasticSearchCore = Depends(
                                      get_es_core),
+                                 user_id: Optional[str] = Body(
+                                     None, description="ID of the user delete the knowledge base"),
                                  tenant_id: Optional[str] = Body(
                                      None, description="ID of the tenant"),
-                                 language: str = LANGUAGE["ZH"]
+                                 language: str = LANGUAGE["ZH"],
+                                 model_id: Optional[int] = None
                                  ):
         """
         Generate a summary for the specified index based on its content
@@ -848,7 +886,8 @@ async def summary_index_name(self,
         try:
             # Get all documents
             if not tenant_id:
-                raise Exception("Tenant ID is required for summary generation.")
+                raise Exception(
+                    "Tenant ID is required for summary generation.")
             all_documents = ElasticSearchService.get_random_documents(
                 index_name, batch_size, es_core)
             all_chunks = self._clean_chunks_for_summary(all_documents)
@@ -860,7 +899,7 @@ async def summary_index_name(self,
             async def generate_summary():
                 token_join = []
                 try:
-                    for new_token in generate_knowledge_summary_stream(keywords_for_summary, language, tenant_id):
+                    for new_token in generate_knowledge_summary_stream(keywords_for_summary, language, tenant_id, model_id):
                         if new_token == "END":
                             break
                         else:
@@ -947,7 +986,8 @@ def get_random_documents(
             }
 
         except Exception as e:
-            raise Exception(f"Error retrieving random documents from index {index_name}: {str(e)}")
+            raise Exception(
+                f"Error retrieving random documents from index {index_name}: {str(e)}")
 
     @staticmethod
     def change_summary(