MigoXLab
diff --git a/README.md b/README.md
Lines changed: 3 additions & 3 deletions
diff --git a/dingo/config/input_args.py b/dingo/config/input_args.py
Lines changed: 8 additions & 0 deletions
diff --git a/dingo/model/llm/base_openai.py b/dingo/model/llm/base_openai.py
Lines changed: 26 additions & 0 deletions
diff --git a/dingo/model/llm/rag/llm_rag_answer_relevancy.py b/dingo/model/llm/rag/llm_rag_answer_relevancy.py
Lines changed: 7 additions & 26 deletions
@@ -298,9 +298,9 @@ Dingo provides **70+ evaluation metrics** across multiple dimensions, combining
 | **Security** | PII detection, Perspective API toxicity | Privacy and safety |
 
 📊 **[View Complete Metrics Documentation →](docs/metrics.md)**  
-📖 **[RAG Evaluation Guide (中文) →](docs/rag_evaluation_metrics_zh.md)**  
-🔍 **[Hallucination Detection Guide (中文) →](docs/hallucination_guide.md)**  
-✅ **[Factuality Assessment Guide (中文) →](docs/factcheck_guide.md)**
+📖 **[RAG Evaluation Guide →](docs/rag_evaluation_metrics.md)** | **[中文版](docs/rag_evaluation_metrics_zh.md)**  
+🔍 **[Hallucination Detection Guide →](docs/hallucination_detection_guide.md)** | **[中文版](docs/hallucination_guide.md)**  
+✅ **[Factuality Assessment Guide →](docs/factuality_assessment_guide.md)** | **[中文版](docs/factcheck_guide.md)**
 
 Most metrics are backed by academic research to ensure scientific rigor.
 
 
@@ -72,11 +72,19 @@ class EvaluatorRuleArgs(BaseModel):
     parameters: Optional[dict] = None
 
 
+class EmbeddingConfigArgs(BaseModel):
+    """Standalone configuration for the embedding model."""
+    model: Optional[str] = None  # embedding model name
+    key: Optional[str] = None  # API key for the embedding service
+    api_url: Optional[str] = None  # base URL of the embedding service
+
+
 class EvaluatorLLMArgs(BaseModel):
     model: Optional[str] = None
     key: Optional[str] = None
     api_url: Optional[str] = None
     parameters: Optional[dict] = None
+    embedding_config: Optional[EmbeddingConfigArgs] = None  # optional settings for a separate embedding client
 
 
 class EvalPiplineConfig(BaseModel):
 
@@ -16,23 +16,49 @@
 class BaseOpenAI(BaseLLM):
     dynamic_config = EvaluatorLLMArgs()
 
+    # Embedding model settings (used by RAG-related evaluators); None until create_client stores a {'model_name', 'client'} dict
+    embedding_model = None
+
     # @classmethod
     # def set_prompt(cls, prompt: BasePrompt):
     #     cls.prompt = prompt
 
     @classmethod
     def create_client(cls):
+        """Create the LLM client and, if embedding_config is set, also a separate embedding client; raises ValueError on a missing key, api_url or model."""
         from openai import OpenAI
 
         if not cls.dynamic_config.key:
             raise ValueError("key cannot be empty in llm config.")
         elif not cls.dynamic_config.api_url:
             raise ValueError("api_url cannot be empty in llm config.")
         else:
+            # Create the main LLM client
             cls.client = OpenAI(
                 api_key=cls.dynamic_config.key, base_url=cls.dynamic_config.api_url
             )
 
+            # If embedding_config is provided, initialize the embedding client as well
+            if cls.dynamic_config.embedding_config:
+                embedding_cfg = cls.dynamic_config.embedding_config
+                if not embedding_cfg.api_url:
+                    raise ValueError("embedding_config must provide api_url")
+
+                if not embedding_cfg.model:
+                    raise ValueError("embedding_config must provide model")
+
+                # Create a dedicated embedding client, independent of the main LLM client
+                cls.embedding_client = OpenAI(
+                    api_key=embedding_cfg.key or 'dummy-key',  # placeholder used when no key is configured
+                    base_url=embedding_cfg.api_url
+                )
+
+                cls.embedding_model = {
+                    'model_name': embedding_cfg.model,
+                    'client': cls.embedding_client
+                }
+                log.info(f"Initialized independent embedding client: {embedding_cfg.model} @ {embedding_cfg.api_url}")
+
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
         messages = [
 
@@ -77,25 +77,9 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
     }}
     Output: """
 
-    # 默认的embedding模型
-    embedding_model = None
-
     # 配置参数
     strictness = 3  # 生成的问题数量
 
-    @classmethod
-    def init_embedding_model(cls, model_name: str = "text-embedding-3-large"):
-        """初始化embedding模型"""
-        # 确保LLM客户端已经创建
-        if not hasattr(cls, 'client') or cls.client is None:
-            cls.create_client()
-
-        # 直接使用OpenAI的Embedding API
-        cls.embedding_model = {
-            'model_name': model_name,
-            'client': cls.client
-        }
-
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
         """构建LLM输入消息"""
@@ -162,8 +146,14 @@ def process_question_response(cls, response: str) -> Dict[str, Any]:
     @classmethod
     def calculate_similarity(cls, question: str, generated_questions: List[str]) -> np.ndarray:
         """计算原始问题与生成问题的相似度"""
+        # 检查 Embedding 模型是否已初始化
         if cls.embedding_model is None:
-            cls.init_embedding_model()
+            raise ValueError(
+                "Embedding model not initialized. Please configure 'embedding_config' in your LLM config with:\n"
+                "  - model: embedding model name (e.g., 'BAAI/bge-m3')\n"
+                "  - api_url: embedding service URL\n"
+                "  - key: API key (optional for local services)"
+            )
 
         # 检查生成的问题是否为空列表或全为空字符串
         if not generated_questions or all(q == "" for q in generated_questions):
@@ -229,9 +219,6 @@ def calculate_score(cls, answers: List[Dict[str, Any]], original_question: str)
     @classmethod
     def eval(cls, input_data: Data) -> EvalDetail:
         """评估答案相关性"""
-        # 初始化embedding模型（如果尚未初始化）
-        if cls.embedding_model is None:
-            cls.init_embedding_model()
         raw_data = getattr(input_data, 'raw_data', {})
         # 提取原始问题
         original_question = input_data.prompt or raw_data.get("question", "")
@@ -245,7 +232,6 @@ def eval(cls, input_data: Data) -> EvalDetail:
                     cls.dynamic_config.parameters['temperature'] = 0.7
             else:
                 # 如果没有parameters，创建一个包含temperature的parameters
-                from dingo.config.input_args import EvaluatorLLMArgs
                 current_params = cls.dynamic_config.parameters or {}
                 current_params['temperature'] = 0.7
                 cls.dynamic_config.parameters = current_params
@@ -267,11 +253,6 @@ def eval(cls, input_data: Data) -> EvalDetail:
                 # 检查是否有自定义的strictness参数
                 cls.strictness = cls.dynamic_config.parameters.get('strictness', 3)
 
-                # 检查是否有自定义的embedding模型
-                embedding_model_name = cls.dynamic_config.parameters.get('embedding_model', None)
-                if embedding_model_name:
-                    cls.init_embedding_model(embedding_model_name)
-
             # 构建详细的reason文本
             all_reasons = []
             for detail in details: