Skip to content

Commit 2fa6fde

Browse files
authored
Merge pull request #303 from e06084/dev
feat: update embedding model init
2 parents 1fb877a + cd29c96 commit 2fa6fde

File tree

9 files changed

+1266
-43
lines changed

9 files changed

+1266
-43
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,9 @@ Dingo provides **70+ evaluation metrics** across multiple dimensions, combining
298298
| **Security** | PII detection, Perspective API toxicity | Privacy and safety |
299299

300300
📊 **[View Complete Metrics Documentation →](docs/metrics.md)**
301-
📖 **[RAG Evaluation Guide (中文) →](docs/rag_evaluation_metrics_zh.md)**
302-
🔍 **[Hallucination Detection Guide (中文) →](docs/hallucination_guide.md)**
303-
**[Factuality Assessment Guide (中文) →](docs/factcheck_guide.md)**
301+
📖 **[RAG Evaluation Guide](docs/rag_evaluation_metrics.md)** | **[中文版](docs/rag_evaluation_metrics_zh.md)**
302+
🔍 **[Hallucination Detection Guide](docs/hallucination_detection_guide.md)** | **[中文版](docs/hallucination_guide.md)**
303+
**[Factuality Assessment Guide](docs/factuality_assessment_guide.md)** | **[中文版](docs/factcheck_guide.md)**
304304

305305
Most metrics are backed by academic research to ensure scientific rigor.
306306

dingo/config/input_args.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,19 @@ class EvaluatorRuleArgs(BaseModel):
7272
parameters: Optional[dict] = None
7373

7474

75+
class EmbeddingConfigArgs(BaseModel):
76+
"""Embedding 模型独立配置"""
77+
model: Optional[str] = None
78+
key: Optional[str] = None
79+
api_url: Optional[str] = None
80+
81+
7582
class EvaluatorLLMArgs(BaseModel):
7683
model: Optional[str] = None
7784
key: Optional[str] = None
7885
api_url: Optional[str] = None
7986
parameters: Optional[dict] = None
87+
embedding_config: Optional[EmbeddingConfigArgs] = None
8088

8189

8290
class EvalPiplineConfig(BaseModel):

dingo/model/llm/base_openai.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,49 @@
1616
class BaseOpenAI(BaseLLM):
1717
dynamic_config = EvaluatorLLMArgs()
1818

19+
# Embedding 模型配置(用于 RAG 相关评估器)
20+
embedding_model = None
21+
1922
# @classmethod
2023
# def set_prompt(cls, prompt: BasePrompt):
2124
# cls.prompt = prompt
2225

2326
@classmethod
2427
def create_client(cls):
28+
"""创建 LLM 客户端,如果配置了 embedding_config 则同时初始化 Embedding 客户端"""
2529
from openai import OpenAI
2630

2731
if not cls.dynamic_config.key:
2832
raise ValueError("key cannot be empty in llm config.")
2933
elif not cls.dynamic_config.api_url:
3034
raise ValueError("api_url cannot be empty in llm config.")
3135
else:
36+
# 创建主 LLM 客户端
3237
cls.client = OpenAI(
3338
api_key=cls.dynamic_config.key, base_url=cls.dynamic_config.api_url
3439
)
3540

41+
# 如果配置了 embedding_config,初始化 Embedding 客户端
42+
if cls.dynamic_config.embedding_config:
43+
embedding_cfg = cls.dynamic_config.embedding_config
44+
if not embedding_cfg.api_url:
45+
raise ValueError("embedding_config must provide api_url")
46+
47+
if not embedding_cfg.model:
48+
raise ValueError("embedding_config must provide model")
49+
50+
# 创建独立的 Embedding 客户端
51+
cls.embedding_client = OpenAI(
52+
api_key=embedding_cfg.key or 'dummy-key',
53+
base_url=embedding_cfg.api_url
54+
)
55+
56+
cls.embedding_model = {
57+
'model_name': embedding_cfg.model,
58+
'client': cls.embedding_client
59+
}
60+
log.info(f"Initialized independent embedding client: {embedding_cfg.model} @ {embedding_cfg.api_url}")
61+
3662
@classmethod
3763
def build_messages(cls, input_data: Data) -> List:
3864
messages = [

dingo/model/llm/rag/llm_rag_answer_relevancy.py

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -77,25 +77,9 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
7777
}}
7878
Output: """
7979

80-
# 默认的embedding模型
81-
embedding_model = None
82-
8380
# 配置参数
8481
strictness = 3 # 生成的问题数量
8582

86-
@classmethod
87-
def init_embedding_model(cls, model_name: str = "text-embedding-3-large"):
88-
"""初始化embedding模型"""
89-
# 确保LLM客户端已经创建
90-
if not hasattr(cls, 'client') or cls.client is None:
91-
cls.create_client()
92-
93-
# 直接使用OpenAI的Embedding API
94-
cls.embedding_model = {
95-
'model_name': model_name,
96-
'client': cls.client
97-
}
98-
9983
@classmethod
10084
def build_messages(cls, input_data: Data) -> List:
10185
"""构建LLM输入消息"""
@@ -162,8 +146,14 @@ def process_question_response(cls, response: str) -> Dict[str, Any]:
162146
@classmethod
163147
def calculate_similarity(cls, question: str, generated_questions: List[str]) -> np.ndarray:
164148
"""计算原始问题与生成问题的相似度"""
149+
# 检查 Embedding 模型是否已初始化
165150
if cls.embedding_model is None:
166-
cls.init_embedding_model()
151+
raise ValueError(
152+
"Embedding model not initialized. Please configure 'embedding_config' in your LLM config with:\n"
153+
" - model: embedding model name (e.g., 'BAAI/bge-m3')\n"
154+
" - api_url: embedding service URL\n"
155+
" - key: API key (optional for local services)"
156+
)
167157

168158
# 检查生成的问题是否为空列表或全为空字符串
169159
if not generated_questions or all(q == "" for q in generated_questions):
@@ -229,9 +219,6 @@ def calculate_score(cls, answers: List[Dict[str, Any]], original_question: str)
229219
@classmethod
230220
def eval(cls, input_data: Data) -> EvalDetail:
231221
"""评估答案相关性"""
232-
# 初始化embedding模型(如果尚未初始化)
233-
if cls.embedding_model is None:
234-
cls.init_embedding_model()
235222
raw_data = getattr(input_data, 'raw_data', {})
236223
# 提取原始问题
237224
original_question = input_data.prompt or raw_data.get("question", "")
@@ -245,7 +232,6 @@ def eval(cls, input_data: Data) -> EvalDetail:
245232
cls.dynamic_config.parameters['temperature'] = 0.7
246233
else:
247234
# 如果没有parameters,创建一个包含temperature的parameters
248-
from dingo.config.input_args import EvaluatorLLMArgs
249235
current_params = cls.dynamic_config.parameters or {}
250236
current_params['temperature'] = 0.7
251237
cls.dynamic_config.parameters = current_params
@@ -267,11 +253,6 @@ def eval(cls, input_data: Data) -> EvalDetail:
267253
# 检查是否有自定义的strictness参数
268254
cls.strictness = cls.dynamic_config.parameters.get('strictness', 3)
269255

270-
# 检查是否有自定义的embedding模型
271-
embedding_model_name = cls.dynamic_config.parameters.get('embedding_model', None)
272-
if embedding_model_name:
273-
cls.init_embedding_model(embedding_model_name)
274-
275256
# 构建详细的reason文本
276257
all_reasons = []
277258
for detail in details:

0 commit comments

Comments
 (0)