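Fix RAG metric unit tests (mock BaseOpenAI.create_client/send_messages instead of call_llm); require answer in Context Precision; add ragas link to READMEs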

e06084 · e06084 · commit e479452c7c92 · 2025-11-06T14:10:18.000+08:00
diff --git a/README.md b/README.md
@@ -435,6 +435,7 @@ The current built-in detection rules and model methods focus on common data qual
 - [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data)
 - [mlflow](https://github.com/mlflow/mlflow)
 - [deepeval](https://github.com/confident-ai/deepeval)
+- [ragas](https://github.com/explodinggradients/ragas)
 
 # Contribution
 
diff --git a/README_ja.md b/README_ja.md
@@ -428,6 +428,7 @@ result = executor.execute()
 - [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data)
 - [mlflow](https://github.com/mlflow/mlflow)
 - [deepeval](https://github.com/confident-ai/deepeval)
+- [ragas](https://github.com/explodinggradients/ragas)
 
 # 貢献
 
diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -431,6 +431,7 @@ result = executor.execute()
 - [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data)
 - [mlflow](https://github.com/mlflow/mlflow)
 - [deepeval](https://github.com/confident-ai/deepeval)
+- [ragas](https://github.com/explodinggradients/ragas)
 
 # 贡献
 
diff --git a/dingo/model/llm/llm_rag_context_precision.py b/dingo/model/llm/llm_rag_context_precision.py
@@ -37,6 +37,9 @@ def build_messages(cls, input_data: Data) -> List:
         question = input_data.prompt or input_data.raw_data.get("question", "")
         answer = input_data.content or input_data.raw_data.get("answer", "")
 
+        if not answer:
+            raise ValueError("Context Precision评估需要answer字段")
+
         # 处理contexts
         contexts = None
         if input_data.context:
diff --git a/test/scripts/model/llm/test_rag_metrics.py b/test/scripts/model/llm/test_rag_metrics.py
@@ -12,7 +12,7 @@
 pytest test/scripts/model/llm/test_rag_metrics.py -v
 """
 
-from unittest.mock import Mock, patch
+from unittest.mock import patch
 
 import pytest
 
@@ -376,11 +376,14 @@ def test_process_response_low_relevancy(self):
 class TestIntegration:
     """集成测试（使用 mock）"""
 
-    @patch('dingo.model.llm.llm_rag_faithfulness.LLMRAGFaithfulness.call_llm')
-    def test_faithfulness_end_to_end(self, mock_call_llm):
+    @patch('dingo.model.llm.base_openai.BaseOpenAI.send_messages')
+    @patch('dingo.model.llm.base_openai.BaseOpenAI.create_client')
+    def test_faithfulness_end_to_end(self, mock_create_client, mock_send_messages):
         """测试忠实度端到端评估"""
+        # Mock 客户端创建
+        mock_create_client.return_value = None
         # Mock LLM 响应
-        mock_call_llm.return_value = '{"score": 8, "reason": "答案基本忠实于上下文。"}'
+        mock_send_messages.return_value = '{"score": 8, "reason": "答案基本忠实于上下文。"}'
 
         data = Data(
             data_id="test_integration",
@@ -393,12 +396,16 @@ def test_faithfulness_end_to_end(self, mock_call_llm):
 
         assert result.score == 8
         assert result.error_status is False
-        assert mock_call_llm.called
+        assert mock_send_messages.called
 
-    @patch('dingo.model.llm.llm_rag_answer_relevancy.LLMRAGAnswerRelevancy.call_llm')
-    def test_answer_relevancy_end_to_end(self, mock_call_llm):
+    @patch('dingo.model.llm.base_openai.BaseOpenAI.send_messages')
+    @patch('dingo.model.llm.base_openai.BaseOpenAI.create_client')
+    def test_answer_relevancy_end_to_end(self, mock_create_client, mock_send_messages):
         """测试答案相关性端到端评估"""
-        mock_call_llm.return_value = '{"score": 9, "reason": "答案直接回答问题。"}'
+        # Mock 客户端创建
+        mock_create_client.return_value = None
+        # Mock LLM 响应
+        mock_send_messages.return_value = '{"score": 9, "reason": "答案直接回答问题。"}'
 
         data = Data(
             data_id="test_integration_2",
@@ -410,12 +417,16 @@ def test_answer_relevancy_end_to_end(self, mock_call_llm):
 
         assert result.score == 9
         assert result.error_status is False
-        assert mock_call_llm.called
+        assert mock_send_messages.called
 
-    @patch('dingo.model.llm.llm_rag_context_relevancy.LLMRAGContextRelevancy.call_llm')
-    def test_context_relevancy_end_to_end(self, mock_call_llm):
+    @patch('dingo.model.llm.base_openai.BaseOpenAI.send_messages')
+    @patch('dingo.model.llm.base_openai.BaseOpenAI.create_client')
+    def test_context_relevancy_end_to_end(self, mock_create_client, mock_send_messages):
         """测试上下文相关性端到端评估"""
-        mock_call_llm.return_value = '{"score": 6, "reason": "半数上下文相关。"}'
+        # Mock 客户端创建
+        mock_create_client.return_value = None
+        # Mock LLM 响应
+        mock_send_messages.return_value = '{"score": 6, "reason": "半数上下文相关。"}'
 
         data = Data(
             data_id="test_integration_3",
@@ -430,7 +441,7 @@ def test_context_relevancy_end_to_end(self, mock_call_llm):
 
         assert result.score == 6
         assert result.error_status is False  # 默认阈值是5
-        assert mock_call_llm.called
+        assert mock_send_messages.called
 
 
 class TestEdgeCases:
@@ -521,5 +532,5 @@ def test_missing_score_in_response(self):
             LLMRAGFaithfulness.process_response(response)
 
 
-if __name__ == "__main__":
-    pytest.main([__file__, "-v", "-s"])
+# 使用 pytest 命令运行测试，而不是直接运行此文件
+# pytest test/scripts/model/llm/test_rag_metrics.py -v
diff --git a/web-static/assets/main-Dha4eK9H.js b/web-static/assets/main-Dha4eK9H.js
@@ -50070,7 +50070,7 @@ const genVirtualStyle = (token2) => {
       [`${componentCls}-tbody-virtual`]: {
         [`${componentCls}-tbody-virtual-holder-inner`]: {
           [`
-            & > ${componentCls}-row, 
+            & > ${componentCls}-row,
             & > div:not(${componentCls}-row) > ${componentCls}-row
           `]: {
             display: "flex",
diff --git a/web-static/assets/main-O6AZuAtl.css b/web-static/assets/main-O6AZuAtl.css
@@ -1605,4 +1605,4 @@ body #root {
 }.index-module__main-home___zg1x- {
   width: calc(100% - var(--sidebar-width));
   height: 100%;
-}
+}