Skip to content

Commit 153a19d

Browse files
authored
fix: fix rag example (#283)
1 parent 040fa6f commit 153a19d

File tree

4 files changed

+38
-37
lines changed

4 files changed

+38
-37
lines changed

examples/rag/rag_mock_and_eval.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -199,7 +199,7 @@ def evaluate_rag_result(question: str, rag_result: Dict[str, Any]):
199199
model=OPENAI_MODEL,
200200
)
201201
faith_result = LLMRAGFaithfulness.eval(data)
202-
print(f"Faithfulness details: {faith_result.eval_details}")
202+
print(f"Faithfulness details: {faith_result}")
203203

204204
# 2. 评测答案相关性
205205
LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs(
@@ -208,7 +208,7 @@ def evaluate_rag_result(question: str, rag_result: Dict[str, Any]):
208208
model=OPENAI_MODEL,
209209
)
210210
ans_rel_result = LLMRAGAnswerRelevancy.eval(data)
211-
print(f"Answer Relevancy details: {ans_rel_result.eval_details}")
211+
print(f"Answer Relevancy details: {ans_rel_result}")
212212

213213
# 3. 评测上下文相关性
214214
LLMRAGContextRelevancy.dynamic_config = EvaluatorLLMArgs(
@@ -217,12 +217,12 @@ def evaluate_rag_result(question: str, rag_result: Dict[str, Any]):
217217
model=OPENAI_MODEL,
218218
)
219219
ctx_rel_result = LLMRAGContextRelevancy.eval(data)
220-
print(f"Context Relevancy details: {ctx_rel_result.eval_details}")
220+
print(f"Context Relevancy details: {ctx_rel_result}")
221221

222222
return {
223-
"faithfulness": faith_result.eval_details,
224-
"answer_relevancy": ans_rel_result.eval_details,
225-
"context_relevancy": ctx_rel_result.eval_details
223+
"faithfulness": faith_result,
224+
"answer_relevancy": ans_rel_result,
225+
"context_relevancy": ctx_rel_result
226226
}
227227

228228

examples/rag/sdk_rag_eval.py

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,8 @@ def test_faithfulness():
4848

4949
print("\n用例1 - 忠实的答案:")
5050
result1 = LLMRAGFaithfulness.eval(data1)
51-
print(f" 状态: {'✅ 通过' if not result1.eval_status else '❌ 未通过'}")
52-
print(f" 详情: {result1.eval_details}")
51+
print(f" 状态: {'✅ 通过' if not result1.status else '❌ 未通过'}")
52+
print(f" 详情: {result1}")
5353

5454
# 测试用例2: 包含幻觉
5555
data2 = Data(
@@ -63,8 +63,8 @@ def test_faithfulness():
6363

6464
print("\n用例2 - 包含幻觉:")
6565
result2 = LLMRAGFaithfulness.eval(data2)
66-
print(f" 状态: {'✅ 通过' if not result2.eval_status else '❌ 未通过'}")
67-
print(f" 详情: {result2.eval_details}")
66+
print(f" 状态: {'✅ 通过' if not result2.status else '❌ 未通过'}")
67+
print(f" 详情: {result2}")
6868
print("\n预期: 用例2分数 < 用例1分数")
6969

7070
return result1, result2
@@ -96,8 +96,8 @@ def test_context_precision():
9696
)
9797

9898
result = LLMRAGContextPrecision.eval(data)
99-
print(f" 状态: {'✅ 通过' if not result.eval_status else '❌ 未通过'}")
100-
print(f" 详情: {result.eval_details}")
99+
print(f" 状态: {'✅ 通过' if not result.status else '❌ 未通过'}")
100+
print(f" 详情: {result}")
101101
print("\n预期: 前3个上下文相关,最后1个不相关")
102102

103103
return result
@@ -125,8 +125,8 @@ def test_answer_relevancy():
125125

126126
print("\n用例1 - 直接回答:")
127127
result1 = LLMRAGAnswerRelevancy.eval(data1)
128-
print(f" 状态: {'✅ 通过' if not result1.eval_status else '❌ 未通过'}")
129-
print(f" 详情: {result1.eval_details}")
128+
print(f" 状态: {'✅ 通过' if not result1.status else '❌ 未通过'}")
129+
print(f" 详情: {result1}")
130130

131131
# 测试用例2: 包含无关信息
132132
data2 = Data(
@@ -137,8 +137,8 @@ def test_answer_relevancy():
137137

138138
print("\n用例2 - 包含无关信息:")
139139
result2 = LLMRAGAnswerRelevancy.eval(data2)
140-
print(f" 状态: {'✅ 通过' if not result2.eval_status else '❌ 未通过'}")
141-
print(f" 详情: {result2.eval_details}")
140+
print(f" 状态: {'✅ 通过' if not result2.status else '❌ 未通过'}")
141+
print(f" 详情: {result2}")
142142
print("\n预期: 用例2分数 < 用例1分数")
143143

144144
return result1, result2
@@ -170,8 +170,8 @@ def test_context_recall():
170170

171171
print("\n用例1 - 上下文完全支持:")
172172
result1 = LLMRAGContextRecall.eval(data1)
173-
print(f" 状态: {'✅ 通过' if not result1.eval_status else '❌ 未通过'}")
174-
print(f" 详情: {result1.eval_details}")
173+
print(f" 状态: {'✅ 通过' if not result1.status else '❌ 未通过'}")
174+
print(f" 详情: {result1}")
175175

176176
# 测试用例2: 上下文部分支持答案
177177
data2 = Data(
@@ -186,8 +186,8 @@ def test_context_recall():
186186

187187
print("\n用例2 - 上下文部分支持:")
188188
result2 = LLMRAGContextRecall.eval(data2)
189-
print(f" 状态: {'✅ 通过' if not result2.eval_status else '❌ 未通过'}")
190-
print(f" 详情: {result2.eval_details}")
189+
print(f" 状态: {'✅ 通过' if not result2.status else '❌ 未通过'}")
190+
print(f" 详情: {result2}")
191191
print("\n预期: 用例2分数 < 用例1分数")
192192

193193
return result1, result2
@@ -219,8 +219,8 @@ def test_context_relevancy():
219219

220220
print("\n用例1 - 所有上下文相关:")
221221
result1 = LLMRAGContextRelevancy.eval(data1)
222-
print(f" 状态: {'✅ 通过' if not result1.eval_status else '❌ 未通过'}")
223-
print(f" 详情: {result1.eval_details}")
222+
print(f" 状态: {'✅ 通过' if not result1.status else '❌ 未通过'}")
223+
print(f" 详情: {result1}")
224224

225225
# 测试用例2: 包含不相关上下文
226226
data2 = Data(
@@ -235,8 +235,8 @@ def test_context_relevancy():
235235

236236
print("\n用例2 - 包含不相关上下文:")
237237
result2 = LLMRAGContextRelevancy.eval(data2)
238-
print(f" 状态: {'✅ 通过' if not result2.eval_status else '❌ 未通过'}")
239-
print(f" 详情: {result2.eval_details}")
238+
print(f" 状态: {'✅ 通过' if not result2.status else '❌ 未通过'}")
239+
print(f" 详情: {result2}")
240240
print("\n预期: 用例2分数 < 用例1分数")
241241

242242
return result1, result2

examples/rag/sdk_rag_eval_batch_dataset.py

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import logging
1212
import os
1313
import time
14+
from pathlib import Path
1415

1516
from dingo.config.input_args import EvaluatorLLMArgs
1617
from dingo.io.input import Data
@@ -50,7 +51,7 @@
5051
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-large")
5152

5253
# 输入文件路径配置
53-
CSV_FILE_PATH = "ragflow_eval_data_50.jsonl" # 支持CSV和JSONL格式
54+
CSV_FILE_PATH = Path("test/data/ragflow_eval_data_50.jsonl") # 支持CSV和JSONL格式
5455

5556

5657
def evaluate_from_jsonl(jsonl_path):
@@ -126,34 +127,34 @@ def evaluate_from_jsonl(jsonl_path):
126127
# # 进行各项指标评测
127128
print("\n1. 忠实度 (Faithfulness):")
128129
faithfulness_result = LLMRAGFaithfulness.eval(data)
129-
print(f" 状态: {'✅ 通过' if not faithfulness_result.eval_status else '❌ 未通过'}")
130+
print(f" 状态: {'✅ 通过' if not faithfulness_result.status else '❌ 未通过'}")
130131
print(f" 分数: {faithfulness_result.score}/10")
131132
total_faithfulness += faithfulness_result.score
132133

133134
logger.info("\n2. 上下文精度 (Context Precision):")
134135
print("\n2. 上下文精度 (Context Precision):")
135136
precision_result = LLMRAGContextPrecision.eval(data)
136-
logger.info(f" 状态: {'✅ 通过' if not precision_result.eval_status else '❌ 未通过'}")
137+
logger.info(f" 状态: {'✅ 通过' if not precision_result.status else '❌ 未通过'}")
137138
logger.info(f" 分数: {precision_result.score}/10")
138-
print(f" 状态: {'✅ 通过' if not precision_result.eval_status else '❌ 未通过'}")
139+
print(f" 状态: {'✅ 通过' if not precision_result.status else '❌ 未通过'}")
139140
print(f" 分数: {precision_result.score}/10")
140141
total_precision += precision_result.score
141142

142143
print("\n3. 上下文召回 (Context Recall):")
143144
recall_result = LLMRAGContextRecall.eval(data)
144-
print(f" 状态: {'✅ 通过' if not recall_result.eval_status else '❌ 未通过'}")
145+
print(f" 状态: {'✅ 通过' if not recall_result.status else '❌ 未通过'}")
145146
print(f" 分数: {recall_result.score}/10")
146147
total_recall += recall_result.score
147148

148149
print("\n4. 上下文相关性 (Context Relevancy):")
149150
relevancy_result = LLMRAGContextRelevancy.eval(data)
150-
print(f" 状态: {'✅ 通过' if not relevancy_result.eval_status else '❌ 未通过'}")
151+
print(f" 状态: {'✅ 通过' if not relevancy_result.status else '❌ 未通过'}")
151152
print(f" 分数: {relevancy_result.score}/10")
152153
total_relevancy += relevancy_result.score
153154
#
154155
print("\n5. 答案相关性 (Answer Relevancy):")
155156
answer_relevancy_result = LLMRAGAnswerRelevancy.eval(data)
156-
print(f" 状态: {'✅ 通过' if not answer_relevancy_result.eval_status else '❌ 未通过'}")
157+
print(f" 状态: {'✅ 通过' if not answer_relevancy_result.status else '❌ 未通过'}")
157158
print(f" 分数: {answer_relevancy_result.score}/10")
158159
total_answer_relevancy += answer_relevancy_result.score
159160

@@ -269,34 +270,34 @@ def evaluate_from_csv(csv_path):
269270
# # # # 进行各项指标评测
270271
print("\n1. 忠实度 (Faithfulness):")
271272
faithfulness_result = LLMRAGFaithfulness.eval(data)
272-
print(f" 状态: {'✅ 通过' if not faithfulness_result.eval_status else '❌ 未通过'}")
273+
print(f" 状态: {'✅ 通过' if not faithfulness_result.status else '❌ 未通过'}")
273274
print(f" 分数: {faithfulness_result.score}/10")
274275
total_faithfulness += faithfulness_result.score
275276

276277
logger.info("\n2. 上下文精度 (Context Precision):")
277278
print("\n2. 上下文精度 (Context Precision):")
278279
precision_result = LLMRAGContextPrecision.eval(data)
279-
logger.info(f" 状态: {'✅ 通过' if not precision_result.eval_status else '❌ 未通过'}")
280+
logger.info(f" 状态: {'✅ 通过' if not precision_result.status else '❌ 未通过'}")
280281
logger.info(f" 分数: {precision_result.score}/10")
281-
print(f" 状态: {'✅ 通过' if not precision_result.eval_status else '❌ 未通过'}")
282+
print(f" 状态: {'✅ 通过' if not precision_result.status else '❌ 未通过'}")
282283
print(f" 分数: {precision_result.score}/10")
283284
total_precision += precision_result.score
284285

285286
print("\n3. 上下文召回 (Context Recall):")
286287
recall_result = LLMRAGContextRecall.eval(data)
287-
print(f" 状态: {'✅ 通过' if not recall_result.eval_status else '❌ 未通过'}")
288+
print(f" 状态: {'✅ 通过' if not recall_result.status else '❌ 未通过'}")
288289
print(f" 分数: {recall_result.score}/10")
289290
total_recall += recall_result.score
290291

291292
print("\n4. 上下文相关性 (Context Relevancy):")
292293
relevancy_result = LLMRAGContextRelevancy.eval(data)
293-
print(f" 状态: {'✅ 通过' if not relevancy_result.eval_status else '❌ 未通过'}")
294+
print(f" 状态: {'✅ 通过' if not relevancy_result.status else '❌ 未通过'}")
294295
print(f" 分数: {relevancy_result.score}/10")
295296
total_relevancy += relevancy_result.score
296297

297298
print("\n5. 答案相关性 (Answer Relevancy):")
298299
answer_relevancy_result = LLMRAGAnswerRelevancy.eval(data)
299-
print(f" 状态: {'✅ 通过' if not answer_relevancy_result.eval_status else '❌ 未通过'}")
300+
print(f" 状态: {'✅ 通过' if not answer_relevancy_result.status else '❌ 未通过'}")
300301
print(f" 分数: {answer_relevancy_result.score}/10")
301302
total_answer_relevancy += answer_relevancy_result.score
302303

0 commit comments

Comments
 (0)