
Commit 958a770

e06084 and actions-user authored
fix: fix ut (#288)
* fix: fix ut
* x
* 🎨 Auto-format code with pre-commit

---------

Co-authored-by: GitHub Action <[email protected]>
1 parent 02c4401 commit 958a770

File tree

9 files changed: +438 −560 lines changed


.github/workflows/IntegrationTest.yml

Lines changed: 1 addition & 1 deletion

@@ -62,4 +62,4 @@ jobs:
 python -m dingo.run.cli --input .github/env/custom_config_rule.json
 - name: Run unit tests
 run: |
-pytest test/scripts --ignore=test/scripts/data --ignore=test/scripts/model/llm/test_llm_html_extract_compare_v2.py --ignore=test/scripts/model/llm/test_rag_metrics.py
+pytest test/scripts --ignore=test/scripts/data
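
The net change: the two previously skipped test modules (test_llm_html_extract_compare_v2.py and test_rag_metrics.py) are no longer excluded, so the CI step now runs everything under test/scripts except the data directory. To reproduce the same selection locally, a minimal sketch (assuming pytest is installed and you run from the repository root; the helper file name is hypothetical) is:

# run_ci_unit_tests.py -- hypothetical local helper mirroring the updated CI step
import sys

import pytest

# Run all tests under test/scripts, skipping only the test data directory,
# exactly as the workflow step above now does.
exit_code = pytest.main([
    "test/scripts",
    "--ignore=test/scripts/data",
])
sys.exit(exit_code)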

docs/ats_resume_guide.md

Lines changed: 0 additions & 1 deletion

@@ -201,4 +201,3 @@ python examples/ats_resume/sdk_keyword_matcher.py
 # 运行简历优化示例
 python examples/ats_resume/sdk_resume_optimizer.py
 ```
-

examples/ats_resume/sdk_keyword_matcher.py

Lines changed: 0 additions & 1 deletion

@@ -172,4 +172,3 @@ def example_3_low_match():
 # example_3_low_match()
 
 print("✅ Examples completed!")
-

examples/ats_resume/sdk_resume_optimizer.py

Lines changed: 0 additions & 1 deletion

@@ -169,4 +169,3 @@ def example_3_full_pipeline():
 # example_3_full_pipeline()
 
 print("✅ Examples completed!")
-

examples/rag/dataset_rag_eval_with_all_metrics.py

Lines changed: 29 additions & 37 deletions

@@ -64,16 +64,16 @@ def print_metrics_summary(summary: SummaryModel):
 # 简化指标名称显示
 display_name = metric_name.replace("LLMRAG", "")
 print(f"\n {display_name}:")
-print(f" 平均分: {stats.get('score_average', 0):.2f}/10")
-print(f" 最小分: {stats.get('score_min', 0):.2f}/10")
-print(f" 最大分: {stats.get('score_max', 0):.2f}/10")
+print(f" 平均分: {stats.get('score_average', 0):.2f}")
+print(f" 最小分: {stats.get('score_min', 0):.2f}")
+print(f" 最大分: {stats.get('score_max', 0):.2f}")
 print(f" 样本数: {stats.get('score_count', 0)}")
 if 'score_std_dev' in stats:
 print(f" 标准差: {stats.get('score_std_dev', 0):.2f}")
 
 # 打印该字段组的总平均分
 overall_avg = summary.get_metrics_score_overall_average(field_key)
-print(f"\n 🎯 该字段组总平均分: {overall_avg:.2f}/10")
+print(f"\n 🎯 该字段组总平均分: {overall_avg:.2f}")
 
 # 打印该字段组的指标排名(从高到低)
 metrics_summary = summary.get_metrics_score_summary(field_key)
@@ -82,7 +82,7 @@ def print_metrics_summary(summary: SummaryModel):
 print(f"\n 📈 指标排名(从高到低):")
 for i, (metric_name, avg_score) in enumerate(sorted_metrics, 1):
 display_name = metric_name.replace("LLMRAG", "")
-print(f" {i}. {display_name}: {avg_score:.2f}/10")
+print(f" {i}. {display_name}: {avg_score:.2f}")
 
 # 如果有多个字段组,打印总体统计
 if len(summary.metrics_score_stats) > 1:
@@ -91,7 +91,7 @@ def print_metrics_summary(summary: SummaryModel):
 print("=" * 80)
 for field_key in summary.metrics_score_stats.keys():
 overall_avg = summary.get_metrics_score_overall_average(field_key)
-print(f" {field_key}: {overall_avg:.2f}/10")
+print(f" {field_key}: {overall_avg:.2f}")
 
 print("\n" + "=" * 80)
 
@@ -108,12 +108,29 @@ def run_rag_evaluation():
 print(f"API: {OPENAI_URL}")
 print("=" * 80)
 
+llm_config = {
+"model": OPENAI_MODEL,
+"key": OPENAI_KEY,
+"api_url": OPENAI_URL,
+}
+
+llm_config_embedding = {
+"model": OPENAI_MODEL,
+"key": OPENAI_KEY,
+"api_url": OPENAI_URL,
+"parameters": {
+"embedding_model": EMBEDDING_MODEL,
+"strictness": 3,
+"threshold": 5
+}
+}
+
 # 构建配置
 input_data = {
 "task_name": "rag_evaluation_with_metrics",
 "input_path": INPUT_DATA_PATH,
 "output_path": "outputs/",
-"log_level": "INFO",
+# "log_level": "INFO",
 "dataset": {
 "source": "local",
 "format": "jsonl",
@@ -146,50 +163,25 @@
 "evals": [
 {
 "name": "LLMRAGFaithfulness",
-"config": {
-"model": OPENAI_MODEL,
-"key": OPENAI_KEY,
-"api_url": OPENAI_URL,
-}
+"config": llm_config
 },
 {
 "name": "LLMRAGContextPrecision",
-"config": {
-"model": OPENAI_MODEL,
-"key": OPENAI_KEY,
-"api_url": OPENAI_URL,
-}
+"config": llm_config
 },
 {
 "name": "LLMRAGContextRecall",
-"config": {
-"model": OPENAI_MODEL,
-"key": OPENAI_KEY,
-"api_url": OPENAI_URL,
-}
+"config": llm_config
 },
 {
 "name": "LLMRAGContextRelevancy",
-"config": {
-"model": OPENAI_MODEL,
-"key": OPENAI_KEY,
-"api_url": OPENAI_URL,
-}
+"config": llm_config
 },
 # Answer Relevancy 需要 Embedding API
 # 如果您的 API 支持 embeddings 端点,可以启用此项
 {
 "name": "LLMRAGAnswerRelevancy",
-"config": {
-"model": OPENAI_MODEL,
-"key": OPENAI_KEY,
-"api_url": OPENAI_URL,
-"parameters": {
-"embedding_model": EMBEDDING_MODEL,
-"strictness": 3,
-"threshold": 5
-}
-}
+"config": llm_config_embedding
 }
 ]
 }
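
For orientation, the net effect of this refactor is that the two provider configs are defined once and reused by every metric entry instead of being repeated inline. The condensed sketch below reproduces the resulting structure from the hunks above; the placeholder constant values at the top are illustrative only (the real script defines these settings elsewhere), and unrelated keys of input_data are omitted.

# Illustrative placeholders; in the script these are module-level constants.
OPENAI_MODEL = "your-model-name"
OPENAI_KEY = "your-api-key"
OPENAI_URL = "https://your-openai-compatible-endpoint/v1"
EMBEDDING_MODEL = "your-embedding-model"

# Shared LLM config used by the text-only RAG metrics.
llm_config = {
    "model": OPENAI_MODEL,
    "key": OPENAI_KEY,
    "api_url": OPENAI_URL,
}

# Same config plus embedding parameters, needed only by Answer Relevancy.
llm_config_embedding = {
    "model": OPENAI_MODEL,
    "key": OPENAI_KEY,
    "api_url": OPENAI_URL,
    "parameters": {
        "embedding_model": EMBEDDING_MODEL,
        "strictness": 3,
        "threshold": 5,
    },
}

input_data = {
    "task_name": "rag_evaluation_with_metrics",
    # ... input_path, output_path, dataset, and other keys as in the diff above ...
    "evals": [
        {"name": "LLMRAGFaithfulness", "config": llm_config},
        {"name": "LLMRAGContextPrecision", "config": llm_config},
        {"name": "LLMRAGContextRecall", "config": llm_config},
        {"name": "LLMRAGContextRelevancy", "config": llm_config},
        # Answer Relevancy additionally needs an embeddings endpoint.
        {"name": "LLMRAGAnswerRelevancy", "config": llm_config_embedding},
    ],
}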
