@@ -64,16 +64,16 @@ def print_metrics_summary(summary: SummaryModel):
             # Simplify the displayed metric name
             display_name = metric_name.replace("LLMRAG", "")
             print(f"\n{display_name}:")
-            print(f"  Average score: {stats.get('score_average', 0):.2f}/10")
-            print(f"  Min score: {stats.get('score_min', 0):.2f}/10")
-            print(f"  Max score: {stats.get('score_max', 0):.2f}/10")
+            print(f"  Average score: {stats.get('score_average', 0):.2f}")
+            print(f"  Min score: {stats.get('score_min', 0):.2f}")
+            print(f"  Max score: {stats.get('score_max', 0):.2f}")
             print(f"  Sample count: {stats.get('score_count', 0)}")
             if 'score_std_dev' in stats:
                 print(f"  Std dev: {stats.get('score_std_dev', 0):.2f}")

         # Print the overall average score for this field group
         overall_avg = summary.get_metrics_score_overall_average(field_key)
-        print(f"\n🎯 Overall average score for this field group: {overall_avg:.2f}/10")
+        print(f"\n🎯 Overall average score for this field group: {overall_avg:.2f}")

         # Print the metric ranking for this field group (high to low)
         metrics_summary = summary.get_metrics_score_summary(field_key)
@@ -82,7 +82,7 @@ def print_metrics_summary(summary: SummaryModel):
         print(f"\n📈 Metric ranking (high to low):")
         for i, (metric_name, avg_score) in enumerate(sorted_metrics, 1):
             display_name = metric_name.replace("LLMRAG", "")
-            print(f"  {i}. {display_name}: {avg_score:.2f}/10")
+            print(f"  {i}. {display_name}: {avg_score:.2f}")

     # If there are multiple field groups, print overall statistics
     if len(summary.metrics_score_stats) > 1:
@@ -91,7 +91,7 @@ def print_metrics_summary(summary: SummaryModel):
         print("=" * 80)
         for field_key in summary.metrics_score_stats.keys():
             overall_avg = summary.get_metrics_score_overall_average(field_key)
-            print(f"  {field_key}: {overall_avg:.2f}/10")
+            print(f"  {field_key}: {overall_avg:.2f}")

     print("\n" + "=" * 80)

@@ -108,12 +108,29 @@ def run_rag_evaluation():
     print(f"API: {OPENAI_URL}")
     print("=" * 80)

+    llm_config = {
+        "model": OPENAI_MODEL,
+        "key": OPENAI_KEY,
+        "api_url": OPENAI_URL,
+    }
+
+    llm_config_embedding = {
+        "model": OPENAI_MODEL,
+        "key": OPENAI_KEY,
+        "api_url": OPENAI_URL,
+        "parameters": {
+            "embedding_model": EMBEDDING_MODEL,
+            "strictness": 3,
+            "threshold": 5
+        }
+    }
+
     # Build the configuration
     input_data = {
         "task_name": "rag_evaluation_with_metrics",
         "input_path": INPUT_DATA_PATH,
         "output_path": "outputs/",
-        "log_level": "INFO",
+        # "log_level": "INFO",
         "dataset": {
             "source": "local",
             "format": "jsonl",
@@ -146,50 +163,25 @@ def run_rag_evaluation():
146163 "evals" : [
147164 {
148165 "name" : "LLMRAGFaithfulness" ,
149- "config" : {
150- "model" : OPENAI_MODEL ,
151- "key" : OPENAI_KEY ,
152- "api_url" : OPENAI_URL ,
153- }
166+ "config" : llm_config
154167 },
155168 {
156169 "name" : "LLMRAGContextPrecision" ,
157- "config" : {
158- "model" : OPENAI_MODEL ,
159- "key" : OPENAI_KEY ,
160- "api_url" : OPENAI_URL ,
161- }
170+ "config" : llm_config
162171 },
163172 {
164173 "name" : "LLMRAGContextRecall" ,
165- "config" : {
166- "model" : OPENAI_MODEL ,
167- "key" : OPENAI_KEY ,
168- "api_url" : OPENAI_URL ,
169- }
174+ "config" : llm_config
170175 },
171176 {
172177 "name" : "LLMRAGContextRelevancy" ,
173- "config" : {
174- "model" : OPENAI_MODEL ,
175- "key" : OPENAI_KEY ,
176- "api_url" : OPENAI_URL ,
177- }
178+ "config" : llm_config
178179 },
179180 # Answer Relevancy 需要 Embedding API
180181 # 如果您的 API 支持 embeddings 端点,可以启用此项
181182 {
182183 "name" : "LLMRAGAnswerRelevancy" ,
183- "config" : {
184- "model" : OPENAI_MODEL ,
185- "key" : OPENAI_KEY ,
186- "api_url" : OPENAI_URL ,
187- "parameters" : {
188- "embedding_model" : EMBEDDING_MODEL ,
189- "strictness" : 3 ,
190- "threshold" : 5
191- }
192- }
184+ "config" : llm_config_embedding
193185 }
194186 ]
195187 }