1010from tqdm import tqdm
1111
1212from dingo .config import InputArgs
13- from dingo .config .input_args import EvalPipline
1413from dingo .data import Dataset , DataSource , dataset_map , datasource_map
1514from dingo .exec .base import ExecProto , Executor
1615from dingo .io import Data , ResultInfo , SummaryModel
16+ from dingo .io .output .eval_detail import EvalDetail
1717from dingo .model import Model
1818from dingo .model .llm .base import BaseLLM
19- from dingo .model .modelres import EvalDetail , ModelRes
20- from dingo .model .rule .base import BaseRule
2119from dingo .utils import log
2220
2321
@@ -110,23 +108,20 @@ def execute(self) -> SummaryModel:
110108 futures_results = self .merge_result_info (futures_results , result_info )
111109
112110 for result_info in futures_results :
113- # 统计eval_details,第一层key是字段名组合,第二层value是EvalDetail
111+ # 统计eval_details,第一层key是字段名组合,第二层value是List[EvalDetail]
114112 # 错误类型从EvalDetail.label中获取
115- for field_key , eval_detail in result_info .eval_details .items ():
113+ for field_key , eval_detail_list in result_info .eval_details .items ():
116114 if field_key not in self .summary .type_ratio :
117115 self .summary .type_ratio [field_key ] = {}
118- # 遍历 EvalDetail.label 中的每个错误类型
119- # 兼容 dict 和 EvalDetail 对象两种情况
120- if isinstance (eval_detail , dict ):
121- label_list = eval_detail .get ('label' , [])
122- else :
123- label_list = eval_detail .label
124-
125- for eval_details_name in label_list :
126- if eval_details_name not in self .summary .type_ratio [field_key ]:
127- self .summary .type_ratio [field_key ][eval_details_name ] = 1
128- else :
129- self .summary .type_ratio [field_key ][eval_details_name ] += 1
116+ # 遍历 List[EvalDetail]
117+ for eval_detail in eval_detail_list :
118+ # 获取label列表
119+ label_list = eval_detail .label if eval_detail .label else []
120+ for label in label_list :
121+ if label not in self .summary .type_ratio [field_key ]:
122+ self .summary .type_ratio [field_key ][label ] = 1
123+ else :
124+ self .summary .type_ratio [field_key ][label ] += 1
130125
131126 if result_info .eval_status :
132127 self .summary .num_bad += 1
@@ -166,8 +161,7 @@ def evaluate_single_data(self, dingo_id: str, eval_fields: dict, eval_type: str,
166161 ResultInfo containing evaluation results
167162 """
168163 result_info = ResultInfo (dingo_id = dingo_id )
169- bad_eval_details = None
170- good_eval_details = None
164+ eval_detail_list = []
171165
172166 for e_c_i in eval_list :
173167 # Get model class and instantiate
@@ -183,55 +177,32 @@ def evaluate_single_data(self, dingo_id: str, eval_fields: dict, eval_type: str,
183177 raise ValueError (f"Error eval_type: { eval_type } " )
184178
185179 # Execute evaluation
186- tmp : ModelRes = model .eval (Data (** map_data ))
187- if isinstance (tmp .eval_details , dict ):
188- tmp .eval_details = EvalDetail (** tmp .eval_details )
180+ tmp : EvalDetail = model .eval (Data (** map_data ))
189181
190- # Collect eval_details from ModelRes
191- if tmp .eval_status :
182+ # 直接添加EvalDetail到列表中,不再merge
183+ eval_detail_list .append (tmp )
184+
185+ # 如果任意一个EvalDetail的status为True,则result_info.eval_status为True
186+ if tmp .status :
192187 result_info .eval_status = True
193- # 合并 bad 的 eval_details (ModelRes.eval_details 现在直接是 EvalDetail)
194- if isinstance (bad_eval_details , dict ):
195- bad_eval_details = EvalDetail (** bad_eval_details )
196- if bad_eval_details :
197- bad_eval_details .merge (tmp .eval_details )
198- else :
199- bad_eval_details = tmp .eval_details .copy ()
200- else :
201- # 合并 good 的 eval_details (ModelRes.eval_details 现在直接是 EvalDetail)
202- if isinstance (good_eval_details , dict ):
203- good_eval_details = EvalDetail (** good_eval_details )
204- if good_eval_details :
205- good_eval_details .merge (tmp .eval_details )
206- else :
207- good_eval_details = tmp .eval_details .copy ()
208188
209- # Set result_info fields based on all_labels configuration and add field
210- join_fields = ',' .join (eval_fields .values ())
189+ # Set result_info fields
190+ join_fields = ',' .join (eval_fields .values ()) if eval_fields else 'default'
211191
192+ # 根据配置决定保存哪些结果
212193 if self .input_args .executor .result_save .all_labels :
213- # Always include both good and bad results when they exist
214- # The final eval_status is True if ANY evaluation failed
215- # 合并 good 和 bad 的 eval_details (现在是 EvalDetail 对象)
216- all_eval_details = None
217- if bad_eval_details :
218- all_eval_details = bad_eval_details .copy ()
219- if good_eval_details :
220- if all_eval_details :
221- all_eval_details .merge (good_eval_details )
222- else :
223- all_eval_details = good_eval_details .copy ()
224- # add field (ResultInfo.eval_details 现在是 Dict[str, EvalDetail])
225- if all_eval_details :
226- result_info .eval_details = {join_fields : all_eval_details }
194+ # 保存所有结果
195+ if eval_detail_list :
196+ result_info .eval_details = {join_fields : eval_detail_list }
227197 else :
228- # add field (ResultInfo.eval_details 现在是 Dict[str, EvalDetail])
198+ # 只保存bad或good的结果
229199 if result_info .eval_status :
230- if bad_eval_details :
231- result_info .eval_details = {join_fields : bad_eval_details }
200+ # 有bad结果,只保留status=True的EvalDetail
201+ result_info .eval_details = {join_fields : [ mr for mr in eval_detail_list if mr . status ] }
232202 else :
233- if good_eval_details and self .input_args .executor .result_save .good :
234- result_info .eval_details = {join_fields : good_eval_details }
203+ # 都是good结果,根据配置决定是否保存,只保留status=False的EvalDetail
204+ if self .input_args .executor .result_save .good :
205+ result_info .eval_details = {join_fields : [mr for mr in eval_detail_list if not mr .status ]}
235206
236207 return result_info
237208
@@ -241,14 +212,14 @@ def merge_result_info(self, existing_list: List[ResultInfo], new_item: ResultInf
241212 if existing_item :
242213 existing_item .eval_status = existing_item .eval_status or new_item .eval_status
243214
244- # 合并 eval_details 字典(第一层是字段名,第二层直接是 EvalDetail)
215+ # 合并 eval_details 字典(第一层是字段名,第二层是List[ EvalDetail] )
245216 for key , value in new_item .eval_details .items ():
246- # 第一层是字段名,如果存在,则合并 EvalDetail
217+ # 第一层是字段名,如果存在,则extend List[ EvalDetail]
247218 if key in existing_item .eval_details :
248- existing_item .eval_details [key ].merge (value )
249- # 第一层是字段名,如果不存在,则创建副本
219+ existing_item .eval_details [key ].extend (value )
220+ # 第一层是字段名,如果不存在,则直接赋值
250221 else :
251- existing_item .eval_details [key ] = value . copy ()
222+ existing_item .eval_details [key ] = value
252223 else :
253224 existing_list .append (new_item )
254225
@@ -279,42 +250,53 @@ def write_single_data(
279250 if not input_args .executor .result_save .good and not result_info .eval_status :
280251 return
281252
282- # 遍历 eval_details 的第一层(字段名组合),第二层直接是 EvalDetail
283- for field_name , eval_detail in result_info .eval_details .items ():
253+ # 用集合记录已经写过的(字段名, label名)组合,避免重复写入
254+ written_labels = set ()
255+
256+ # 遍历 eval_details 的第一层(字段名组合),第二层是List[EvalDetail]
257+ for field_name , eval_detail_list in result_info .eval_details .items ():
284258 # 第一层:根据字段名创建文件夹
285259 field_dir = os .path .join (path , field_name )
286260 if not os .path .exists (field_dir ):
287261 os .makedirs (field_dir )
288262
289- # 从 EvalDetail.label 中获取错误类型列表
290- if isinstance (eval_detail , dict ):
291- label_list = eval_detail .get ('label' , [])
292- else :
293- label_list = eval_detail .label
294- for eval_details_name in label_list :
295- # 按点分割错误类型名称,创建多层文件夹
296- # 例如: "validity_errors.space_issues" -> ["validity_errors", "space_issues"]
297- parts = eval_details_name .split ("." )
298-
299- # 除了最后一部分,其他部分都是文件夹
300- if len (parts ) > 1 :
301- # 创建多层文件夹
302- folder_path = os .path .join (field_dir , * parts [:- 1 ])
303- if not os .path .exists (folder_path ):
304- os .makedirs (folder_path )
305- # 最后一部分作为文件名
306- file_name = parts [- 1 ] + ".jsonl"
307- f_n = os .path .join (folder_path , file_name )
308- else :
309- # 没有点分割,直接在字段文件夹下创建文件
310- f_n = os .path .join (field_dir , parts [0 ] + ".jsonl" )
311-
312- with open (f_n , "a" , encoding = "utf-8" ) as f :
313- if input_args .executor .result_save .raw :
314- str_json = json .dumps (result_info .to_raw_dict (), ensure_ascii = False )
263+ # 遍历 List[EvalDetail]
264+ for eval_detail in eval_detail_list :
265+ # 从 EvalDetail.label 中获取错误类型列表
266+ label_list = eval_detail .label if eval_detail .label else []
267+
268+ for eval_details_name in label_list :
269+ # 检查是否已经写过这个(字段名, label名)组合
270+ label_key = (field_name , eval_details_name )
271+ if label_key in written_labels :
272+ continue
273+
274+ # 标记为已写入
275+ written_labels .add (label_key )
276+
277+ # 按点分割错误类型名称,创建多层文件夹
278+ # 例如: "validity_errors.space_issues" -> ["validity_errors", "space_issues"]
279+ parts = eval_details_name .split ("." )
280+
281+ # 除了最后一部分,其他部分都是文件夹
282+ if len (parts ) > 1 :
283+ # 创建多层文件夹
284+ folder_path = os .path .join (field_dir , * parts [:- 1 ])
285+ if not os .path .exists (folder_path ):
286+ os .makedirs (folder_path )
287+ # 最后一部分作为文件名
288+ file_name = parts [- 1 ] + ".jsonl"
289+ f_n = os .path .join (folder_path , file_name )
315290 else :
316- str_json = json .dumps (result_info .to_dict (), ensure_ascii = False )
317- f .write (str_json + "\n " )
291+ # 没有点分割,直接在字段文件夹下创建文件
292+ f_n = os .path .join (field_dir , parts [0 ] + ".jsonl" )
293+
294+ with open (f_n , "a" , encoding = "utf-8" ) as f :
295+ if input_args .executor .result_save .raw :
296+ str_json = json .dumps (result_info .to_raw_dict (), ensure_ascii = False )
297+ else :
298+ str_json = json .dumps (result_info .to_dict (), ensure_ascii = False )
299+ f .write (str_json + "\n " )
318300
319301 def write_summary (self , path : str , input_args : InputArgs , summary : SummaryModel ):
320302 if not input_args .executor .result_save .bad :
0 commit comments