Skip to content

Commit 040fa6f

Browse files
feat: update modelres (#278)
* feat: update modelres
* 🎨 Auto-format code with pre-commit
* feat: change modelres to evaldetail
* feat: add
* feat: fix lint
* feat: rule 系列返回结果更新
* feat: llm 系列返回结果更新
* feat: fix other file modelres
* feat: fix md file modelres
* feat: spark 更新返回结果
* feat: 删除ModelRes
* feat: fix lint
* feat: fix bug
* feat: fix lint
* feat: fix bug

---------

Co-authored-by: GitHub Action <[email protected]>
1 parent b9180b1 commit 040fa6f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+1276
-1989
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,8 @@ from dingo.model import Model
297297
from dingo.model.rule.base import BaseRule
298298
from dingo.config.input_args import EvaluatorRuleArgs
299299
from dingo.io import Data
300-
from dingo.model.modelres import ModelRes
300+
from dingo.io.output.eval_detail import EvalDetail
301+
301302

302303
@Model.rule_register('QUALITY_BAD_RELEVANCE', ['default'])
303304
class MyCustomRule(BaseRule):
@@ -306,8 +307,8 @@ class MyCustomRule(BaseRule):
306307
dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here')
307308

308309
@classmethod
309-
def eval(cls, input_data: Data) -> ModelRes:
310-
res = ModelRes()
310+
def eval(cls, input_data: Data) -> EvalDetail:
311+
res = EvalDetail()
311312
# Your rule implementation here
312313
return res
313314
```

README_ja.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,8 @@ from dingo.model import Model
290290
from dingo.model.rule.base import BaseRule
291291
from dingo.config.input_args import EvaluatorRuleArgs
292292
from dingo.io import Data
293-
from dingo.model.modelres import ModelRes
293+
from dingo.io.output.eval_detail import EvalDetail
294+
294295

295296
@Model.rule_register('QUALITY_BAD_RELEVANCE', ['default'])
296297
class MyCustomRule(BaseRule):
@@ -299,8 +300,8 @@ class MyCustomRule(BaseRule):
299300
dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here')
300301

301302
@classmethod
302-
def eval(cls, input_data: Data) -> ModelRes:
303-
res = ModelRes()
303+
def eval(cls, input_data: Data) -> EvalDetail:
304+
res = EvalDetail()
304305
# ここにルール実装
305306
return res
306307
```

README_zh-CN.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,8 @@ from dingo.model import Model
296296
from dingo.model.rule.base import BaseRule
297297
from dingo.config.input_args import EvaluatorRuleArgs
298298
from dingo.io import Data
299-
from dingo.model.modelres import ModelRes
299+
from dingo.io.output.eval_detail import EvalDetail
300+
300301

301302
@Model.rule_register('QUALITY_BAD_RELEVANCE', ['default'])
302303
class MyCustomRule(BaseRule):
@@ -305,8 +306,8 @@ class MyCustomRule(BaseRule):
305306
dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here')
306307

307308
@classmethod
308-
def eval(cls, input_data: Data) -> ModelRes:
309-
res = ModelRes()
309+
def eval(cls, input_data: Data) -> EvalDetail:
310+
res = EvalDetail()
310311
# 您的规则实现
311312
return res
312313
```

dingo/exec/local.py

Lines changed: 77 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@
1010
from tqdm import tqdm
1111

1212
from dingo.config import InputArgs
13-
from dingo.config.input_args import EvalPipline
1413
from dingo.data import Dataset, DataSource, dataset_map, datasource_map
1514
from dingo.exec.base import ExecProto, Executor
1615
from dingo.io import Data, ResultInfo, SummaryModel
16+
from dingo.io.output.eval_detail import EvalDetail
1717
from dingo.model import Model
1818
from dingo.model.llm.base import BaseLLM
19-
from dingo.model.modelres import EvalDetail, ModelRes
20-
from dingo.model.rule.base import BaseRule
2119
from dingo.utils import log
2220

2321

@@ -110,23 +108,20 @@ def execute(self) -> SummaryModel:
110108
futures_results = self.merge_result_info(futures_results, result_info)
111109

112110
for result_info in futures_results:
113-
# 统计eval_details,第一层key是字段名组合,第二层value是EvalDetail
111+
# 统计eval_details,第一层key是字段名组合,第二层value是List[EvalDetail]
114112
# 错误类型从EvalDetail.label中获取
115-
for field_key, eval_detail in result_info.eval_details.items():
113+
for field_key, eval_detail_list in result_info.eval_details.items():
116114
if field_key not in self.summary.type_ratio:
117115
self.summary.type_ratio[field_key] = {}
118-
# 遍历 EvalDetail.label 中的每个错误类型
119-
# 兼容 dict 和 EvalDetail 对象两种情况
120-
if isinstance(eval_detail, dict):
121-
label_list = eval_detail.get('label', [])
122-
else:
123-
label_list = eval_detail.label
124-
125-
for eval_details_name in label_list:
126-
if eval_details_name not in self.summary.type_ratio[field_key]:
127-
self.summary.type_ratio[field_key][eval_details_name] = 1
128-
else:
129-
self.summary.type_ratio[field_key][eval_details_name] += 1
116+
# 遍历 List[EvalDetail]
117+
for eval_detail in eval_detail_list:
118+
# 获取label列表
119+
label_list = eval_detail.label if eval_detail.label else []
120+
for label in label_list:
121+
if label not in self.summary.type_ratio[field_key]:
122+
self.summary.type_ratio[field_key][label] = 1
123+
else:
124+
self.summary.type_ratio[field_key][label] += 1
130125

131126
if result_info.eval_status:
132127
self.summary.num_bad += 1
@@ -166,8 +161,7 @@ def evaluate_single_data(self, dingo_id: str, eval_fields: dict, eval_type: str,
166161
ResultInfo containing evaluation results
167162
"""
168163
result_info = ResultInfo(dingo_id=dingo_id)
169-
bad_eval_details = None
170-
good_eval_details = None
164+
eval_detail_list = []
171165

172166
for e_c_i in eval_list:
173167
# Get model class and instantiate
@@ -183,55 +177,32 @@ def evaluate_single_data(self, dingo_id: str, eval_fields: dict, eval_type: str,
183177
raise ValueError(f"Error eval_type: {eval_type}")
184178

185179
# Execute evaluation
186-
tmp: ModelRes = model.eval(Data(**map_data))
187-
if isinstance(tmp.eval_details, dict):
188-
tmp.eval_details = EvalDetail(**tmp.eval_details)
180+
tmp: EvalDetail = model.eval(Data(**map_data))
189181

190-
# Collect eval_details from ModelRes
191-
if tmp.eval_status:
182+
# 直接添加EvalDetail到列表中,不再merge
183+
eval_detail_list.append(tmp)
184+
185+
# 如果任意一个EvalDetail的status为True,则result_info.eval_status为True
186+
if tmp.status:
192187
result_info.eval_status = True
193-
# 合并 bad 的 eval_details (ModelRes.eval_details 现在直接是 EvalDetail)
194-
if isinstance(bad_eval_details, dict):
195-
bad_eval_details = EvalDetail(**bad_eval_details)
196-
if bad_eval_details:
197-
bad_eval_details.merge(tmp.eval_details)
198-
else:
199-
bad_eval_details = tmp.eval_details.copy()
200-
else:
201-
# 合并 good 的 eval_details (ModelRes.eval_details 现在直接是 EvalDetail)
202-
if isinstance(good_eval_details, dict):
203-
good_eval_details = EvalDetail(**good_eval_details)
204-
if good_eval_details:
205-
good_eval_details.merge(tmp.eval_details)
206-
else:
207-
good_eval_details = tmp.eval_details.copy()
208188

209-
# Set result_info fields based on all_labels configuration and add field
210-
join_fields = ','.join(eval_fields.values())
189+
# Set result_info fields
190+
join_fields = ','.join(eval_fields.values()) if eval_fields else 'default'
211191

192+
# 根据配置决定保存哪些结果
212193
if self.input_args.executor.result_save.all_labels:
213-
# Always include both good and bad results when they exist
214-
# The final eval_status is True if ANY evaluation failed
215-
# 合并 good 和 bad 的 eval_details (现在是 EvalDetail 对象)
216-
all_eval_details = None
217-
if bad_eval_details:
218-
all_eval_details = bad_eval_details.copy()
219-
if good_eval_details:
220-
if all_eval_details:
221-
all_eval_details.merge(good_eval_details)
222-
else:
223-
all_eval_details = good_eval_details.copy()
224-
# add field (ResultInfo.eval_details 现在是 Dict[str, EvalDetail])
225-
if all_eval_details:
226-
result_info.eval_details = {join_fields: all_eval_details}
194+
# 保存所有结果
195+
if eval_detail_list:
196+
result_info.eval_details = {join_fields: eval_detail_list}
227197
else:
228-
# add field (ResultInfo.eval_details 现在是 Dict[str, EvalDetail])
198+
# 只保存bad或good的结果
229199
if result_info.eval_status:
230-
if bad_eval_details:
231-
result_info.eval_details = {join_fields: bad_eval_details}
200+
# 有bad结果,只保留status=True的EvalDetail
201+
result_info.eval_details = {join_fields: [mr for mr in eval_detail_list if mr.status]}
232202
else:
233-
if good_eval_details and self.input_args.executor.result_save.good:
234-
result_info.eval_details = {join_fields: good_eval_details}
203+
# 都是good结果,根据配置决定是否保存,只保留status=False的EvalDetail
204+
if self.input_args.executor.result_save.good:
205+
result_info.eval_details = {join_fields: [mr for mr in eval_detail_list if not mr.status]}
235206

236207
return result_info
237208

@@ -241,14 +212,14 @@ def merge_result_info(self, existing_list: List[ResultInfo], new_item: ResultInf
241212
if existing_item:
242213
existing_item.eval_status = existing_item.eval_status or new_item.eval_status
243214

244-
# 合并 eval_details 字典(第一层是字段名,第二层直接是 EvalDetail)
215+
# 合并 eval_details 字典(第一层是字段名,第二层是List[EvalDetail])
245216
for key, value in new_item.eval_details.items():
246-
# 第一层是字段名,如果存在,则合并 EvalDetail
217+
# 第一层是字段名,如果存在,则extend List[EvalDetail]
247218
if key in existing_item.eval_details:
248-
existing_item.eval_details[key].merge(value)
249-
# 第一层是字段名,如果不存在,则创建副本
219+
existing_item.eval_details[key].extend(value)
220+
# 第一层是字段名,如果不存在,则直接赋值
250221
else:
251-
existing_item.eval_details[key] = value.copy()
222+
existing_item.eval_details[key] = value
252223
else:
253224
existing_list.append(new_item)
254225

@@ -279,42 +250,53 @@ def write_single_data(
279250
if not input_args.executor.result_save.good and not result_info.eval_status:
280251
return
281252

282-
# 遍历 eval_details 的第一层(字段名组合),第二层直接是 EvalDetail
283-
for field_name, eval_detail in result_info.eval_details.items():
253+
# 用集合记录已经写过的(字段名, label名)组合,避免重复写入
254+
written_labels = set()
255+
256+
# 遍历 eval_details 的第一层(字段名组合),第二层是List[EvalDetail]
257+
for field_name, eval_detail_list in result_info.eval_details.items():
284258
# 第一层:根据字段名创建文件夹
285259
field_dir = os.path.join(path, field_name)
286260
if not os.path.exists(field_dir):
287261
os.makedirs(field_dir)
288262

289-
# 从 EvalDetail.label 中获取错误类型列表
290-
if isinstance(eval_detail, dict):
291-
label_list = eval_detail.get('label', [])
292-
else:
293-
label_list = eval_detail.label
294-
for eval_details_name in label_list:
295-
# 按点分割错误类型名称,创建多层文件夹
296-
# 例如: "validity_errors.space_issues" -> ["validity_errors", "space_issues"]
297-
parts = eval_details_name.split(".")
298-
299-
# 除了最后一部分,其他部分都是文件夹
300-
if len(parts) > 1:
301-
# 创建多层文件夹
302-
folder_path = os.path.join(field_dir, *parts[:-1])
303-
if not os.path.exists(folder_path):
304-
os.makedirs(folder_path)
305-
# 最后一部分作为文件名
306-
file_name = parts[-1] + ".jsonl"
307-
f_n = os.path.join(folder_path, file_name)
308-
else:
309-
# 没有点分割,直接在字段文件夹下创建文件
310-
f_n = os.path.join(field_dir, parts[0] + ".jsonl")
311-
312-
with open(f_n, "a", encoding="utf-8") as f:
313-
if input_args.executor.result_save.raw:
314-
str_json = json.dumps(result_info.to_raw_dict(), ensure_ascii=False)
263+
# 遍历 List[EvalDetail]
264+
for eval_detail in eval_detail_list:
265+
# 从 EvalDetail.label 中获取错误类型列表
266+
label_list = eval_detail.label if eval_detail.label else []
267+
268+
for eval_details_name in label_list:
269+
# 检查是否已经写过这个(字段名, label名)组合
270+
label_key = (field_name, eval_details_name)
271+
if label_key in written_labels:
272+
continue
273+
274+
# 标记为已写入
275+
written_labels.add(label_key)
276+
277+
# 按点分割错误类型名称,创建多层文件夹
278+
# 例如: "validity_errors.space_issues" -> ["validity_errors", "space_issues"]
279+
parts = eval_details_name.split(".")
280+
281+
# 除了最后一部分,其他部分都是文件夹
282+
if len(parts) > 1:
283+
# 创建多层文件夹
284+
folder_path = os.path.join(field_dir, *parts[:-1])
285+
if not os.path.exists(folder_path):
286+
os.makedirs(folder_path)
287+
# 最后一部分作为文件名
288+
file_name = parts[-1] + ".jsonl"
289+
f_n = os.path.join(folder_path, file_name)
315290
else:
316-
str_json = json.dumps(result_info.to_dict(), ensure_ascii=False)
317-
f.write(str_json + "\n")
291+
# 没有点分割,直接在字段文件夹下创建文件
292+
f_n = os.path.join(field_dir, parts[0] + ".jsonl")
293+
294+
with open(f_n, "a", encoding="utf-8") as f:
295+
if input_args.executor.result_save.raw:
296+
str_json = json.dumps(result_info.to_raw_dict(), ensure_ascii=False)
297+
else:
298+
str_json = json.dumps(result_info.to_dict(), ensure_ascii=False)
299+
f.write(str_json + "\n")
318300

319301
def write_summary(self, path: str, input_args: InputArgs, summary: SummaryModel):
320302
if not input_args.executor.result_save.bad:

0 commit comments

Comments
 (0)