diff --git a/README.md b/README.md index 27f51d12..7278c699 100644 --- a/README.md +++ b/README.md @@ -297,7 +297,8 @@ from dingo.model import Model from dingo.model.rule.base import BaseRule from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail + @Model.rule_register('QUALITY_BAD_RELEVANCE', ['default']) class MyCustomRule(BaseRule): @@ -306,8 +307,8 @@ class MyCustomRule(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail() # Your rule implementation here return res ``` diff --git a/README_ja.md b/README_ja.md index 61023e97..5727140f 100644 --- a/README_ja.md +++ b/README_ja.md @@ -290,7 +290,8 @@ from dingo.model import Model from dingo.model.rule.base import BaseRule from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail + @Model.rule_register('QUALITY_BAD_RELEVANCE', ['default']) class MyCustomRule(BaseRule): @@ -299,8 +300,8 @@ class MyCustomRule(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail() # ここにルール実装 return res ``` diff --git a/README_zh-CN.md b/README_zh-CN.md index 08c7601a..ebf7a2bc 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -296,7 +296,8 @@ from dingo.model import Model from dingo.model.rule.base import BaseRule from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail + @Model.rule_register('QUALITY_BAD_RELEVANCE', ['default']) class MyCustomRule(BaseRule): @@ -305,8 +306,8 @@ class MyCustomRule(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail() # 您的规则实现 return res ``` diff --git a/dingo/exec/local.py b/dingo/exec/local.py index 185fd4a9..0b723355 100644 --- a/dingo/exec/local.py +++ b/dingo/exec/local.py @@ -10,14 +10,12 @@ from tqdm import tqdm from dingo.config import InputArgs -from dingo.config.input_args import EvalPipline from dingo.data import Dataset, DataSource, dataset_map, datasource_map from dingo.exec.base import ExecProto, Executor from dingo.io import Data, ResultInfo, SummaryModel +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base import BaseLLM -from dingo.model.modelres import EvalDetail, ModelRes -from dingo.model.rule.base import BaseRule from dingo.utils import log @@ -110,23 +108,20 @@ def execute(self) -> SummaryModel: futures_results = self.merge_result_info(futures_results, result_info) for result_info in futures_results: - # 统计eval_details,第一层key是字段名组合,第二层value是EvalDetail + # 统计eval_details,第一层key是字段名组合,第二层value是List[EvalDetail] # 错误类型从EvalDetail.label中获取 - for field_key, eval_detail in result_info.eval_details.items(): + for field_key, eval_detail_list in result_info.eval_details.items(): if field_key not in self.summary.type_ratio: self.summary.type_ratio[field_key] = {} - # 遍历 EvalDetail.label 中的每个错误类型 - # 兼容 dict 
和 EvalDetail 对象两种情况 - if isinstance(eval_detail, dict): - label_list = eval_detail.get('label', []) - else: - label_list = eval_detail.label - - for eval_details_name in label_list: - if eval_details_name not in self.summary.type_ratio[field_key]: - self.summary.type_ratio[field_key][eval_details_name] = 1 - else: - self.summary.type_ratio[field_key][eval_details_name] += 1 + # 遍历 List[EvalDetail] + for eval_detail in eval_detail_list: + # 获取label列表 + label_list = eval_detail.label if eval_detail.label else [] + for label in label_list: + if label not in self.summary.type_ratio[field_key]: + self.summary.type_ratio[field_key][label] = 1 + else: + self.summary.type_ratio[field_key][label] += 1 if result_info.eval_status: self.summary.num_bad += 1 @@ -166,8 +161,7 @@ def evaluate_single_data(self, dingo_id: str, eval_fields: dict, eval_type: str, ResultInfo containing evaluation results """ result_info = ResultInfo(dingo_id=dingo_id) - bad_eval_details = None - good_eval_details = None + eval_detail_list = [] for e_c_i in eval_list: # Get model class and instantiate @@ -183,55 +177,32 @@ def evaluate_single_data(self, dingo_id: str, eval_fields: dict, eval_type: str, raise ValueError(f"Error eval_type: {eval_type}") # Execute evaluation - tmp: ModelRes = model.eval(Data(**map_data)) - if isinstance(tmp.eval_details, dict): - tmp.eval_details = EvalDetail(**tmp.eval_details) + tmp: EvalDetail = model.eval(Data(**map_data)) - # Collect eval_details from ModelRes - if tmp.eval_status: + # 直接添加EvalDetail到列表中,不再merge + eval_detail_list.append(tmp) + + # 如果任意一个EvalDetail的status为True,则result_info.eval_status为True + if tmp.status: result_info.eval_status = True - # 合并 bad 的 eval_details (ModelRes.eval_details 现在直接是 EvalDetail) - if isinstance(bad_eval_details, dict): - bad_eval_details = EvalDetail(**bad_eval_details) - if bad_eval_details: - bad_eval_details.merge(tmp.eval_details) - else: - bad_eval_details = tmp.eval_details.copy() - else: - # 合并 good 的 eval_details (ModelRes.eval_details 现在直接是 EvalDetail) - if isinstance(good_eval_details, dict): - good_eval_details = EvalDetail(**good_eval_details) - if good_eval_details: - good_eval_details.merge(tmp.eval_details) - else: - good_eval_details = tmp.eval_details.copy() - # Set result_info fields based on all_labels configuration and add field - join_fields = ','.join(eval_fields.values()) + # Set result_info fields + join_fields = ','.join(eval_fields.values()) if eval_fields else 'default' + # 根据配置决定保存哪些结果 if self.input_args.executor.result_save.all_labels: - # Always include both good and bad results when they exist - # The final eval_status is True if ANY evaluation failed - # 合并 good 和 bad 的 eval_details (现在是 EvalDetail 对象) - all_eval_details = None - if bad_eval_details: - all_eval_details = bad_eval_details.copy() - if good_eval_details: - if all_eval_details: - all_eval_details.merge(good_eval_details) - else: - all_eval_details = good_eval_details.copy() - # add field (ResultInfo.eval_details 现在是 Dict[str, EvalDetail]) - if all_eval_details: - result_info.eval_details = {join_fields: all_eval_details} + # 保存所有结果 + if eval_detail_list: + result_info.eval_details = {join_fields: eval_detail_list} else: - # add field (ResultInfo.eval_details 现在是 Dict[str, EvalDetail]) + # 只保存bad或good的结果 if result_info.eval_status: - if bad_eval_details: - result_info.eval_details = {join_fields: bad_eval_details} + # 有bad结果,只保留status=True的EvalDetail + result_info.eval_details = {join_fields: [mr for mr in eval_detail_list if mr.status]} else: - if 
good_eval_details and self.input_args.executor.result_save.good: - result_info.eval_details = {join_fields: good_eval_details} + # 都是good结果,根据配置决定是否保存,只保留status=False的EvalDetail + if self.input_args.executor.result_save.good: + result_info.eval_details = {join_fields: [mr for mr in eval_detail_list if not mr.status]} return result_info @@ -241,14 +212,14 @@ def merge_result_info(self, existing_list: List[ResultInfo], new_item: ResultInf if existing_item: existing_item.eval_status = existing_item.eval_status or new_item.eval_status - # 合并 eval_details 字典(第一层是字段名,第二层直接是 EvalDetail) + # 合并 eval_details 字典(第一层是字段名,第二层是List[EvalDetail]) for key, value in new_item.eval_details.items(): - # 第一层是字段名,如果存在,则合并 EvalDetail + # 第一层是字段名,如果存在,则extend List[EvalDetail] if key in existing_item.eval_details: - existing_item.eval_details[key].merge(value) - # 第一层是字段名,如果不存在,则创建副本 + existing_item.eval_details[key].extend(value) + # 第一层是字段名,如果不存在,则直接赋值 else: - existing_item.eval_details[key] = value.copy() + existing_item.eval_details[key] = value else: existing_list.append(new_item) @@ -279,42 +250,53 @@ def write_single_data( if not input_args.executor.result_save.good and not result_info.eval_status: return - # 遍历 eval_details 的第一层(字段名组合),第二层直接是 EvalDetail - for field_name, eval_detail in result_info.eval_details.items(): + # 用集合记录已经写过的(字段名, label名)组合,避免重复写入 + written_labels = set() + + # 遍历 eval_details 的第一层(字段名组合),第二层是List[EvalDetail] + for field_name, eval_detail_list in result_info.eval_details.items(): # 第一层:根据字段名创建文件夹 field_dir = os.path.join(path, field_name) if not os.path.exists(field_dir): os.makedirs(field_dir) - # 从 EvalDetail.label 中获取错误类型列表 - if isinstance(eval_detail, dict): - label_list = eval_detail.get('label', []) - else: - label_list = eval_detail.label - for eval_details_name in label_list: - # 按点分割错误类型名称,创建多层文件夹 - # 例如: "validity_errors.space_issues" -> ["validity_errors", "space_issues"] - parts = eval_details_name.split(".") - - # 除了最后一部分,其他部分都是文件夹 - if len(parts) > 1: - # 创建多层文件夹 - folder_path = os.path.join(field_dir, *parts[:-1]) - if not os.path.exists(folder_path): - os.makedirs(folder_path) - # 最后一部分作为文件名 - file_name = parts[-1] + ".jsonl" - f_n = os.path.join(folder_path, file_name) - else: - # 没有点分割,直接在字段文件夹下创建文件 - f_n = os.path.join(field_dir, parts[0] + ".jsonl") - - with open(f_n, "a", encoding="utf-8") as f: - if input_args.executor.result_save.raw: - str_json = json.dumps(result_info.to_raw_dict(), ensure_ascii=False) + # 遍历 List[EvalDetail] + for eval_detail in eval_detail_list: + # 从 EvalDetail.label 中获取错误类型列表 + label_list = eval_detail.label if eval_detail.label else [] + + for eval_details_name in label_list: + # 检查是否已经写过这个(字段名, label名)组合 + label_key = (field_name, eval_details_name) + if label_key in written_labels: + continue + + # 标记为已写入 + written_labels.add(label_key) + + # 按点分割错误类型名称,创建多层文件夹 + # 例如: "validity_errors.space_issues" -> ["validity_errors", "space_issues"] + parts = eval_details_name.split(".") + + # 除了最后一部分,其他部分都是文件夹 + if len(parts) > 1: + # 创建多层文件夹 + folder_path = os.path.join(field_dir, *parts[:-1]) + if not os.path.exists(folder_path): + os.makedirs(folder_path) + # 最后一部分作为文件名 + file_name = parts[-1] + ".jsonl" + f_n = os.path.join(folder_path, file_name) else: - str_json = json.dumps(result_info.to_dict(), ensure_ascii=False) - f.write(str_json + "\n") + # 没有点分割,直接在字段文件夹下创建文件 + f_n = os.path.join(field_dir, parts[0] + ".jsonl") + + with open(f_n, "a", encoding="utf-8") as f: + if input_args.executor.result_save.raw: + str_json = 
json.dumps(result_info.to_raw_dict(), ensure_ascii=False) + else: + str_json = json.dumps(result_info.to_dict(), ensure_ascii=False) + f.write(str_json + "\n") def write_summary(self, path: str, input_args: InputArgs, summary: SummaryModel): if not input_args.executor.result_save.bad: diff --git a/dingo/exec/spark.py b/dingo/exec/spark.py index 64256665..7d936bae 100644 --- a/dingo/exec/spark.py +++ b/dingo/exec/spark.py @@ -1,7 +1,7 @@ import copy import time import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from pyspark import SparkConf from pyspark.rdd import RDD @@ -10,11 +10,10 @@ from dingo.config import InputArgs from dingo.exec.base import ExecProto, Executor from dingo.io import Data, ResultInfo, SummaryModel +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model -from dingo.model.llm.base import BaseLLM -from dingo.model.modelres import ModelRes + # from dingo.model.prompt.base import BasePrompt -from dingo.model.rule.base import BaseRule @Executor.register("spark") @@ -154,20 +153,20 @@ def evaluate(self, data_rdd_item) -> Dict[str, Any]: else: raise ValueError(f"Error eval_type: {eval_type}") - if r_i.eval_status: - result_info.eval_status = True - for k,v in r_i.eval_details.items(): - if k not in result_info.eval_details: - result_info.eval_details[k] = v - else: - result_info.eval_details[k].merge(v) + if r_i.eval_status: + result_info.eval_status = True + # Merge eval_details: Dict[str, List[EvalDetail]] + for k, v in r_i.eval_details.items(): + if k not in result_info.eval_details: + result_info.eval_details[k] = v + else: + result_info.eval_details[k].extend(v) return result_info.to_dict() def evaluate_item(self, eval_fields: dict, eval_type: str, map_data: dict, eval_list: list) -> ResultInfo: result_info = ResultInfo() - bad_eval_details = None - good_eval_details = None + eval_detail_list = [] for e_c_i in eval_list: if eval_type == 'rule': @@ -178,40 +177,32 @@ def evaluate_item(self, eval_fields: dict, eval_type: str, map_data: dict, eval_ Model.set_config_llm(model, e_c_i.config) else: raise ValueError(f"Error eval_type: {eval_type}") - tmp: ModelRes = model.eval(Data(**map_data)) - # Collect eval_details from ModelRes - if tmp.eval_status: + + tmp: EvalDetail = model.eval(Data(**map_data)) + eval_detail_list.append(tmp) + + # If any EvalDetail's status is True, result_info.eval_status is True + if tmp.status: result_info.eval_status = True - if bad_eval_details: - bad_eval_details.merge(tmp.eval_details) - else: - bad_eval_details = tmp.eval_details.copy() - else: - if good_eval_details: - good_eval_details.merge(tmp.eval_details) - else: - good_eval_details = tmp.eval_details.copy() - # Set result_info fields based on all_labels configuration and add field - join_fields = ','.join(eval_fields.values()) + # Set result_info fields + join_fields = ','.join(eval_fields.values()) if eval_fields else 'default' + + # Decide which results to save based on configuration if self.input_args.executor.result_save.all_labels: - all_eval_details = None - if bad_eval_details: - all_eval_details = bad_eval_details.copy() - if good_eval_details: - if all_eval_details: - all_eval_details.merge(good_eval_details) - else: - all_eval_details = good_eval_details.copy() - if all_eval_details: - result_info.eval_details = {join_fields: all_eval_details} + # Save all results + if eval_detail_list: + result_info.eval_details = {join_fields: eval_detail_list} else: + # Only save bad or good results if 
result_info.eval_status: - if bad_eval_details: - result_info.eval_details = {join_fields: bad_eval_details} + # Has bad results, only keep EvalDetail with status=True + result_info.eval_details = {join_fields: [ed for ed in eval_detail_list if ed.status]} else: - if good_eval_details and self.input_args.executor.result_save.good: - result_info.eval_details = {join_fields: good_eval_details} + # All good results, decide whether to save based on configuration + if self.input_args.executor.result_save.good: + result_info.eval_details = {join_fields: [ed for ed in eval_detail_list if not ed.status]} + return result_info def summarize(self, summary: SummaryModel) -> SummaryModel: @@ -231,20 +222,22 @@ def aggregate_eval_detailss(acc, item): """聚合单个 item 的 eval_details 到累加器中""" eval_details_dict = item.get('eval_details', {}) - # 遍历第一层:字段名 - for field_key, eval_detail_dict in eval_details_dict.items(): + # 遍历第一层:字段名,第二层是 List[EvalDetail] (序列化为 list of dicts) + for field_key, eval_detail_list in eval_details_dict.items(): if field_key not in acc: acc[field_key] = {} - # 从 EvalDetail 的 label 列表中获取错误类型 - label_list = eval_detail_dict.get('label', []) if isinstance(eval_detail_dict, dict) else eval_detail_dict.label - - # 统计每个 label 的出现次数 - for label in label_list: - if label not in acc[field_key]: - acc[field_key][label] = 1 - else: - acc[field_key][label] += 1 + # 遍历 List[EvalDetail] + for eval_detail in eval_detail_list: + # 从 EvalDetail 的 label 列表中获取错误类型 + label_list = eval_detail.get('label', []) if isinstance(eval_detail, dict) else eval_detail.label + if label_list: + # 统计每个 label 的出现次数 + for label in label_list: + if label not in acc[field_key]: + acc[field_key][label] = 1 + else: + acc[field_key][label] += 1 return acc diff --git a/dingo/io/output/eval_detail.py b/dingo/io/output/eval_detail.py new file mode 100644 index 00000000..f2073dca --- /dev/null +++ b/dingo/io/output/eval_detail.py @@ -0,0 +1,18 @@ +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + + +class QualityLabel: + """质量标签常量类""" + QUALITY_GOOD = "QUALITY_GOOD" # Indicates pass the quality check + QUALITY_BAD_PREFIX = "QUALITY_BAD_" # Indicates not pass the quality check + + +class EvalDetail(BaseModel): + metric: str + status: bool = False + + score: Optional[float] = None + label: Optional[list[str]] = None + reason: Optional[list] = None diff --git a/dingo/io/output/result_info.py b/dingo/io/output/result_info.py index d604c446..50666446 100644 --- a/dingo/io/output/result_info.py +++ b/dingo/io/output/result_info.py @@ -1,28 +1,44 @@ -from typing import Any, Dict, List +from typing import Dict, List -from pydantic import BaseModel, Field +from pydantic import BaseModel -from dingo.model.modelres import EvalDetail +from dingo.io.output.eval_detail import EvalDetail class ResultInfo(BaseModel): dingo_id: str = '' raw_data: Dict = {} eval_status: bool = False - eval_details: Dict[str, EvalDetail] = {} + eval_details: Dict[str, List[EvalDetail]] = {} def to_dict(self): + """将ResultInfo转换为字典格式 + + Returns: + 包含所有字段的字典,其中eval_details被转换为嵌套字典结构 + """ return { 'dingo_id': self.dingo_id, 'raw_data': self.raw_data, 'eval_status': self.eval_status, - 'eval_details': {k: v.to_dict() for k,v in self.eval_details.items()}, + 'eval_details': { + k: [model_res.model_dump() for model_res in v] + for k, v in self.eval_details.items() + }, } def to_raw_dict(self): + """将ResultInfo合并到raw_data中 + + Returns: + 包含原始数据和dingo_result的字典 + """ dingo_result = { 'eval_status': self.eval_status, - 'eval_details': 
{k: v.to_dict() for k,v in self.eval_details.items()}, + 'eval_details': { + k: [model_res.model_dump() for model_res in v] + for k, v in self.eval_details.items() + }, } self.raw_data['dingo_result'] = dingo_result return self.raw_data diff --git a/dingo/model/llm/base.py b/dingo/model/llm/base.py index 237cd52b..778f7f1f 100644 --- a/dingo/model/llm/base.py +++ b/dingo/model/llm/base.py @@ -2,7 +2,7 @@ from dingo.config.input_args import EvaluatorLLMArgs from dingo.io import Data -from dingo.model.modelres import EvalDetail, ModelRes, QualityLabel +from dingo.io.output.eval_detail import EvalDetail class BaseLLM: @@ -12,5 +12,5 @@ class BaseLLM: dynamic_config: EvaluatorLLMArgs @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: raise NotImplementedError() diff --git a/dingo/model/llm/base_lmdeploy_apiclient.py b/dingo/model/llm/base_lmdeploy_apiclient.py index ac17541f..c3edc79a 100644 --- a/dingo/model/llm/base_lmdeploy_apiclient.py +++ b/dingo/model/llm/base_lmdeploy_apiclient.py @@ -6,8 +6,8 @@ from dingo.config.input_args import EvaluatorLLMArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.llm.base import BaseLLM -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError, ExceedMaxTokens @@ -44,7 +44,7 @@ def send_messages(cls, messages: List): return str(response) @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -60,30 +60,20 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - # result.reason = [response_model.reason] - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = [response_model.reason] else: - result.eval_status = True - # result.type = cls.prompt.metric_type - # result.name = cls.prompt.__name__ - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"QUALITY_BAD.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + result.label = [f"QUALITY_BAD.{cls.__name__}"] + result.reason = [response_model.reason] return result @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: if cls.client is None: cls.create_client() @@ -106,11 +96,8 @@ def eval(cls, input_data: Data) -> ModelRes: except_msg = str(e) except_name = e.__class__.__name__ - res = ModelRes() - res.eval_status = True - res.eval_details = { - "label": [f"QUALITY_BAD.{except_name}"], - "metric": [cls.__name__], - "reason": [except_msg] - } + res = EvalDetail(metric=cls.__name__) + res.status = True + res.label = [f"QUALITY_BAD.{except_name}"] + res.reason = [except_msg] return res diff --git a/dingo/model/llm/base_openai.py b/dingo/model/llm/base_openai.py index db717cf0..64ca31ec 100644 --- a/dingo/model/llm/base_openai.py +++ b/dingo/model/llm/base_openai.py @@ -6,8 +6,8 @@ from dingo.config.input_args import EvaluatorLLMArgs from dingo.io import Data +from 
dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.llm.base import BaseLLM -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError, ExceedMaxTokens @@ -111,7 +111,7 @@ def validate_config(cls, parameters: Dict): ) @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -127,26 +127,31 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + # result.eval_details = { + # "label": [QualityLabel.QUALITY_GOOD], + # "metric": [cls.__name__], + # "reason": [response_model.reason] + # } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = [response_model.reason] else: - result.eval_status = True - result.eval_details = { - "label": [f"QUALITY_BAD.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + # result.eval_status = True + # result.eval_details = { + # "label": [f"QUALITY_BAD.{cls.__name__}"], + # "metric": [cls.__name__], + # "reason": [response_model.reason] + # } + result.status = True + result.label = [f"QUALITY_BAD.{cls.__name__}"] + result.reason = [response_model.reason] return result @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: if cls.client is None: cls.create_client() @@ -158,7 +163,7 @@ def eval(cls, input_data: Data) -> ModelRes: while attempts < 3: try: response = cls.send_messages(messages) - res: ModelRes = cls.process_response(response) + res: EvalDetail = cls.process_response(response) return res except (ValidationError, ExceedMaxTokens, ConvertJsonError) as e: except_msg = str(e) @@ -170,11 +175,14 @@ def eval(cls, input_data: Data) -> ModelRes: except_msg = str(e) except_name = e.__class__.__name__ - res = ModelRes() - res.eval_status = True - res.eval_details = { - "label": [f"QUALITY_BAD.{except_name}"], - "metric": [cls.__name__], - "reason": [except_msg] - } + res = EvalDetail(metric=cls.__name__) + # res.eval_status = True + # res.eval_details = { + # "label": [f"QUALITY_BAD.{except_name}"], + # "metric": [cls.__name__], + # "reason": [except_msg] + # } + res.status = True + res.label = [f"QUALITY_BAD.{except_name}"] + res.reason = [except_msg] return res diff --git a/dingo/model/llm/compare/llm_code_compare.py b/dingo/model/llm/compare/llm_code_compare.py index 7f5f7725..8aba3599 100644 --- a/dingo/model/llm/compare/llm_code_compare.py +++ b/dingo/model/llm/compare/llm_code_compare.py @@ -3,9 +3,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -138,7 +138,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) # 提取思考内容和清理响应 @@ -183,28 +183,22 @@ def 
_clean_response(response: str) -> str: return response @staticmethod - def _create_no_code_result(response_json: dict) -> ModelRes: - result = ModelRes() - result.eval_status = False - result.eval_details = { - "label": ["NO_CODE.code"], - "metric": ["LLMCodeCompare"], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + def _create_no_code_result(response_json: dict) -> EvalDetail: + result = EvalDetail(metric="LLMCodeCompare") + result.status = False + result.label = ["NO_CODE.code"] + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result @staticmethod - def _create_normal_result(response_json: dict) -> ModelRes: - result = ModelRes() + def _create_normal_result(response_json: dict) -> EvalDetail: + result = EvalDetail(metric="LLMCodeCompare") score = response_json.get('score', 0) - result.eval_status = score != 1 + result.status = score != 1 tmp_type = {1: 'TOOL_ONE_BETTER', 2: 'TOOL_TWO_BETTER'}.get(score, 'TOOL_EQUAL') - result.eval_details = { - "label": [f"{tmp_type}.code"], - "metric": ["LLMCodeCompare"], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + result.label = [f"{tmp_type}.code"] + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result diff --git a/dingo/model/llm/compare/llm_html_extract_compare.py b/dingo/model/llm/compare/llm_html_extract_compare.py index 0215b583..72b9836a 100644 --- a/dingo/model/llm/compare/llm_html_extract_compare.py +++ b/dingo/model/llm/compare/llm_html_extract_compare.py @@ -3,9 +3,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -107,7 +107,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) response_think = "" @@ -133,10 +133,10 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # status if response_model.score != 1: - result.eval_status = True + result.status = True # type # if response_model.score == 1: @@ -159,11 +159,7 @@ def process_response(cls, response: str) -> ModelRes: tmp_type = "TOOL_TWO_BETTER" if response_model.score == 0: tmp_type = "TOOL_EQUAL" - - result.eval_details = { - "label": [f"{tmp_type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + result.label = [f"{tmp_type}.{response_model.name}"] + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result diff --git a/dingo/model/llm/compare/llm_html_extract_compare_en.py b/dingo/model/llm/compare/llm_html_extract_compare_en.py index f4b29234..fae84cc1 100644 --- a/dingo/model/llm/compare/llm_html_extract_compare_en.py +++ b/dingo/model/llm/compare/llm_html_extract_compare_en.py @@ -3,9 +3,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.model.response.response_class import 
ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -79,7 +79,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) response_think = "" @@ -105,10 +105,10 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # status if response_model.score != 1: - result.eval_status = True + result.status = True # type # if response_model.score == 1: @@ -131,11 +131,7 @@ def process_response(cls, response: str) -> ModelRes: tmp_type = "TOOL_TWO_BETTER" if response_model.score == 0: tmp_type = "TOOL_EQUAL" - - result.eval_details = { - "label": [f"{tmp_type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + result.label = [f"{tmp_type}.{response_model.name}"] + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result diff --git a/dingo/model/llm/compare/llm_html_extract_compare_v2.py b/dingo/model/llm/compare/llm_html_extract_compare_v2.py index 2f4c9410..891ac673 100644 --- a/dingo/model/llm/compare/llm_html_extract_compare_v2.py +++ b/dingo/model/llm/compare/llm_html_extract_compare_v2.py @@ -4,9 +4,9 @@ import diff_match_patch as dmp_module from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.model.response.response_class import ResponseNameReason from dingo.utils import log @@ -244,9 +244,9 @@ def _parse_response_to_structured(cls, response: str) -> ResponseNameReason: ) @classmethod - def _convert_to_model_result(cls, structured_response: ResponseNameReason) -> ModelRes: + def _convert_to_model_result(cls, structured_response: ResponseNameReason) -> EvalDetail: """ - 将结构化响应转换为 ModelRes 对象 + 将结构化响应转换为 EvalDetail 对象 映射规则: - A -> TOOL_ONE_BETTER (工具A更好,eval_status=False) @@ -257,9 +257,9 @@ def _convert_to_model_result(cls, structured_response: ResponseNameReason) -> Mo structured_response: 结构化响应对象,name 字段存储判断结果 (A/B/C) Returns: - ModelRes: 评估结果对象 + EvalDetail: 评估结果对象 """ - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # 从 name 字段获取判断结果 judgement = structured_response.name @@ -287,29 +287,26 @@ def _convert_to_model_result(cls, structured_response: ResponseNameReason) -> Mo if not mapping: raise ValueError(f"无效的判断结果: {judgement}") - result.eval_status = mapping["eval_status"] + result.status = mapping["eval_status"] # result.type = mapping["type"] # result.name = f"Judgement_{judgement}" # result.reason = [structured_response.reason] tmp_type = mapping["type"] tmp_name = f"Judgement_{judgement}" - result.eval_details = { - "label": [f"{tmp_type}.{tmp_name}"], - "metric": [cls.__name__], - "reason": [structured_response.reason] - } + result.label = [f"{tmp_type}.{tmp_name}"] + result.reason = [structured_response.reason] return result @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ 处理 LLM 返回结果 数据流: 1. 原始响应 (str) -> 结构化响应 (ResponseNameReason) - 2. 结构化响应 -> 评估结果 (ModelRes) + 2. 
结构化响应 -> 评估结果 (EvalDetail) 这种分层设计的好处: - 更清晰的责任分离 @@ -321,7 +318,7 @@ def process_response(cls, response: str) -> ModelRes: response: LLM 原始响应文本 Returns: - ModelRes: 评估结果对象 + EvalDetail: 评估结果对象 """ # 步骤1: 解析为结构化响应 structured_response = cls._parse_response_to_structured(response) diff --git a/dingo/model/llm/compare/llm_math_compare.py b/dingo/model/llm/compare/llm_math_compare.py index 13285d0d..014b89cb 100644 --- a/dingo/model/llm/compare/llm_math_compare.py +++ b/dingo/model/llm/compare/llm_math_compare.py @@ -3,9 +3,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -136,7 +136,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) # 提取思考内容和清理响应 @@ -181,30 +181,25 @@ def _clean_response(response: str) -> str: return response @staticmethod - def _create_no_formula_result(response_json: dict) -> ModelRes: - result = ModelRes() - result.eval_status = False - result.eval_details = { - "label": ["NO_FORMULA.math"], - "metric": ["LLMMathCompare"], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + def _create_no_formula_result(response_json: dict) -> EvalDetail: + result = EvalDetail(metric="LLMMathCompare") + result.status = False + result.label = ["NO_FORMULA.math"] + + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result @staticmethod - def _create_normal_result(response_json: dict) -> ModelRes: - result = ModelRes() + def _create_normal_result(response_json: dict) -> EvalDetail: + result = EvalDetail(metric="LLMMathCompare") score = response_json.get('score', 0) - result.eval_status = score != 1 + result.status = score != 1 # result.type = {1: 'TOOL_ONE_BETTER', 2: 'TOOL_TWO_BETTER'}.get(score, 'TOOL_EQUAL') # result.name = 'math' # result.reason = [json.dumps(response_json, ensure_ascii=False)] tmp_type = {1: 'TOOL_ONE_BETTER', 2: 'TOOL_TWO_BETTER'}.get(score, 'TOOL_EQUAL') - result.eval_details = { - "label": [f"{tmp_type}.math"], - "metric": ["LLMMathCompare"], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + result.label = [f"{tmp_type}.math"] + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result diff --git a/dingo/model/llm/compare/llm_table_compare.py b/dingo/model/llm/compare/llm_table_compare.py index e1510a0e..1533e6ed 100644 --- a/dingo/model/llm/compare/llm_table_compare.py +++ b/dingo/model/llm/compare/llm_table_compare.py @@ -3,9 +3,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -136,7 +136,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) # 提取思考内容和清理响应 @@ -181,30 +181,25 @@ def _clean_response(response: str) -> str: return response @staticmethod - def _create_no_table_result(response_json: dict) -> ModelRes: - result = ModelRes() - result.eval_status 
= False - result.eval_details = { - "label": ["NO_TABLE.table"], - "metric": ["LLMTableCompare"], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + def _create_no_table_result(response_json: dict) -> EvalDetail: + result = EvalDetail(metric="LLMTableCompare") + result.status = False + result.label = ["NO_TABLE.table"] + + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result @staticmethod - def _create_normal_result(response_json: dict) -> ModelRes: - result = ModelRes() + def _create_normal_result(response_json: dict) -> EvalDetail: + result = EvalDetail(metric="LLMTableCompare") score = response_json.get('score', 0) - result.eval_status = score != 1 + result.status = score != 1 # result.type = {1: 'TOOL_ONE_BETTER', 2: 'TOOL_TWO_BETTER'}.get(score, 'TOOL_EQUAL') # result.name = 'table' # result.reason = [json.dumps(response_json, ensure_ascii=False)] tmp_type = {1: 'TOOL_ONE_BETTER', 2: 'TOOL_TWO_BETTER'}.get(score, 'TOOL_EQUAL') - result.eval_details = { - "label": [f"{tmp_type}.table"], - "metric": ["LLMMathCompare"], - "reason": [json.dumps(response_json, ensure_ascii=False)] - } + result.label = [f"{tmp_type}.table"] + result.reason = [json.dumps(response_json, ensure_ascii=False)] return result diff --git a/dingo/model/llm/hhh/llm_text_3h.py b/dingo/model/llm/hhh/llm_text_3h.py index 5cdf0866..919d6bca 100644 --- a/dingo/model/llm/hhh/llm_text_3h.py +++ b/dingo/model/llm/hhh/llm_text_3h.py @@ -1,8 +1,7 @@ import json -from dingo.model import Model +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -21,7 +20,7 @@ def build_messages(cls, input_data): return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -37,23 +36,17 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: tmp_name = cls.prompt.__name__[8:].upper() - result.eval_details = { - "label": [f"{QualityLabel.QUALITY_GOOD}.{tmp_name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] if response_model.reason else ["Response meets quality criteria"] - } + result.label = [f"{QualityLabel.QUALITY_GOOD}.{tmp_name}"] + result.reason = [response_model.reason] if response_model.reason else ["Response meets quality criteria"] else: - result.eval_status = True + result.status = True tmp_name = "NOT_" + cls.prompt.__name__[8:].upper() - result.eval_details = { - "label": [f"QUALITY_BAD.{tmp_name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] if response_model.reason else ["Response fails quality criteria"] - } + result.label = [f"QUALITY_BAD.{tmp_name}"] + result.reason = [response_model.reason] if response_model.reason else ["Response fails quality criteria"] return result diff --git a/dingo/model/llm/llm_classify_qr.py b/dingo/model/llm/llm_classify_qr.py index 03fcf7fa..ebf9f28e 100644 --- a/dingo/model/llm/llm_classify_qr.py +++ b/dingo/model/llm/llm_classify_qr.py @@ -2,9 +2,9 @@ from typing import List from dingo.io.input import Data +from dingo.io.output.eval_detail import 
EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.model.response.response_class import ResponseNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -44,7 +44,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -60,16 +60,9 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseNameReason(**response_json) - result = ModelRes() - result.eval_status = False - # result.type = cls.prompt.metric_type - # result.name = response_model.name - # result.reason = [response_model.reason] - - result.eval_details = { - "label": [f"{cls.__name__}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result = EvalDetail(metric=cls.__name__) + result.status = False + result.label = [f"{cls.__name__}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/llm_classify_topic.py b/dingo/model/llm/llm_classify_topic.py index d36ffd6a..9dcf4a0b 100644 --- a/dingo/model/llm/llm_classify_topic.py +++ b/dingo/model/llm/llm_classify_topic.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.model.response.response_class import ResponseNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -46,7 +46,7 @@ class LLMClassifyTopic(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -62,16 +62,9 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseNameReason(**response_json) - result = ModelRes() - result.eval_status = False - # result.type = cls.prompt.metric_type - # result.name = response_model.name - # result.reason = [response_model.reason] - - result.eval_details = { - "label": [f"{cls.__name__}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result = EvalDetail(metric=cls.__name__) + result.status = False + result.label = [f"{cls.__name__}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/llm_dataman_assessment.py b/dingo/model/llm/llm_dataman_assessment.py index 3163aaff..468cbc52 100644 --- a/dingo/model/llm/llm_dataman_assessment.py +++ b/dingo/model/llm/llm_dataman_assessment.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -103,7 +103,7 @@ class LLMDatamanAssessment(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -121,26 +121,14 @@ def process_response(cls, response: str) -> ModelRes: # Parse the response using the 
ResponseScoreTypeNameReason model response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Set eval_status based on score (1 = good quality, 0 = low quality) if response_model.score == 1: - result.eval_status = False + result.status = False else: - result.eval_status = True - - # # Set type to the domain classification - # result.type = response_model.type - # - # # Set name to the quality category - # result.name = response_model.name - # - # # Set reason to the detailed assessment - # result.reason = [response_model.reason] - - result.eval_details = { - "label": [f"{response_model.type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + + result.label = [f"{response_model.type}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/llm_document_parsing_ocr.py b/dingo/model/llm/llm_document_parsing_ocr.py index e58932e2..bb5465cd 100644 --- a/dingo/model/llm/llm_document_parsing_ocr.py +++ b/dingo/model/llm/llm_document_parsing_ocr.py @@ -1,15 +1,12 @@ -import base64 import json import re from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log -from dingo.utils.exception import ConvertJsonError @Model.llm_register("LLMMinerURecognizeQuality") @@ -100,7 +97,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) json_match = re.search(r'\{[\s\S]*"errors"[\s\S]*\}', response) types = [] @@ -124,18 +121,12 @@ def process_response(cls, response: str) -> ModelRes: else: log.error("未找到JSON内容") - result = ModelRes() - result.eval_status = False - # result.type = types - # result.name = names - # result.reason = [json_str] if 'json_str' in locals() else [response] + result = EvalDetail(metric=cls.__name__) + result.status = False tmp_type = '.'.join(types) tmp_name = '.'.join(names) - result.eval_details = { - "label": [f"{tmp_type}.{tmp_name}"], - "metric": [cls.__name__], - "reason": [json_str] if 'json_str' in locals() else [response] - } + result.label = [f"{tmp_type}.{tmp_name}"] + result.reason = [json_str] if 'json_str' in locals() else [response] return result diff --git a/dingo/model/llm/llm_factcheck_public.py b/dingo/model/llm/llm_factcheck_public.py index 59d20bbc..74b0177e 100644 --- a/dingo/model/llm/llm_factcheck_public.py +++ b/dingo/model/llm/llm_factcheck_public.py @@ -1,11 +1,10 @@ from dataclasses import dataclass -from typing import Dict, List, Literal, Optional +from typing import Dict, List, Literal from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel -from dingo.utils.exception import ExceedMaxTokens @dataclass @@ -191,7 +190,7 @@ class LLMFactCheckPublic(BaseOpenAI): } @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: """执行两阶段评估""" try: # 0. 初始化 client @@ -201,12 +200,9 @@ def eval(cls, input_data: Data) -> ModelRes: # 1. 
提取声明 claims = cls._extract_claims(input_data.prompt, input_data.content) if not claims: - return ModelRes( - # score=0.0, - # threshold=cls.threshold, - reason=["No factual claims found"], - # raw_resp={"claims": [], "results": []} - ) + result = EvalDetail(metric=cls.__name__) + result.reason = ["No factual claims found"] + return result # 2. 分批验证 all_results = [] @@ -219,40 +215,24 @@ def eval(cls, input_data: Data) -> ModelRes: metrics = cls._calculate_metrics(all_results) # 4. 设置评估结果 - result = ModelRes( - # score=metrics["factual_ratio"], - # threshold=cls.threshold, - reason=[cls._format_reason(metrics)], - # raw_resp={ - # "claims": claims, - # "results": all_results, - # "metrics": metrics - # } - ) + result = EvalDetail(metric=cls.__name__) + result.reason = [cls._format_reason(metrics)] # 5. 根据分数设置状态 if metrics["factual_ratio"] < cls.threshold: - result.eval_status = True - # result.type = "QUALITY_BAD_FACTUALITY" - # result.name = "FACTUALITY_CHECK_FAILED" - result.eval_details.label = ["QUALITY_BAD_FACTUALITY.FACTUALITY_CHECK_FAILED"] + result.status = True + result.label = ["QUALITY_BAD_FACTUALITY.FACTUALITY_CHECK_FAILED"] else: - # result.type = "QUALITY_GOOD" - # result.name = "FACTUALITY_CHECK_PASSED" - result.eval_details.label = [f"{QualityLabel.QUALITY_GOOD}.FACTUALITY_CHECK_PASSED"] + result.label = [f"{QualityLabel.QUALITY_GOOD}.FACTUALITY_CHECK_PASSED"] return result except Exception as e: - return ModelRes( - eval_status=True, - type="QUALITY_BAD_FACTUALITY", - name="FACTUALITY_CHECK_ERROR", - # score=0.0, - # threshold=cls.threshold, - reason=[f"Evaluation failed: {str(e)}"], - # raw_resp={"error": str(e)} - ) + result = EvalDetail(metric=cls.__name__) + result.status = True + result.label = ["QUALITY_BAD_FACTUALITY.FACTUALITY_CHECK_ERROR"] + result.reason = [f"Evaluation failed: {str(e)}"] + return result @classmethod def _extract_claims(cls, prompt: str, response: str) -> List[str]: diff --git a/dingo/model/llm/llm_hallucination.py b/dingo/model/llm/llm_hallucination.py index 79407b77..36317858 100644 --- a/dingo/model/llm/llm_hallucination.py +++ b/dingo/model/llm/llm_hallucination.py @@ -1,11 +1,11 @@ import json -from typing import List, Union +from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel -from dingo.model.response.response_hallucination import HallucinationScoreReason, HallucinationVerdict, HallucinationVerdicts +from dingo.model.response.response_hallucination import HallucinationVerdict, HallucinationVerdicts from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -19,7 +19,7 @@ class LLMHallucination(BaseOpenAI): This implementation adapts DeepEval's verdict-based approach to Dingo's architecture: 1. Generates verdicts for each context against the actual output 2. Calculates hallucination score based on contradiction ratio - 3. Returns standardized ModelRes with eval_status based on threshold + 3. Returns standardized EvalDetail with eval_status based on threshold """ # Metadata for documentation generation _metric_info = { @@ -107,7 +107,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ Process LLM response to calculate hallucination score. 
Follows DeepEval's approach: @@ -142,27 +142,17 @@ def process_response(cls, response: str) -> ModelRes: # Generate detailed reason reason = cls._generate_reason(verdicts, score) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Set eval_status based on threshold if score > cls.threshold: - result.eval_status = True - # result.type = "QUALITY_BAD_HALLUCINATION" - # result.name = "HALLUCINATION_DETECTED" - result.eval_details.label = ['QUALITY_BAD_HALLUCINATION.HALLUCINATION_DETECTED'] + result.status = True + result.label = ['QUALITY_BAD_HALLUCINATION.HALLUCINATION_DETECTED'] else: - # result.type = "QUALITY_GOOD" - # result.name = "NO_HALLUCINATION" - result.eval_details.label = [f'{QualityLabel.QUALITY_GOOD}.NO_HALLUCINATION'] + result.label = [f'{QualityLabel.QUALITY_GOOD}.NO_HALLUCINATION'] result.reason = [reason] - # Store additional metadata - # result.score = score - # result.verdict_details = [ - # f"{v.verdict}: {v.reason}" for v in verdicts - # ] - log.info(f"Hallucination score: {score:.3f}, threshold: {cls.threshold}") return result @@ -220,22 +210,17 @@ def _generate_reason(cls, verdicts: List[HallucinationVerdict], score: float) -> return "\n".join(reason_parts) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: """ Override eval to add context validation """ # Validate that context is provided if not hasattr(input_data, 'context') or not input_data.context: - return ModelRes( - eval_status=True, - # type="QUALITY_BAD", - # name="MISSING_CONTEXT", - # reason=["Context is required for hallucination detection but was not provided"] - eval_details = { - "label": ["QUALITY_BAD.MISSING_CONTEXT"], - "reason": ["Context is required for hallucination detection but was not provided"] - } - ) + result = EvalDetail(metric=cls.__name__) + result.status = True + result.label = ["QUALITY_BAD.MISSING_CONTEXT"] + result.reason = ["Context is required for hallucination detection but was not provided"] + return result # Call parent eval method return super().eval(input_data) diff --git a/dingo/model/llm/llm_long_video_qa.py b/dingo/model/llm/llm_long_video_qa.py index 54178a5f..17af0e22 100644 --- a/dingo/model/llm/llm_long_video_qa.py +++ b/dingo/model/llm/llm_long_video_qa.py @@ -1,8 +1,6 @@ -import json - +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log @@ -115,18 +113,11 @@ class LLMLongVideoQa(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) - result = ModelRes() - result.eval_status = False - # result.type = "text" - # result.name = "qa_pairs" - # result.reason = [response] - - result.eval_details = { - "label": ["text.qa_pairs"], - "metric": [cls.__name__], - "reason": [response] - } + result = EvalDetail(metric=cls.__name__) + result.status = False + result.label = ["text.qa_pairs"] + result.reason = [response] return result diff --git a/dingo/model/llm/llm_perspective.py b/dingo/model/llm/llm_perspective.py index 3fd86754..ec706f1a 100644 --- a/dingo/model/llm/llm_perspective.py +++ b/dingo/model/llm/llm_perspective.py @@ -2,9 +2,9 @@ from dingo.config.input_args import EvaluatorLLMArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base import BaseLLM 
-from dingo.model.modelres import ModelRes, QualityLabel from dingo.utils import log @@ -38,7 +38,7 @@ def create_client(cls): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: cls.create_client() analyze_request = { "comment": {"text": input_data.content}, @@ -69,43 +69,24 @@ def eval(cls, input_data: Data) -> ModelRes: error_list.append(e) if is_good: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": [f"{QualityLabel.QUALITY_GOOD}.PERSPECTIVE"], - "metric": [cls.__name__], - "reason": [] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = [f"{QualityLabel.QUALITY_GOOD}.PERSPECTIVE"] + res.reason = [] return res else: - # return ModelRes( - # eval_status=True, - # type="QUALITY_BAD", - # name="PERSPECTIVE", - # reason=error_list, - # ) - res = ModelRes() - res.eval_status = True - res.eval_details = { - "label": ["QUALITY_BAD.PERSPECTIVE"], - "metric": [cls.__name__], - "reason": error_list - } + res = EvalDetail(metric=cls.__name__) + res.status = True + res.label = ["QUALITY_BAD.PERSPECTIVE"] + res.reason = error_list return res except Exception as e: attempts += 1 time.sleep(1) except_msg = str(e) - # return ModelRes( - # eval_status=True, type="QUALITY_BAD", name="API_LOSS", reason=[except_msg] - # ) - - res = ModelRes() - res.eval_status = True - res.eval_details = { - "label": ["QUALITY_BAD.API_LOSS"], - "metric": [cls.__name__], - "reason": [except_msg] - } + res = EvalDetail(metric=cls.__name__) + res.status = True + res.label = ["QUALITY_BAD.API_LOSS"] + res.reason = [except_msg] return res diff --git a/dingo/model/llm/llm_resume_quality.py b/dingo/model/llm/llm_resume_quality.py index 912b7afb..9b40a12e 100644 --- a/dingo/model/llm/llm_resume_quality.py +++ b/dingo/model/llm/llm_resume_quality.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -88,7 +88,7 @@ class LLMResumeQuality(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) # Clean response format @@ -107,23 +107,16 @@ def process_response(cls, response: str) -> ModelRes: # Validate response using Pydantic model response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Check if resume is good quality if response_model.type == "Good" and response_model.score == 1: - result.eval_status = False - # result.type = "QUALITY_GOOD" - # result.name = "ResumeQualityGood" - # result.reason = [response_model.reason] - - result.eval_details = { - "label": f"{QualityLabel.QUALITY_GOOD}.ResumeQualityGood", - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = False + result.label = [f"{QualityLabel.QUALITY_GOOD}.ResumeQualityGood"] + result.reason = [response_model.reason] else: # Resume has quality issues - result.eval_status = True + result.status = True # Map issue type to metric type type_mapping = { @@ -136,16 +129,9 @@ def process_response(cls, response: str) -> ModelRes: "Completeness": "RESUME_QUALITY_BAD_COMPLETENESS" } - # result.type = 
type_mapping.get(response_model.type, "RESUME_QUALITY_BAD") - # result.name = response_model.name - # result.reason = [response_model.reason] - tmp_type = type_mapping.get(response_model.type, "RESUME_QUALITY_BAD") tmp_name = response_model.name - result.eval_details = { - "label": [f"{tmp_type}.{tmp_name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.label = [f"{tmp_type}.{tmp_name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/llm_text_chaos.py b/dingo/model/llm/llm_text_chaos.py index fc52f844..f563d691 100644 --- a/dingo/model/llm/llm_text_chaos.py +++ b/dingo/model/llm/llm_text_chaos.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -19,7 +19,7 @@ class LLMTextChaos(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -35,24 +35,14 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"{QualityLabel.QUALITY_GOOD}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.label = [f"{QualityLabel.QUALITY_GOOD}.{cls.__name__}"] + result.reason = [response_model.reason] else: - result.eval_status = True - # result.type = response_model.type - # result.name = response_model.name - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"{response_model.type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + result.label = [f"{response_model.type}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/llm_text_code_list_issue.py b/dingo/model/llm/llm_text_code_list_issue.py index f1821373..47447e39 100644 --- a/dingo/model/llm/llm_text_code_list_issue.py +++ b/dingo/model/llm/llm_text_code_list_issue.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -32,7 +32,7 @@ class LLMTextCodeListIssue(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -48,24 +48,14 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - # result.reason = [response_model.reason] - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - 
"reason": [response_model.reason] - } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = [response_model.reason] else: - result.eval_status = True - # result.type = response_model.type - # result.name = response_model.name - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"{response_model.type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + result.label = [f"{response_model.type}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/meta_rater/llm_meta_rater_cleanliness.py b/dingo/model/llm/meta_rater/llm_meta_rater_cleanliness.py index ee200247..dedc3018 100644 --- a/dingo/model/llm/meta_rater/llm_meta_rater_cleanliness.py +++ b/dingo/model/llm/meta_rater/llm_meta_rater_cleanliness.py @@ -9,9 +9,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -95,7 +95,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ Process the LLM response for Meta-rater Cleanliness evaluation. @@ -103,7 +103,7 @@ def process_response(cls, response: str) -> ModelRes: response: Raw response string from the LLM Returns: - ModelRes: Processed evaluation results with score and reason + EvalDetail: Processed evaluation results with score and reason """ log.info(response) @@ -125,30 +125,24 @@ def process_response(cls, response: str) -> ModelRes: score = response_json.get('score', 0) reason = response_json.get('reason', '') - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Meta-rater uses 1-5 scoring, with higher scores being better; # We normalize this to binary classification for compatibility # Scores >= 3 are considered "good quality", < 3 are "low quality" if score >= 3: - result.eval_status = False + result.status = False # result.type = cls.prompt.metric_type # result.name = "HighQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.HighQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.HighQuality"] + result.reason = [f"Score: {score}/5. {reason}"] else: - result.eval_status = True + result.status = True # result.type = cls.prompt.metric_type # result.name = "LowQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.LowQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.LowQuality"] + result.reason = [f"Score: {score}/5. 
{reason}"] return result diff --git a/dingo/model/llm/meta_rater/llm_meta_rater_professionalism.py b/dingo/model/llm/meta_rater/llm_meta_rater_professionalism.py index 513e8163..55b0ef13 100644 --- a/dingo/model/llm/meta_rater/llm_meta_rater_professionalism.py +++ b/dingo/model/llm/meta_rater/llm_meta_rater_professionalism.py @@ -10,9 +10,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -90,7 +90,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ Process the LLM response for Meta-rater evaluation. @@ -98,7 +98,7 @@ def process_response(cls, response: str) -> ModelRes: response: Raw response string from the LLM Returns: - ModelRes: Processed evaluation results with score and reason + EvalDetail: Processed evaluation results with score and reason """ log.info(response) @@ -120,30 +120,24 @@ def process_response(cls, response: str) -> ModelRes: score = response_json.get('score', 0) reason = response_json.get('reason', '') - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Meta-rater uses 1-5 scoring, with higher scores being better; # We normalize this to binary classification for compatibility # Scores >= 3 are considered "good quality", < 3 are "low quality" if score >= 3: - result.eval_status = False + result.status = False # result.type = cls.prompt.metric_type # result.name = "HighQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.HighQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.HighQuality"] + result.reason = [f"Score: {score}/5. {reason}"] else: - result.eval_status = True + result.status = True # result.type = cls.prompt.metric_type # result.name = "LowQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.LowQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.LowQuality"] + result.reason = [f"Score: {score}/5. {reason}"] return result diff --git a/dingo/model/llm/meta_rater/llm_meta_rater_readability.py b/dingo/model/llm/meta_rater/llm_meta_rater_readability.py index b169978f..05f6670b 100644 --- a/dingo/model/llm/meta_rater/llm_meta_rater_readability.py +++ b/dingo/model/llm/meta_rater/llm_meta_rater_readability.py @@ -9,9 +9,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -86,7 +86,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ Process the LLM response for Meta-rater Readability evaluation. 
@@ -94,7 +94,7 @@ def process_response(cls, response: str) -> ModelRes: response: Raw response string from the LLM Returns: - ModelRes: Processed evaluation results with score and reason + EvalDetail: Processed evaluation results with score and reason """ log.info(response) @@ -116,30 +116,24 @@ def process_response(cls, response: str) -> ModelRes: score = response_json.get('score', 0) reason = response_json.get('reason', '') - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Meta-rater uses 1-5 scoring, with higher scores being better; # We normalize this to binary classification for compatibility # Scores >= 3 are considered "good quality", < 3 are "low quality" if score >= 3: - result.eval_status = False + result.status = False # result.type = cls.prompt.metric_type # result.name = "HighQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.HighQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.HighQuality"] + result.reason = [f"Score: {score}/5. {reason}"] else: - result.eval_status = True + result.status = True # result.type = cls.prompt.metric_type # result.name = "LowQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.LowQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.LowQuality"] + result.reason = [f"Score: {score}/5. {reason}"] return result diff --git a/dingo/model/llm/meta_rater/llm_meta_rater_reasoning.py b/dingo/model/llm/meta_rater/llm_meta_rater_reasoning.py index b4b180cd..306b6e81 100644 --- a/dingo/model/llm/meta_rater/llm_meta_rater_reasoning.py +++ b/dingo/model/llm/meta_rater/llm_meta_rater_reasoning.py @@ -9,9 +9,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -86,7 +86,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ Process the LLM response for Meta-rater Reasoning evaluation. @@ -94,7 +94,7 @@ def process_response(cls, response: str) -> ModelRes: response: Raw response string from the LLM Returns: - ModelRes: Processed evaluation results with score and reason + EvalDetail: Processed evaluation results with score and reason """ log.info(response) @@ -116,30 +116,24 @@ def process_response(cls, response: str) -> ModelRes: score = response_json.get('score', 0) reason = response_json.get('reason', '') - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # Meta-rater uses 1-5 scoring, with higher scores being better; # We normalize this to binary classification for compatibility # Scores >= 3 are considered "good quality", < 3 are "low quality" if score >= 3: - result.eval_status = False + result.status = False # result.type = cls.prompt.metric_type # result.name = "HighQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.HighQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.HighQuality"] + result.reason = [f"Score: {score}/5. 
{reason}"] else: - result.eval_status = True + result.status = True # result.type = cls.prompt.metric_type # result.name = "LowQuality" # result.reason = [f"Score: {score}/5. {reason}"] - result.eval_details = { - "label": [f"{cls.__name__}.LowQuality"], - "metric": [cls.__name__], - "reason": [f"Score: {score}/5. {reason}"] - } + result.label = [f"{cls.__name__}.LowQuality"] + result.reason = [f"Score: {score}/5. {reason}"] return result diff --git a/dingo/model/llm/mineru/vlm_document_parsing.py b/dingo/model/llm/mineru/vlm_document_parsing.py index d122ddf2..2f9a83bd 100644 --- a/dingo/model/llm/mineru/vlm_document_parsing.py +++ b/dingo/model/llm/mineru/vlm_document_parsing.py @@ -3,9 +3,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log @@ -192,7 +192,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) response = response.replace("```json", "") @@ -218,12 +218,12 @@ def process_response(cls, response: str) -> ModelRes: except json.JSONDecodeError as e: log.error(f"JSON解析错误: {e}") - result = ModelRes() - # result.eval_status = False + result = EvalDetail(metric=cls.__name__) + # result.status = False # result.type = types # result.name = names # result.reason = [response] - result.eval_details.label = tmp_types - result.eval_details.reason = [response] + result.label = tmp_types + result.reason = [response] return result diff --git a/dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py b/dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py index 861d5f9d..85dfea3e 100644 --- a/dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +++ b/dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py @@ -4,12 +4,10 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log -from dingo.utils.exception import ConvertJsonError @Model.llm_register("VLMDocumentParsingOCRTrain") @@ -109,7 +107,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) json_match = re.search(r'\{[\s\S]*"errors"[\s\S]*\}', response) # types = [] @@ -135,12 +133,12 @@ def process_response(cls, response: str) -> ModelRes: else: log.error("未找到JSON内容") - result = ModelRes() - result.eval_status = False + result = EvalDetail(metric=cls.__name__) + result.status = False # result.type = types # result.name = names # result.reason = [json_str] if 'json_str' in locals() else [response] - result.eval_details.label = tmp_types - result.eval_details.reason = [json_str] if 'json_str' in locals() else [response] + result.label = tmp_types + result.reason = [json_str] if 'json_str' in locals() else [response] return result diff --git a/dingo/model/llm/rag/llm_rag_answer_relevancy.py b/dingo/model/llm/rag/llm_rag_answer_relevancy.py index 13e859f0..b9d7dbae 100644 --- a/dingo/model/llm/rag/llm_rag_answer_relevancy.py +++ 
b/dingo/model/llm/rag/llm_rag_answer_relevancy.py @@ -11,9 +11,9 @@ import numpy as np from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -204,7 +204,7 @@ def calculate_score(cls, answers: List[Dict[str, Any]], original_question: str) return score @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: """评估答案相关性""" # 初始化embedding模型(如果尚未初始化) if cls.embedding_model is None: @@ -234,7 +234,7 @@ def eval(cls, input_data: Data) -> ModelRes: score = cls.calculate_score(generated_questions, original_question) # 构建结果 - result = ModelRes() + result = EvalDetail(metric=cls.__name__) result.score = score # 根据分数判断是否通过,默认阈值为5 @@ -250,29 +250,20 @@ def eval(cls, input_data: Data) -> ModelRes: cls.init_embedding_model(embedding_model_name) if score >= threshold: - result.eval_status = False - result.eval_details = { - "label": ["QUALITY_GOOD.ANSWER_RELEVANCY_PASS"], - "metric": [cls.__name__], - "reason": [f"答案相关性评估通过 (分数: {score:.2f}/10)"] - } + result.status = False + result.label = ["QUALITY_GOOD.ANSWER_RELEVANCY_PASS"] + result.reason = [f"答案相关性评估通过 (分数: {score:.2f}/10)"] else: - result.eval_status = True - result.eval_details = { - "label": ["QUALITY_BAD.ANSWER_RELEVANCY_FAIL"], - "metric": [cls.__name__], - "reason": [f"答案相关性评估未通过 (分数: {score:.2f}/10)"] - } + result.status = True + result.label = ["QUALITY_BAD.ANSWER_RELEVANCY_FAIL"] + result.reason = [f"答案相关性评估未通过 (分数: {score:.2f}/10)"] return result except Exception as e: log.error(f"Answer Relevancy评估出错: {str(e)}") - result = ModelRes() - result.eval_status = True - result.eval_details = { - "label": ["QUALITY_BAD.ANSWER_RELEVANCY_ERROR"], - "metric": [cls.__name__], - "reason": [f"答案相关性评估出错: {str(e)}"] - } + result = EvalDetail(metric=cls.__name__) + result.status = True + result.label = ["QUALITY_BAD.ANSWER_RELEVANCY_ERROR"] + result.reason = [f"答案相关性评估出错: {str(e)}"] return result diff --git a/dingo/model/llm/rag/llm_rag_context_precision.py b/dingo/model/llm/rag/llm_rag_context_precision.py index 85a514e3..e9cefb5a 100644 --- a/dingo/model/llm/rag/llm_rag_context_precision.py +++ b/dingo/model/llm/rag/llm_rag_context_precision.py @@ -8,10 +8,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -114,7 +113,6 @@ def _calculate_average_precision(cls, verdicts: List[bool]) -> float: Returns: float: 平均精度分数 """ - import numpy as np # 转换为0/1列表 verdict_list = [1 if v else 0 for v in verdicts] @@ -197,14 +195,14 @@ def build_messages(cls, input_data: Data) -> List: return messages_list @classmethod - def process_response(cls, responses: List[str]) -> ModelRes: + def process_response(cls, responses: List[str]) -> EvalDetail: """处理LLM响应 Args: responses: 每个上下文的评估响应列表 Returns: - ModelRes: 评估结果 + EvalDetail: 评估结果 """ log.info(f"RAG Context Precision responses: {responses}") @@ -251,7 +249,7 @@ def process_response(cls, responses: List[str]) -> ModelRes: reason_text = "\n\n".join(all_reasons) reason_text += f"\n\n平均精度: {avg_precision:.4f},转换为0-10分: 
{score}/10" - result = ModelRes() + result = EvalDetail(metric=cls.__name__) result.score = score # 根据分数判断是否通过,默认阈值为5 @@ -260,24 +258,18 @@ def process_response(cls, responses: List[str]) -> ModelRes: threshold = cls.dynamic_config.parameters.get('threshold', 5) if score >= threshold: - result.eval_status = False - result.eval_details = { - "label": ["QUALITY_GOOD.CONTEXT_PRECISION_PASS"], - "metric": [cls.__name__], - "reason": [f"上下文精度评估通过 (分数: {score}/10)\n{reason_text}"] - } + result.status = False + result.label = ["QUALITY_GOOD.CONTEXT_PRECISION_PASS"] + result.reason = [f"上下文精度评估通过 (分数: {score}/10)\n{reason_text}"] else: - result.eval_status = True - result.eval_details = { - "label": ["QUALITY_BAD.CONTEXT_PRECISION_FAIL"], - "metric": [cls.__name__], - "reason": [f"上下文精度评估未通过 (分数: {score}/10)\n{reason_text}"] - } + result.status = True + result.label = ["QUALITY_BAD.CONTEXT_PRECISION_FAIL"] + result.reason = [f"上下文精度评估未通过 (分数: {score}/10)\n{reason_text}"] return result @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: """重写父类的eval方法,支持为每个上下文发送单独的请求""" if cls.client is None: cls.create_client() @@ -303,13 +295,16 @@ def eval(cls, input_data: Data) -> ModelRes: if response is None: # 如果所有尝试都失败,返回错误结果 - res = ModelRes() - res.eval_status = True - res.eval_details = { - "label": ["QUALITY_BAD.REQUEST_FAILED"], - "metric": [cls.__name__], - "reason": [f"为上下文{item['context_index']+1}发送请求失败"] - } + res = EvalDetail(metric=cls.__name__) + # res.eval_status = True + # res.eval_details = { + # "label": ["QUALITY_BAD.REQUEST_FAILED"], + # "metric": [cls.__name__], + # "reason": [f"为上下文{item['context_index']+1}发送请求失败"] + # } + res.status = True + res.label = ["QUALITY_BAD.REQUEST_FAILED"] + res.reason = [f"为上下文{item['context_index']+1}发送请求失败"] return res responses.append(response) diff --git a/dingo/model/llm/rag/llm_rag_context_recall.py b/dingo/model/llm/rag/llm_rag_context_recall.py index 0b6019b5..2b814101 100644 --- a/dingo/model/llm/rag/llm_rag_context_recall.py +++ b/dingo/model/llm/rag/llm_rag_context_recall.py @@ -8,10 +8,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -160,7 +159,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ 处理LLM响应 @@ -168,7 +167,7 @@ def process_response(cls, response: str) -> ModelRes: response: LLM原始响应 Returns: - ModelRes对象 + EvalDetail对象 """ log.info(f"RAG Context Recall response: {response}") @@ -198,7 +197,7 @@ def process_response(cls, response: str) -> ModelRes: # 生成reason reason = f"在 {total_statements} 个陈述中,有 {attributed_statements} 个可以从上下文中归因,{total_statements - attributed_statements} 个不能归因" - result = ModelRes() + result = EvalDetail(metric=cls.__name__) result.score = score # 根据分数判断是否通过,默认阈值为5 @@ -207,18 +206,12 @@ def process_response(cls, response: str) -> ModelRes: threshold = cls.dynamic_config.parameters.get('threshold', 5) if score >= threshold: - result.eval_status = False - result.eval_details = { - "label": ["QUALITY_GOOD.CONTEXT_RECALL_PASS"], - "metric": [cls.__name__], - "reason": [f"上下文召回评估通过 (分数: 
{score:.2f}/10)\n{reason}"] - } + result.status = False + result.label = ["QUALITY_GOOD.CONTEXT_RECALL_PASS"] + result.reason = [f"上下文召回评估通过 (分数: {score:.2f}/10)\n{reason}"] else: - result.eval_status = True - result.eval_details = { - "label": ["QUALITY_BAD.CONTEXT_RECALL_FAIL"], - "metric": [cls.__name__], - "reason": [f"上下文召回评估未通过 (分数: {score:.2f}/10)\n{reason}"] - } + result.status = True + result.label = ["QUALITY_BAD.CONTEXT_RECALL_FAIL"] + result.reason = [f"上下文召回评估未通过 (分数: {score:.2f}/10)\n{reason}"] return result diff --git a/dingo/model/llm/rag/llm_rag_context_relevancy.py b/dingo/model/llm/rag/llm_rag_context_relevancy.py index 734f7314..668f643b 100644 --- a/dingo/model/llm/rag/llm_rag_context_relevancy.py +++ b/dingo/model/llm/rag/llm_rag_context_relevancy.py @@ -8,10 +8,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -160,7 +159,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ 处理LLM响应 @@ -168,7 +167,7 @@ def process_response(cls, response: str) -> ModelRes: response: LLM原始响应 Returns: - ModelRes对象 + EvalDetail对象 """ log.info(f"RAG Context Relevancy response: {response}") @@ -199,7 +198,7 @@ def process_response(cls, response: str) -> ModelRes: else: # rating == 2 reason = "上下文包含与问题相关的信息" - result = ModelRes() + result = EvalDetail(metric=cls.__name__) result.score = score # 根据分数判断是否通过,默认阈值为5 @@ -208,18 +207,12 @@ def process_response(cls, response: str) -> ModelRes: threshold = cls.dynamic_config.parameters.get('threshold', 5) if score >= threshold: - result.eval_status = False - result.eval_details = { - "label": ["QUALITY_GOOD.CONTEXT_RELEVANCY_PASS"], - "metric": [cls.__name__], - "reason": [f"上下文相关性评估通过 (分数: {score:.2f}/10)\n{reason}"] - } + result.status = False + result.label = ["QUALITY_GOOD.CONTEXT_RELEVANCY_PASS"] + result.reason = [f"上下文相关性评估通过 (分数: {score:.2f}/10)\n{reason}"] else: - result.eval_status = True - result.eval_details = { - "label": ["QUALITY_BAD.CONTEXT_RELEVANCY_FAIL"], - "metric": [cls.__name__], - "reason": [f"上下文相关性评估未通过 (分数: {score:.2f}/10)\n{reason}"] - } + result.status = True + result.label = ["QUALITY_BAD.CONTEXT_RELEVANCY_FAIL"] + result.reason = [f"上下文相关性评估未通过 (分数: {score:.2f}/10)\n{reason}"] return result diff --git a/dingo/model/llm/rag/llm_rag_faithfulness.py b/dingo/model/llm/rag/llm_rag_faithfulness.py index c31a5a50..09409697 100644 --- a/dingo/model/llm/rag/llm_rag_faithfulness.py +++ b/dingo/model/llm/rag/llm_rag_faithfulness.py @@ -8,10 +8,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -241,7 +240,7 @@ def build_messages(cls, input_data: Data) -> List: return messages @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: """ 处理LLM响应 @@ -249,7 +248,7 @@ 
def process_response(cls, response: str) -> ModelRes: response: LLM原始响应 Returns: - ModelRes对象 + EvalDetail对象 """ log.info(f"RAG Faithfulness response: {response}") @@ -283,7 +282,7 @@ def process_response(cls, response: str) -> ModelRes: else: reason = "未提取到任何陈述" - result = ModelRes() + result = EvalDetail(metric=cls.__name__) result.score = score # 根据分数判断是否通过,默认阈值为5 @@ -292,18 +291,12 @@ def process_response(cls, response: str) -> ModelRes: threshold = cls.dynamic_config.parameters.get('threshold', 5) if score >= threshold: - result.eval_status = False - result.eval_details = { - "label": ["QUALITY_GOOD.FAITHFULNESS_PASS"], - "metric": [cls.__name__], - "reason": [f"忠实度评估通过 (分数: {score:.2f}/10)\n{reason}"] - } + result.status = False + result.label = ["QUALITY_GOOD.FAITHFULNESS_PASS"] + result.reason = [f"忠实度评估通过 (分数: {score:.2f}/10)\n{reason}"] else: - result.eval_status = True - result.eval_details = { - "label": ["QUALITY_BAD.FAITHFULNESS_FAIL"], - "metric": [cls.__name__], - "reason": [f"忠实度评估未通过 (分数: {score:.2f}/10)\n{reason}"] - } + result.status = True + result.label = ["QUALITY_BAD.FAITHFULNESS_FAIL"] + result.reason = [f"忠实度评估未通过 (分数: {score:.2f}/10)\n{reason}"] return result diff --git a/dingo/model/llm/security/llm_security.py b/dingo/model/llm/security/llm_security.py index 2d9d7aa3..287a5fb5 100644 --- a/dingo/model/llm/security/llm_security.py +++ b/dingo/model/llm/security/llm_security.py @@ -1,8 +1,7 @@ import json -from dingo.model import Model +from dingo.io.output.eval_detail import EvalDetail from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -10,7 +9,7 @@ # @Model.llm_register("LLMSecurity") class LLMSecurity(BaseOpenAI): @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -24,19 +23,13 @@ def process_response(cls, response: str) -> ModelRes: except json.JSONDecodeError: raise ConvertJsonError(f"Convert to JSON format failed: {response}") - result = ModelRes() + result = EvalDetail(metric=cls.__name__) tmp_reason = [] for k, v in response_json.items(): if v == "pos": - result.eval_status = True - # result.type = "Security" - # result.name = cls.prompt.__name__ - # result.reason.append(k) + result.status = True tmp_reason.append(k) - result.eval_details = { - "label": [f"Security.{cls.__name__}"], - "metric": [cls.__name__], - "reason": tmp_reason - } + result.label = [f"Security.{cls.__name__}"] + result.reason = tmp_reason return result diff --git a/dingo/model/llm/text_quality/llm_text_quality_v3.py b/dingo/model/llm/text_quality/llm_text_quality_v3.py index 995b3a35..51c08c7e 100644 --- a/dingo/model/llm/text_quality/llm_text_quality_v3.py +++ b/dingo/model/llm/text_quality/llm_text_quality_v3.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -49,7 +49,7 @@ class LLMTextQualityV3(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) # 清理 markdown 代码块 @@ -79,13 +79,10 @@ def process_response(cls, response: str) -> ModelRes: if not 
isinstance(reason_list, list): reason_list = [reason_list] if reason_list else [] - result = ModelRes() + result = EvalDetail(metric=cls.__name__) if score == 1: - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - "reason": reason_list if reason_list else [""] - } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = reason_list if reason_list else [""] else: # 构建标签:type.name 格式 labels = [] @@ -94,11 +91,8 @@ def process_response(cls, response: str) -> ModelRes: if not labels: labels = [f"QUALITY_BAD.{cls.__name__}"] - result.eval_status = True - result.eval_details = { - "label": labels, - "metric": [cls.__name__], - "reason": reason_list if reason_list else [""] - } + result.status = True + result.label = labels + result.reason = reason_list if reason_list else [""] return result diff --git a/dingo/model/llm/text_quality/llm_text_repeat.py b/dingo/model/llm/text_quality/llm_text_repeat.py index 516c3386..5a162095 100644 --- a/dingo/model/llm/text_quality/llm_text_repeat.py +++ b/dingo/model/llm/text_quality/llm_text_repeat.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -19,7 +19,7 @@ class LLMTextRepeat(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -35,24 +35,14 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - # result.reason = [response_model.reason] - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = [response_model.reason] else: - result.eval_status = True - # result.type = response_model.type - # result.name = response_model.name - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"{response_model.type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + result.label = [f"{response_model.type}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/text_quality/llm_text_unread_issue.py b/dingo/model/llm/text_quality/llm_text_unread_issue.py index ab42fe38..155d5786 100644 --- a/dingo/model/llm/text_quality/llm_text_unread_issue.py +++ b/dingo/model/llm/text_quality/llm_text_unread_issue.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -41,7 +41,7 @@ class LLMTextUnreadIssue(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if 
response.startswith("```json"): @@ -57,24 +57,14 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - # result.reason = [response_model.reason] - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = [response_model.reason] else: - result.eval_status = True - # result.type = response_model.type - # result.name = response_model.name - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"{response_model.type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + result.label = [f"{response_model.type}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/text_quality/llm_text_word_stick.py b/dingo/model/llm/text_quality/llm_text_word_stick.py index 91164a7d..182a3608 100644 --- a/dingo/model/llm/text_quality/llm_text_word_stick.py +++ b/dingo/model/llm/text_quality/llm_text_word_stick.py @@ -1,8 +1,8 @@ import json +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.response.response_class import ResponseScoreTypeNameReason from dingo.utils import log from dingo.utils.exception import ConvertJsonError @@ -35,7 +35,7 @@ class LLMTextWordStick(BaseOpenAI): """ @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: str) -> EvalDetail: log.info(response) if response.startswith("```json"): @@ -51,24 +51,14 @@ def process_response(cls, response: str) -> ModelRes: response_model = ResponseScoreTypeNameReason(**response_json) - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # eval_status if response_model.score == 1: - # result.reason = [response_model.reason] - result.eval_details = { - "label": [QualityLabel.QUALITY_GOOD], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.label = [QualityLabel.QUALITY_GOOD] + result.reason = [response_model.reason] else: - result.eval_status = True - # result.type = response_model.type - # result.name = response_model.name - # result.reason = [response_model.reason] - result.eval_details = { - "label": [f"{response_model.type}.{response_model.name}"], - "metric": [cls.__name__], - "reason": [response_model.reason] - } + result.status = True + result.label = [f"{response_model.type}.{response_model.name}"] + result.reason = [response_model.reason] return result diff --git a/dingo/model/llm/vlm_layout_quality.py b/dingo/model/llm/vlm_layout_quality.py index 91851541..95c9303c 100644 --- a/dingo/model/llm/vlm_layout_quality.py +++ b/dingo/model/llm/vlm_layout_quality.py @@ -4,9 +4,9 @@ from typing import List from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes from dingo.utils import log @@ -212,14 +212,13 @@ def send_messages(cls, messages: List): return str(completions.choices[0].message.content) @classmethod - def process_response(cls, response: str) -> ModelRes: + def process_response(cls, response: 
str) -> EvalDetail: log.info(response) response = response.replace("```json", "") response = response.replace("```", "") types = [] - # names = [] if response: try: @@ -231,16 +230,11 @@ def process_response(cls, response: str) -> ModelRes: if eval_details: types.append(eval_details) - # names.append(eval_details) except json.JSONDecodeError as e: log.error(f"JSON解析错误: {e}") - result = ModelRes() - # result.eval_status = False - # result.type = types - # result.name = names - # result.reason = [response] - result.eval_details.label = types - result.eval_details.reason = [response] + result = EvalDetail(metric=cls.__name__) + result.label = types + result.reason = [response] return result diff --git a/dingo/model/llm/vlm_ocr_understanding.py b/dingo/model/llm/vlm_ocr_understanding.py index 64d4336c..90047cd5 100644 --- a/dingo/model/llm/vlm_ocr_understanding.py +++ b/dingo/model/llm/vlm_ocr_understanding.py @@ -1,13 +1,7 @@ -import base64 -import json -import os -from typing import List - from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.utils import log @Model.llm_register("VLMOCRUnderstanding") @@ -181,5 +175,5 @@ class VLMOCRUnderstanding(BaseOpenAI): """ @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: pass # TODO diff --git a/dingo/model/modelres.py b/dingo/model/modelres.py deleted file mode 100644 index f66e2c03..00000000 --- a/dingo/model/modelres.py +++ /dev/null @@ -1,49 +0,0 @@ -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Field - - -class QualityLabel: - """质量标签常量类""" - QUALITY_GOOD = "QUALITY_GOOD" # Indicates pass the quality check - QUALITY_BAD_PREFIX = "QUALITY_BAD_" # Indicates not pass the quality check - - -class EvalDetail(BaseModel): - label: list[str] = [] - metric: list[str] = [] - reason: list = [] - - def merge(self, other: 'EvalDetail') -> None: - # 合并并去重 label 和 metric - self.label = list(set(self.label + other.label)) - self.metric = list(set(self.metric + other.metric)) - self.reason.extend(other.reason) - - def copy(self) -> 'EvalDetail': - """创建当前 EvalDetail 的深拷贝""" - return EvalDetail( - label=self.label.copy(), - metric=self.metric.copy(), - reason=self.reason.copy() - ) - - def to_dict(self) -> Dict[str, Any]: - """将 EvalDetail 转换为字典""" - return { - 'label': self.label, - 'metric': self.metric, - 'reason': self.reason - } - - -class ModelRes(BaseModel): - eval_status: bool = False - eval_details: EvalDetail = EvalDetail() - score: Optional[float] = None - - def __setattr__(self, name, value): - # 在赋值时拦截 eval_details 字段 - if name == 'eval_details' and isinstance(value, dict): - value = EvalDetail(**value) - super().__setattr__(name, value) diff --git a/dingo/model/rule/base.py b/dingo/model/rule/base.py index d6655e34..ff6dded6 100644 --- a/dingo/model/rule/base.py +++ b/dingo/model/rule/base.py @@ -2,7 +2,7 @@ from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail class BaseRule: @@ -11,5 +11,5 @@ class BaseRule: dynamic_config: EvaluatorRuleArgs @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: raise NotImplementedError() diff --git a/dingo/model/rule/rule_audio.py b/dingo/model/rule/rule_audio.py index 3e869916..26d99fd2 
100644 --- a/dingo/model/rule/rule_audio.py +++ b/dingo/model/rule/rule_audio.py @@ -4,8 +4,8 @@ from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.model import Model -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.rule.base import BaseRule @@ -37,11 +37,11 @@ class RuleAudioDuration(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: import librosa from scipy.signal import welch - res = ModelRes() + res = EvalDetail(metric=cls.__name__) y, sr = librosa.load(input_data.content, sr=16000) f_signal, Pxx_signal = welch(y, fs=sr) @@ -51,26 +51,19 @@ def eval(cls, input_data: Data) -> ModelRes: noise_power = np.sum(Pxx_noise) if noise_power == 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The audio power is zero. Cannot calculate SNR."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The audio power is zero. Cannot calculate SNR."] + return res snr_dB = round(10 * np.log10(signal_power / noise_power), 2) if snr_dB < 8: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The audio signal-to-noise ratio is too low."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The audio signal-to-noise ratio is too low."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -102,10 +95,10 @@ class RuleAudioSnrQuality(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: import wave - res = ModelRes() + res = EvalDetail(metric=cls.__name__) if not input_data.content: return res if isinstance(input_data.content, str): @@ -115,16 +108,11 @@ def eval(cls, input_data: Data) -> ModelRes: duration = frame_count / sample_rate if duration > 10: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The audio duration is too long."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The audio duration is too long."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res diff --git a/dingo/model/rule/rule_common.py b/dingo/model/rule/rule_common.py index a8d1b879..2a415802 100644 --- a/dingo/model/rule/rule_common.py +++ b/dingo/model/rule/rule_common.py @@ -4,8 +4,8 @@ from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.model import Model -from dingo.model.modelres import EvalDetail, ModelRes, QualityLabel from dingo.model.rule.base import BaseRule @@ -25,19 +25,18 @@ class RuleAbnormalChar(BaseRule): } @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) for r in [RuleSpecialCharacter, RuleInvisibleChar]: tmp_res = r.eval(input_data) - # print(tmp_res) - if tmp_res.eval_status: - res.eval_status = True - if 
isinstance(tmp_res.eval_details, dict): - tmp_res.eval_details = EvalDetail(**tmp_res.eval_details) - res.eval_details.merge(tmp_res.eval_details) + if tmp_res.status: + res.status = True + # res.merge(tmp_res) + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = (res.reason or []) + (tmp_res.reason or []) # Set QUALITY_GOOD when all checks pass - if not res.eval_status: - res.eval_details = EvalDetail(label=[QualityLabel.QUALITY_GOOD]) + if not res.status: + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -56,18 +55,18 @@ class RuleAbnormalHtml(BaseRule): } @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) for r in [RuleHtmlEntity, RuleHtmlTag]: tmp_res = r.eval(input_data) - if tmp_res.eval_status: - res.eval_status = True - if isinstance(tmp_res.eval_details, dict): - tmp_res.eval_details = EvalDetail(**tmp_res.eval_details) - res.eval_details.merge(tmp_res.eval_details) + if tmp_res.status: + res.status = True + # res.merge(tmp_res) + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = (res.reason or []) + (tmp_res.reason or []) # Set QUALITY_GOOD when all checks pass - if not res.eval_status: - res.eval_details = EvalDetail(label=[QualityLabel.QUALITY_GOOD]) + if not res.status: + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -87,17 +86,16 @@ class RuleAbnormalNumber(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r"\n{4}\d+\n{4}") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [match.group(0).strip("\n")] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [match.group(0).strip("\n")] + else: + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -118,9 +116,9 @@ class RuleAlphaWords(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.6) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from nltk.tokenize import word_tokenize - res = ModelRes() + res = EvalDetail(metric=cls.__name__) content = input_data.content words = word_tokenize(content) n_words = len(words) @@ -129,19 +127,14 @@ def eval(cls, input_data: Data) -> ModelRes: n_alpha_words = sum([any((c.isalpha() for c in w)) for w in words]) ratio = n_alpha_words / n_words if ratio > cls.dynamic_config.threshold: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [ - "The ratio of words that contain at least one alphabetic character is: " - + str(ratio) - ] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [ + "The ratio of words that contain at least one alphabetic character is: " + + str(ratio) + ] return res @@ -173,23 +166,17 @@ class RuleAudioDataFormat(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res =
EvalDetail(metric=cls.__name__) raw_data = input_data.raw_data key_list = ["id", "audio", "text"] if all(key in raw_data for key in key_list): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } - return res + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Audio Data format error"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Audio Data format error"] return res @@ -211,9 +198,9 @@ class RuleCapitalWords(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.2) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from nltk.tokenize import WordPunctTokenizer - res = ModelRes() + res = EvalDetail(metric=cls.__name__) content = input_data.content words = WordPunctTokenizer().tokenize(content) num_words = len(words) @@ -222,16 +209,11 @@ def eval(cls, input_data: Data) -> ModelRes: num_caps_words = sum(map(str.isupper, words)) ratio = num_caps_words / num_words if ratio > cls.dynamic_config.threshold and num_words < 200: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["ratio: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["ratio: " + str(ratio)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -252,8 +234,8 @@ class RuleCharNumber(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=100) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) text = input_data.content text = text.strip() text = text.replace(" ", "") @@ -261,16 +243,11 @@ def eval(cls, input_data: Data) -> ModelRes: text = text.replace("\t", "") num_char = len(text) if num_char < cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The number of char is: " + str(num_char)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The number of char is: " + str(num_char)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -293,22 +270,17 @@ class RuleCharSplit(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content matches = re.findall(cls.dynamic_config.pattern, content) count = len(matches) if count >= cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": matches - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = matches else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -333,22 +305,26 @@ class RuleColonEnd(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = 
input_data.content if len(content) <= 0: return res if content[-1] == ":": - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [content[-100:]] - } + # res.eval_status = True + # res.eval_details = { + # "label": [f"{cls.metric_type}.{cls.__name__}"], + # "metric": [cls.__name__], + # "reason": [content[-100:]] + # } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [content[-100:]] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + # res.eval_details = { + # "label": [QualityLabel.QUALITY_GOOD] + # } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -389,20 +365,15 @@ class RuleContentNull(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) count = len(input_data.content.strip()) if count == 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content is empty."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content is empty."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -425,20 +396,15 @@ class RuleContentShort(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=20) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content.encode("utf-8") if len(content) <= cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content is too short."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content is too short."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -471,23 +437,18 @@ class RuleContentShortMultiLan(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=20) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from nltk.tokenize import WordPunctTokenizer - res = ModelRes() + res = EvalDetail(metric=cls.__name__) tk = WordPunctTokenizer() tokens = tk.tokenize(input_data.content) words = [word for word in tokens if word.isalpha()] if len(words) < cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content is too short."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content is too short."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -508,26 +469,21 @@ class RuleCurlyBracket(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.025) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) == 0: return res num = content.count("{") + content.count("}") ratio = num / len(content) if ratio > cls.dynamic_config.threshold: - 
res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [ - "The ratio of curly bracket and characters is : " + str(ratio) - ] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [ + "The ratio of curly bracket and characters is : " + str(ratio) + ] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -567,24 +523,19 @@ class RuleDocRepeat(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=80) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import base_rps_frac_chars_in_dupe_ngrams - res = ModelRes() + res = EvalDetail(metric=cls.__name__) repeat_score = base_rps_frac_chars_in_dupe_ngrams(6, input_data.content) if repeat_score >= cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [ - "Repeatability of text is too high, with ratio: " + str(repeat_score) - ] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [ + "Repeatability of text is too high, with ratio: " + str(repeat_score) + ] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -613,8 +564,8 @@ class RuleDocFormulaRepeat(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=20) # 设置阈值为20 @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) # 提取所有公式 pattern = r'(?:\$\$(.*?)\$\$|\\\((.*?)\\\))' @@ -629,20 +580,15 @@ def eval(cls, input_data: Data) -> ModelRes: repeat_analysis = cls.analyze_repeats(formula_content) # 如果总连续重复长度超过阈值,则标记为错误 if repeat_analysis['total_repeat_length'] >= cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [ - f"Formula has too many consecutive repeated characters, " - f"total repeat length: {repeat_analysis['total_repeat_length']}, " - f"found {len(repeat_analysis['repeats'])} repeat patterns" - ] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [ + f"Formula has too many consecutive repeated characters, " + f"total repeat length: {repeat_analysis['total_repeat_length']}, " + f"found {len(repeat_analysis['repeats'])} repeat patterns" + ] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -693,18 +639,18 @@ class RuleEnterAndSpace(BaseRule): } @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) for r in [RuleEnterMore, RuleEnterRatioMore, RuleSpaceMore]: tmp_res = r.eval(input_data) - if tmp_res.eval_status: - res.eval_status = True - if isinstance(tmp_res.eval_details, dict): - tmp_res.eval_details = EvalDetail(**tmp_res.eval_details) - res.eval_details.merge(tmp_res.eval_details) + if tmp_res.status: + res.status = True + # res.merge(tmp_res) + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = (res.reason or []) + (tmp_res.reason or []) # Set QUALITY_GOOD when all checks pass - if not
res.eval_status: - res.eval_details = EvalDetail(label=[QualityLabel.QUALITY_GOOD]) + if not res.status: + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -740,23 +686,18 @@ class RuleEnterMore(BaseRule): dynamic_config = EvaluatorRuleArgs(key_list=[r"\n{8,}", r"\r\n{8,}"]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content for p in cls.dynamic_config.key_list: SEARCH_REGEX = re.compile(p) match = SEARCH_REGEX.search(content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has 8 consecutive carriage returns."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has 8 consecutive carriage returns."] return res - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -792,23 +733,18 @@ class RuleEnterRatioMore(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) == 0: return res ratio = content.count("\n") / len(content) if ratio > 0.25: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The number of enter / the number of content > 25%."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The number of enter / the number of content > 25%."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -829,23 +765,18 @@ class RuleHeadWordAr(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("ar") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -866,23 +797,18 @@ class RuleHeadWordCs(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("cs") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = 
[f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -903,23 +829,18 @@ class RuleHeadWordHu(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("hu") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -940,23 +861,18 @@ class RuleHeadWordKo(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("ko") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -977,23 +893,18 @@ class RuleHeadWordRu(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("ru") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1014,23 +925,18 @@ class RuleHeadWordSr(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("sr") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": 
[f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1051,23 +957,18 @@ class RuleHeadWordTh(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("th") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1088,23 +989,18 @@ class RuleHeadWordVi(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word - res = ModelRes() + res = EvalDetail(metric=cls.__name__) keyword = get_xyz_head_word("vi") content_tail = input_data.content[-100:] matches = re.findall("|".join(keyword), content_tail) if len(matches) > 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has irrelevance tail source info."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has irrelevance tail source info."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1159,8 +1055,8 @@ class RuleHtmlEntity(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) == 0: return res @@ -1186,16 +1082,11 @@ def eval(cls, input_data: Data) -> ModelRes: num += content.count(entity) error_entity.append(entity) if num / len(content) >= 0.01: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [list(set(error_entity))] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [list(set(error_entity))] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1232,24 +1123,19 @@ class RuleHtmlTag(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) == 0: return res matches = re.findall("|".join(cls.dynamic_config.key_list), content) num = len(matches) if num / len(content) >= 0.01: - res.eval_status = True - res.eval_details = { - "label": 
[f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": list(set(matches)) - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = list(set(matches)) else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1272,23 +1158,18 @@ class RuleIDCard(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import Extractor - res = ModelRes() + res = EvalDetail(metric=cls.__name__) match = re.search(cls.dynamic_config.pattern, input_data.content, re.I) if match: person_id = Extractor().extract_id_card(input_data.content) if len(person_id) != 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [str(person_id)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [str(person_id)] return res - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1324,24 +1205,19 @@ class RuleInvisibleChar(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) == 0: return res matches = re.findall(cls.dynamic_config.pattern, content) num = len(matches) if num / len(content) >= 0.01: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [repr(s) for s in list(set(matches))] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [repr(s) for s in list(set(matches))] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1373,23 +1249,17 @@ class RuleImageDataFormat(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) raw_data = input_data.raw_data key_list = ["img_id", "image"] if all(key in raw_data for key in key_list): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } - return res + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Image Data format error"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Image Data format error"] return res @@ -1410,21 +1280,16 @@ class RuleLatexSpecialChar(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r"\$\$(.*?\!\!.*?)\$\$") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [match.group(0).strip("\n")] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [match.group(0).strip("\n")] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = 
[QualityLabel.QUALITY_GOOD] return res @@ -1445,9 +1310,9 @@ class RuleLineEndWithEllipsis(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.3, key_list=["...", "…"]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import TextSlice, split_paragraphs - res = ModelRes() + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content raw_lines: Tuple[TextSlice] = split_paragraphs( text=raw_content, normalizer=lambda x: x, remove_empty=True @@ -1463,16 +1328,11 @@ def eval(cls, input_data: Data) -> ModelRes: ) ratio = num_occurrences / num_lines if ratio > cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The ratio of lines end with ellipsis is: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The ratio of lines end with ellipsis is: " + str(ratio)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1495,9 +1355,9 @@ class RuleLineEndWithTerminal(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import TextSlice, split_paragraphs - res = ModelRes() + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content raw_lines: Tuple[TextSlice] = split_paragraphs( text=raw_content, normalizer=lambda x: x, remove_empty=True @@ -1518,16 +1378,11 @@ def eval(cls, input_data: Data) -> ModelRes: ) ratio = num_occurrences / num_lines if ratio < cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": list(set(terminal_marks)) - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = list(set(terminal_marks)) else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1562,9 +1417,9 @@ class RuleLineStartWithBulletpoint(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import TextSlice, split_paragraphs - res = ModelRes() + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content raw_lines: Tuple[TextSlice] = split_paragraphs( text=raw_content, normalizer=lambda x: x, remove_empty=True @@ -1580,16 +1435,11 @@ def eval(cls, input_data: Data) -> ModelRes: ) ratio = num_occurrences / num_lines if ratio > cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The ratio of lines start with bulletpoint is: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The ratio of lines start with bulletpoint is: " + str(ratio)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1610,9 +1460,9 @@ class RuleLineJavascriptCount(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=3) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import TextSlice, normalize, split_paragraphs - res = 
ModelRes() + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content normalized_lines: Tuple[TextSlice] = split_paragraphs( text=raw_content, normalizer=normalize, remove_empty=True @@ -1623,18 +1473,13 @@ def eval(cls, input_data: Data) -> ModelRes: num_occurrences = sum(["javascript" in line.text for line in normalized_lines]) num_not_occur = num_lines - num_occurrences if num_not_occur < cls.dynamic_config.threshold and num_lines > 3: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [ - "The lines with the word Javascript is: " + str(num_occurrences) - ] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [ + "The lines with the word Javascript is: " + str(num_occurrences) + ] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1655,9 +1500,9 @@ class RuleLoremIpsum(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=3e-08) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import normalize - res = ModelRes() + res = EvalDetail(metric=cls.__name__) normalized_content = normalize(input_data.content) num_normalized_content = len(normalized_content) if num_normalized_content == 0: @@ -1666,16 +1511,11 @@ def eval(cls, input_data: Data) -> ModelRes: num_occurrences = len(SEARCH_REGEX.findall(normalized_content)) ratio = num_occurrences / num_normalized_content if ratio > cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The ratio of lorem ipsum is: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The ratio of lorem ipsum is: " + str(ratio)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1696,9 +1536,9 @@ class RuleMeanWordLength(BaseRule): dynamic_config = EvaluatorRuleArgs(key_list=["3", "10"]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import normalize - res = ModelRes() + res = EvalDetail(metric=cls.__name__) normalized_content = normalize(input_data.content) normalized_words = tuple(normalized_content.split()) num_normalized_words = len(normalized_words) @@ -1708,16 +1548,11 @@ def eval(cls, input_data: Data) -> ModelRes: mean_length = num_chars / num_normalized_words mean_length = round(mean_length, 2) if mean_length >= int(cls.dynamic_config.key_list[0]) and mean_length < int(cls.dynamic_config.key_list[1]): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The mean length of word is: " + str(mean_length)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The mean length of word is: " + str(mean_length)] return res @@ -1749,23 +1584,17 @@ class RuleNlpDataFormat(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) 
raw_data = input_data.raw_data key_list = ["track_id", "content"] if all(key in raw_data for key in key_list): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } - return res + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["NLP Data format error"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["NLP Data format error"] return res @@ -1805,8 +1634,8 @@ class RuleNoPunc(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=112) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content paragraphs = content.split("\n") longest_sentence = "" @@ -1822,16 +1651,11 @@ def eval(cls, input_data: Data) -> ModelRes: max_word_count = word_count longest_sentence = sentence.strip() if int(max_word_count) > cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [longest_sentence] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [longest_sentence] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1852,20 +1676,15 @@ class RulePatternSearch(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern="your pattern") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) matches = re.findall(cls.dynamic_config.pattern, input_data.content) if matches: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": matches - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = matches else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1886,24 +1705,19 @@ class RuleSentenceNumber(BaseRule): dynamic_config = EvaluatorRuleArgs(key_list=["3", "7500"]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content SENT_PATTERN = re.compile(r"\b[^.!?\n]+[.!?]*", flags=re.UNICODE) num_sentence = len(SENT_PATTERN.findall(raw_content)) if num_sentence < int(cls.dynamic_config.key_list[0]) or num_sentence > int( cls.dynamic_config.key_list[1] ): - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The number of sentence is: " + str(num_sentence)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The number of sentence is: " + str(num_sentence)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -1935,23 +1749,17 @@ class RuleSftDataFormat(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) raw_data = input_data.raw_data key_list = ["track_id", "type", "prompt", 
"completion"] if all(key in raw_data for key in key_list): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } - return res + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["SFT Data format error"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["SFT Data format error"] return res @@ -1987,22 +1795,17 @@ class RuleSpaceMore(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=" {500,}") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content SEARCH_REGEX = re.compile(cls.dynamic_config.pattern) match = SEARCH_REGEX.search(content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content has 500 spaces."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content has 500 spaces."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -2051,8 +1854,8 @@ class RuleSpecialCharacter(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) == 0: return res @@ -2063,16 +1866,20 @@ def eval(cls, input_data: Data) -> ModelRes: num += len(m) matches = matches + m if num / len(content) >= 0.01: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": list(set(matches)) - } + # res.eval_status = True + # res.eval_details = { + # "label": [f"{cls.metric_type}.{cls.__name__}"], + # "metric": [cls.__name__], + # "reason": list(set(matches)) + # } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = list(set(matches)) else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + # res.eval_details = { + # "label": [QualityLabel.QUALITY_GOOD] + # } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -2093,11 +1900,11 @@ class RuleStopWord(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.06) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from nltk.tokenize import WordPunctTokenizer from dingo.model.rule.utils.util import get_stop_words - res = ModelRes() + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content raw_words = list(WordPunctTokenizer().tokenize(raw_content)) raw_words = [str(w).lower() for w in raw_words] @@ -2108,16 +1915,11 @@ def eval(cls, input_data: Data) -> ModelRes: num_stop_words = len(list(filter(lambda word: word in STOP_WORDS, raw_words))) ratio = num_stop_words / num_raw_words if ratio < cls.dynamic_config.threshold or num_stop_words < 2: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The ratio of stop words is: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The ratio of stop words is: " + str(ratio)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = 
[QualityLabel.QUALITY_GOOD] return res @@ -2138,9 +1940,9 @@ class RuleSymbolWordRatio(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.4, key_list=["#", "...", "…"]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from nltk.tokenize import WordPunctTokenizer - res = ModelRes() + res = EvalDetail(metric=cls.__name__) raw_content = input_data.content raw_words = tuple(WordPunctTokenizer().tokenize(raw_content)) num_raw_words = len(raw_words) @@ -2152,16 +1954,11 @@ def eval(cls, input_data: Data) -> ModelRes: ) ratio = num_symbols / num_words if ratio > cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The ratio of symbol / word is: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The ratio of symbol / word is: " + str(ratio)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -2182,9 +1979,9 @@ class RuleUniqueWords(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.1) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import normalize - res = ModelRes() + res = EvalDetail(metric=cls.__name__) normalized_content = normalize(input_data.content) normalized_words = tuple(normalized_content.split()) num_normalized_words = len(normalized_words) @@ -2194,16 +1991,11 @@ def eval(cls, input_data: Data) -> ModelRes: num_unique_words = len(set(normalized_words)) ratio = num_unique_words / num_words if ratio > cls.dynamic_config.threshold: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The ratio of unique words is: " + str(ratio)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The ratio of unique words is: " + str(ratio)] return res @@ -2224,14 +2016,13 @@ class RuleUnsafeWords(BaseRule): dynamic_config = EvaluatorRuleArgs(refer_path=[]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - import re + def eval(cls, input_data: Data) -> EvalDetail: import ahocorasick from dingo.model.rule.utils.util import get_unsafe_words - res = ModelRes() + res = EvalDetail(metric=cls.__name__) content = input_data.content key_list = cls.dynamic_config.key_list if key_list is None: @@ -2251,16 +2042,11 @@ def eval(cls, input_data: Data) -> ModelRes: matches.append((start_index, keyword)) if matches: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [value for index, value in matches] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [value for index, value in matches] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @classmethod @@ -2303,22 +2089,16 @@ class RuleVedioDataFormat(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) raw_data = input_data.raw_data key_list = ["id", 
"video", "text"] if all(key in raw_data for key in key_list): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } - return res + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Vedio Data format error"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Vedio Data format error"] return res @@ -2357,24 +2137,19 @@ class RuleOnlyUrl(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content.strip()) == 0: return res SEARCH_REGEX = re.compile(cls.dynamic_config.pattern) content_without_url = SEARCH_REGEX.sub("", content) if len(content_without_url.strip()) == 0: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Content is only an url link."] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Content is only an url link."] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -2395,20 +2170,15 @@ class RuleWatermark(BaseRule): dynamic_config = EvaluatorRuleArgs(key_list=[]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) matches = re.findall("|".join(cls.dynamic_config.key_list), input_data.content) if matches: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": matches - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = matches else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -2429,25 +2199,20 @@ class RuleWordNumber(BaseRule): dynamic_config = EvaluatorRuleArgs(key_list=["20", "100000"]) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from dingo.model.rule.utils.util import normalize - res = ModelRes() + res = EvalDetail(metric=cls.__name__) normalized_content = normalize(input_data.content) normalized_words = tuple(normalized_content.split()) num_normalized_words = len(normalized_words) if num_normalized_words >= int( cls.dynamic_config.key_list[0] ) and num_normalized_words < int(cls.dynamic_config.key_list[1]): - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["The number of word is: " + str(num_normalized_words)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["The number of word is: " + str(num_normalized_words)] return res @@ -2468,21 +2233,16 @@ class RuleWordSplit(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r"[A-Za-z]+-\s*$") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.findall(cls.dynamic_config.pattern, content) if match: - 
res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": match - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = match else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -2525,12 +2285,12 @@ class RuleWordStuck(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: import wordninja from dingo.model.rule.utils.detect_lang import decide_language_by_str from dingo.model.rule.utils.util import is_sha256 - res = ModelRes() + res = EvalDetail(metric=cls.__name__) content = input_data.content for p in cls.dynamic_config.key_list: content = re.sub(p, "", content) @@ -2545,16 +2305,11 @@ def eval(cls, input_data: Data) -> ModelRes: lan = decide_language_by_str(longest_string) cut = wordninja.split(longest_string) if lan == "en" and len(cut) > 1: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [str(longest_string)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [str(longest_string)] return res - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res diff --git a/dingo/model/rule/rule_hallucination_hhem.py b/dingo/model/rule/rule_hallucination_hhem.py index 970456ff..ccd46982 100644 --- a/dingo/model/rule/rule_hallucination_hhem.py +++ b/dingo/model/rule/rule_hallucination_hhem.py @@ -12,12 +12,12 @@ """ import json -from typing import List, Union +from typing import List from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model import Model -from dingo.model.modelres import ModelRes from dingo.model.rule.base import BaseRule from dingo.utils import log @@ -71,7 +71,7 @@ def load_model(cls): raise RuntimeError(f"Failed to load HHEM model: {e}") @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: """ Evaluate hallucination using HHEM-2.1-Open model. 
@@ -79,7 +79,7 @@ def eval(cls, input_data: Data) -> ModelRes: input_data: Data object containing content and context Returns: - ModelRes with hallucination detection results + EvalDetail with hallucination detection results """ # Check if context is available if not hasattr(input_data, 'context') or not input_data.context: @@ -88,16 +88,13 @@ def eval(cls, input_data: Data) -> ModelRes: contexts = input_data.raw_data['context'] else: # No context available - cannot evaluate - result = ModelRes() - result.eval_status = True + result = EvalDetail(metric=cls.__name__) + result.status = True # result.type = cls.metric_type # result.name = "MISSING_CONTEXT" # result.reason = ["Context is required for HHEM hallucination detection but was not provided"] - result.eval_details = { - "label": [f"{cls.metric_type}.MISSING_CONTEXT"], - "metric": [cls.__name__], - "reason": ["Context is required for HHEM hallucination detection but was not provided"] - } + result.label = [f"{cls.metric_type}.MISSING_CONTEXT"] + result.reason = ["Context is required for HHEM hallucination detection but was not provided"] return result else: contexts = input_data.context @@ -139,15 +136,15 @@ def eval(cls, input_data: Data) -> ModelRes: avg_hallucination_score = sum(hallucination_scores) / len(hallucination_scores) # Create result - result = ModelRes() + result = EvalDetail(metric=cls.__name__) # result.score = avg_hallucination_score # Determine if hallucination detected based on threshold if avg_hallucination_score > cls.dynamic_config.threshold: - result.eval_status = True + result.status = True # result.type = cls.metric_type # result.name = "HALLUCINATION_DETECTED" - result.eval_details.label = [f"{cls.metric_type}.HALLUCINATION_DETECTED"] + result.label = [f"{cls.metric_type}.HALLUCINATION_DETECTED"] # Generate detailed analysis analysis_parts = [ @@ -190,12 +187,12 @@ def eval(cls, input_data: Data) -> ModelRes: ]) # result.reason = ["\n".join(analysis_parts)] - result.eval_details.reason = ["\n".join(analysis_parts)] + result.reason = ["\n".join(analysis_parts)] else: - result.eval_status = False + result.status = False # result.type = "QUALITY_GOOD" # result.name = "NO_HALLUCINATION" - result.eval_details.label = ['QUALITY_GOOD.NO_HALLUCINATION'] + result.label = ['QUALITY_GOOD.NO_HALLUCINATION'] # Generate analysis for non-hallucination case analysis = ( @@ -206,22 +203,19 @@ def eval(cls, input_data: Data) -> ModelRes: f"💡 模型信息: 使用 Vectara HHEM-2.1-Open (本地推理)" ) # result.reason = [analysis] - result.eval_details.reason = [analysis] + result.reason = [analysis] return result except Exception as e: # Handle model inference errors - result = ModelRes() - result.eval_status = True + result = EvalDetail(metric=cls.__name__) + result.status = True # result.type = cls.metric_type # result.name = "HHEM_ERROR" # result.reason = [f"HHEM model inference failed: {str(e)}"] - result.eval_details = { - "label": [f"{cls.metric_type}.HHEM_ERROR"], - "metric": [cls.__name__], - "reason": [f"HHEM model inference failed: {str(e)}"] - } + result.label = [f"{cls.metric_type}.HHEM_ERROR"] + result.reason = [f"HHEM model inference failed: {str(e)}"] return result @classmethod @@ -245,7 +239,7 @@ def evaluate_with_detailed_output(cls, input_data: Data) -> dict: } @classmethod - def batch_evaluate(cls, data_list: List[Data]) -> List[ModelRes]: + def batch_evaluate(cls, data_list: List[Data]) -> List[EvalDetail]: """ Batch evaluation for efficiency. 
@@ -253,7 +247,7 @@ def batch_evaluate(cls, data_list: List[Data]) -> List[ModelRes]: data_list: List of Data objects to evaluate Returns: - List of ModelRes objects + List of EvalDetail objects """ # Load model once for batch processing cls.load_model() diff --git a/dingo/model/rule/rule_image.py b/dingo/model/rule/rule_image.py index aef107f1..0429a794 100644 --- a/dingo/model/rule/rule_image.py +++ b/dingo/model/rule/rule_image.py @@ -12,8 +12,8 @@ from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.model import Model -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.rule.base import BaseRule @@ -36,8 +36,8 @@ class RuleImageValid(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) if isinstance(input_data.image[0], str): img = Image.open(input_data.image[0]) else: @@ -45,16 +45,11 @@ def eval(cls, input_data: Data) -> ModelRes: img_new = img.convert("RGB") img_np = np.asarray(img_new) if np.all(img_np == (255, 255, 255)) or np.all(img_np == (0, 0, 0)): - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Image is not valid: all white or black"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Image is not valid: all white or black"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -77,8 +72,8 @@ class RuleImageSizeValid(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) if isinstance(input_data.image[0], str): img = Image.open(input_data.image[0]) else: @@ -86,19 +81,14 @@ def eval(cls, input_data: Data) -> ModelRes: width, height = img.size aspect_ratio = width / height if aspect_ratio > 4 or aspect_ratio < 0.25: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [ - "Image size is not valid, the ratio of width to height: " - + str(aspect_ratio) - ] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [ + "Image size is not valid, the ratio of width to height: " + + str(aspect_ratio) + ] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -121,11 +111,11 @@ class RuleImageQuality(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=5.5) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: import pyiqa import torch - res = ModelRes() + res = EvalDetail(metric=cls.__name__) if isinstance(input_data.image[0], str): img = Image.open(input_data.image[0]) else: @@ -137,16 +127,11 @@ def eval(cls, input_data: Data) -> ModelRes: score_fr = iqa_metric(img) score = score_fr.item() if score < cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Image quality is not satisfied, ratio: " + str(score)] - } + res.status = True + res.label = 
[f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Image quality is not satisfied, ratio: " + str(score)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -170,10 +155,10 @@ class RuleImageRepeat(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: from imagededup.methods import CNN, PHash - res = ModelRes() + res = EvalDetail(metric=cls.__name__) image_dir = input_data.content if len(os.listdir(image_dir)) == 0: raise ZeroDivisionError( @@ -195,19 +180,14 @@ def eval(cls, input_data: Data) -> ModelRes: set(duplicates_cnn.keys()) ) if common_duplicates: - res.eval_status = True + res.status = True tmp_reason = [f"{image} -> {duplicates_cnn[image]}" for image in common_duplicates] tmp_reason.append({"duplicate_ratio": len(common_duplicates) / len(os.listdir(image_dir))}) - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": tmp_reason - } + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = tmp_reason else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -230,7 +210,7 @@ class RuleImageTextSimilarity(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=0.17) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: import nltk nltk.download("punkt_tab") @@ -239,7 +219,7 @@ def eval(cls, input_data: Data) -> ModelRes: from dingo.model.rule.utils.image_util import download_similar_tool - res = ModelRes() + res = EvalDetail(metric=cls.__name__) if not input_data.image or not input_data.content: return res if isinstance(input_data.image[0], str): @@ -258,16 +238,11 @@ def eval(cls, input_data: Data) -> ModelRes: scores.append(sim_score[0][0]) average_score = sum(scores) / len(scores) if average_score < cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Image quality is not satisfied, ratio: " + str(average_score)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Image quality is not satisfied, ratio: " + str(average_score)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -288,7 +263,7 @@ class RuleImageArtimuse(BaseRule): dynamic_config = EvaluatorRuleArgs(threshold=6, refer_path=['https://artimuse.intern-ai.org.cn/']) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: try: response_create_task = requests.post( cls.dynamic_config.refer_path[0] + 'api/v1/task/create_task', @@ -328,28 +303,20 @@ def eval(cls, input_data: Data) -> ModelRes: break time.sleep(5) - res = ModelRes() - res.eval_status = True if status_data['score_overall'] < cls.dynamic_config.threshold else False + res = EvalDetail(metric=cls.__name__) + res.status = True if status_data['score_overall'] < cls.dynamic_config.threshold else False tmp = "BadImage" if status_data['score_overall'] < cls.dynamic_config.threshold else "GoodImage" - if res.eval_status: - res.eval_details = { - "label": [f"Artimuse_Succeeded.{tmp}"], - "metric": [cls.__name__], - "reason": [json.dumps(status_data, ensure_ascii=False)] - } + if res.status: + res.label = 
[f"Artimuse_Succeeded.{tmp}"] + res.reason = [json.dumps(status_data, ensure_ascii=False)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res except Exception as e: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["Artimuse_Fail.Exception"], - "metric": [cls.__name__], - "reason": [str(e)] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["Artimuse_Fail.Exception"] + res.reason = [str(e)] return res @@ -372,9 +339,9 @@ class RuleImageLabelOverlap(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: - res = ModelRes() + res = EvalDetail(metric=cls.__name__) try: # 1. 阈值参数 @@ -390,44 +357,32 @@ def eval(cls, input_data: Data) -> ModelRes: try: annotations = json.loads(content) except json.JSONDecodeError as e: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelOverlap_Fail.ParseError"], - "metric": [cls.__name__], - "reason": [f"content解析失败:{str(e)},前50字符:{content[:50]}..."] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelOverlap_Fail.ParseError"] + res.reason = [f"content解析失败:{str(e)},前50字符:{content[:50]}..."] return res elif isinstance(content, dict): annotations = content else: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelOverlap_Fail.InvalidContentType"], - "metric": [cls.__name__], - "reason": [f"content类型错误:需dict/str,实际是{type(content).__name__}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelOverlap_Fail.InvalidContentType"] + res.reason = [f"content类型错误:需dict/str,实际是{type(content).__name__}"] return res # 4. 验证数据有效性 if not annotations: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelOverlap_Fail.EmptyAnnotations"], - "metric": [cls.__name__], - "reason": ["annotations为空"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelOverlap_Fail.EmptyAnnotations"] + res.reason = ["annotations为空"] return res if not image_path or not os.path.exists(image_path): - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelOverlap_Fail.InvalidImagePath"], - "metric": [cls.__name__], - "reason": [f"图片路径无效:{image_path}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelOverlap_Fail.InvalidImagePath"] + res.reason = [f"图片路径无效:{image_path}"] return res # 5. 提取边界框并计算重叠 @@ -480,15 +435,12 @@ def eval(cls, input_data: Data) -> ModelRes: # 6. 根据重叠状态设置错误信息 if has_overlap: # 符合阈值重叠:标记为错误状态 - res.eval_status = True - res.eval_details = { - "label": ["LabelOverlap_Fail.RuleImageLabelOverlap"], - "metric": [cls.__name__], - "reason": [f"重叠检测:完全重叠={len(full_overlap_pairs)},部分重叠={len(partial_overlap_pairs)}"] - } + res.status = True + res.label = ["LabelOverlap_Fail.RuleImageLabelOverlap"] + res.reason = [f"重叠检测:完全重叠={len(full_overlap_pairs)},部分重叠={len(partial_overlap_pairs)}"] else: # 不符合阈值重叠:正常状态 - res.eval_status = False + res.status = False # 7. 生成可视化标注框重叠图片 vis_path = None # 初始化vis_path变量 @@ -560,13 +512,10 @@ def eval(cls, input_data: Data) -> ModelRes: # 8. 
整理结果(结果已通过eval_status和eval_details返回) except Exception as global_e: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelOverlap_Fail.GlobalError"], - "metric": [cls.__name__], - "reason": [f"全局处理错误:{str(global_e)}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelOverlap_Fail.GlobalError"] + res.reason = [f"全局处理错误:{str(global_e)}"] return res @@ -590,9 +539,9 @@ class RuleImageLabelVisualization(BaseRule): ) @classmethod - def eval(cls, input_data: Data) -> ModelRes: + def eval(cls, input_data: Data) -> EvalDetail: - res = ModelRes() + res = EvalDetail(metric=cls.__name__) try: # -------------------------- @@ -674,13 +623,10 @@ def draw_bboxes(draw_obj, elements, color_map, font_obj): # 验证图片路径有效性 if not image_path or not os.path.exists(image_path): - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelVisualization_Fail.InvalidImagePath"], - "metric": [cls.__name__], - "reason": [f"图片路径无效/不存在:{image_path}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelVisualization_Fail.InvalidImagePath"] + res.reason = [f"图片路径无效/不存在:{image_path}"] return res # 解析标注内容 @@ -688,41 +634,32 @@ def draw_bboxes(draw_obj, elements, color_map, font_obj): try: annotations = json.loads(content) except json.JSONDecodeError as e: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelVisualization_Fail.ParseError"], - "metric": [cls.__name__], - "reason": [f"标注解析失败:{str(e)},前50字符:{content[:50]}..."] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelVisualization_Fail.ParseError"] + res.reason = [f"标注解析失败:{str(e)},前50字符:{content[:50]}..."] return res elif isinstance(content, dict): annotations = content else: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelVisualization_Fail.InvalidAnnotationType"], - "metric": [cls.__name__], - "reason": [f"标注类型错误:需dict/str,实际{type(content).__name__}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelVisualization_Fail.InvalidAnnotationType"] + res.reason = [f"标注类型错误:需dict/str,实际{type(content).__name__}"] return res # 提取布局标注(适配"layout_dets"字段) layout_dets = annotations.get("layout_dets", []) if not layout_dets: # 无标注数据时的处理 - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelVisualization_Fail.EmptyLayoutData"], - "metric": [cls.__name__], - "reason": [json.dumps({ - "message": "无布局标注数据(layout_dets为空)", - "visualization_path": None, - "label_stats": {"total_labels": 0} - }, ensure_ascii=False)] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelVisualization_Fail.EmptyLayoutData"] + res.reason = [json.dumps({ + "message": "无布局标注数据(layout_dets为空)", + "visualization_path": None, + "label_stats": {"total_labels": 0} + }, ensure_ascii=False)] return res # -------------------------- @@ -770,30 +707,24 @@ def draw_bboxes(draw_obj, elements, color_map, font_obj): try: img.save(vis_path) except Exception as e: - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelVisualization_Fail.SaveImageError"], - "metric": [cls.__name__], - "reason": [f"保存图像失败:{str(e)}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelVisualization_Fail.SaveImageError"] + res.reason = [f"保存图像失败:{str(e)}"] return res # -------------------------- # 5. 
整理结果(结果已通过eval_status返回) # -------------------------- - res.eval_status = False + res.status = False except Exception as global_e: # 全局异常处理 - res = ModelRes() - res.eval_status = False - res.eval_details = { - "label": ["LabelVisualization_Fail.GlobalError"], - "metric": [cls.__name__], - "reason": [f"可视化处理全局错误:{str(global_e)}"] - } + res = EvalDetail(metric=cls.__name__) + res.status = False + res.label = ["LabelVisualization_Fail.GlobalError"] + res.reason = [f"可视化处理全局错误:{str(global_e)}"] return res diff --git a/dingo/model/rule/rule_resume.py b/dingo/model/rule/rule_resume.py index 880be4f6..f0ac185c 100644 --- a/dingo/model/rule/rule_resume.py +++ b/dingo/model/rule/rule_resume.py @@ -2,8 +2,8 @@ from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.model import Model -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.rule.base import BaseRule # ========== Privacy Issues ========== @@ -28,21 +28,16 @@ class RuleResumeIDCard(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'\b\d{17}[\dXx]\b') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Found ID card number: " + match.group(0)[:6] + "****" + match.group(0)[-4:]] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Found ID card number: " + match.group(0)[:6] + "****" + match.group(0)[-4:]] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -65,21 +60,16 @@ class RuleResumeDetailedAddress(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(省|市|区|县|镇|街道|路|号|室|栋|单元|楼).{0,20}(省|市|区|县|镇|街道|路|号|室|栋|单元|楼)') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Found detailed address: " + match.group(0)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Found detailed address: " + match.group(0)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -105,21 +95,16 @@ class RuleResumeEmailMissing(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if not match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Email address not found in resume"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Email address not found in resume"] else: - 
res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -142,21 +127,16 @@ class RuleResumePhoneMissing(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if not match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Phone number not found in resume"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Phone number not found in resume"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -179,22 +159,17 @@ class RuleResumePhoneFormat(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'\b\d{11}\b') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content matches = re.findall(cls.dynamic_config.pattern, content) invalid_phones = [m for m in matches if not m.startswith(('13', '14', '15', '16', '17', '18', '19'))] if invalid_phones: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Invalid phone format: " + ", ".join(invalid_phones)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Invalid phone format: " + ", ".join(invalid_phones)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -220,21 +195,16 @@ class RuleResumeExcessiveWhitespace(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r' {3,}', threshold=3) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content matches = re.findall(cls.dynamic_config.pattern, content) if len(matches) >= cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Found " + str(len(matches)) + " instances of excessive whitespace"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Found " + str(len(matches)) + " instances of excessive whitespace"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -257,21 +227,16 @@ class RuleResumeMarkdown(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(#{7,}|(\*{3,})|(\_{3,}))') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content match = re.search(cls.dynamic_config.pattern, content) if match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Markdown syntax error: " + match.group(0)] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Markdown syntax 
error: " + match.group(0)] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -297,22 +262,17 @@ class RuleResumeNameMissing(BaseRule): dynamic_config = EvaluatorRuleArgs() @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content first_section = content[:200] # Check if first section contains Chinese name pattern or heading if not re.search(r'(^#\s*.+|^.{2,4}$)', first_section, re.MULTILINE): - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Name or heading not found in the first section"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Name or heading not found in the first section"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -335,21 +295,16 @@ class RuleResumeSectionMissing(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(教育|学历|工作|经历|experience|education)', threshold=1) @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content.lower() matches = re.findall(cls.dynamic_config.pattern, content, re.IGNORECASE) if len(matches) < cls.dynamic_config.threshold: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Required sections (education/experience) not found"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Required sections (education/experience) not found"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -375,21 +330,16 @@ class RuleResumeEmoji(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content matches = re.findall(cls.dynamic_config.pattern, content) if matches: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Found " + str(len(matches)) + " emoji characters"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Found " + str(len(matches)) + " emoji characters"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -412,21 +362,16 @@ class RuleResumeInformal(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(搞定|牛逼|厉害|哈哈|嘿嘿|呵呵|啊|呀|吧|哦)') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content matches = re.findall(cls.dynamic_config.pattern, content) if matches: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Found informal language: " + ", ".join(set(matches))] - } + res.status = True + 
res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Found informal language: " + ", ".join(set(matches))] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -452,8 +397,8 @@ class RuleResumeDateFormat(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'\d{4}[-./年]\d{1,2}') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content matches = re.findall(cls.dynamic_config.pattern, content) if matches: @@ -470,9 +415,7 @@ def eval(cls, input_data: Data) -> ModelRes: "label": [QualityLabel.QUALITY_GOOD] } else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -498,21 +441,16 @@ class RuleResumeEducationMissing(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(教育|学历|education|university|college|bachelor|master|phd)') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content.lower() match = re.search(cls.dynamic_config.pattern, content, re.IGNORECASE) if not match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Education section not found in resume"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Education section not found in resume"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res @@ -535,19 +473,14 @@ class RuleResumeExperienceMissing(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'(工作|经历|experience|employment|position|职位)') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content.lower() match = re.search(cls.dynamic_config.pattern, content, re.IGNORECASE) if not match: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": ["Work experience section not found in resume"] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = ["Work experience section not found in resume"] else: - res.eval_details = { - "label": [QualityLabel.QUALITY_GOOD] - } + res.label = [QualityLabel.QUALITY_GOOD] return res diff --git a/dingo/model/rule/rule_xinghe.py b/dingo/model/rule/rule_xinghe.py index 5432fae1..73cce5da 100644 --- a/dingo/model/rule/rule_xinghe.py +++ b/dingo/model/rule/rule_xinghe.py @@ -1,11 +1,9 @@ import re -import string -from typing import Tuple from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail, QualityLabel from dingo.model.model import Model -from dingo.model.modelres import ModelRes, QualityLabel from dingo.model.rule.base import BaseRule @@ -25,18 +23,15 @@ class RuleDoi(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern=r'^10\.\d{4,9}/([^A-Z\s]*)$') @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if re.match(cls.dynamic_config.pattern, 
content): - res.eval_details.label = [QualityLabel.QUALITY_GOOD] + res.label = [QualityLabel.QUALITY_GOOD] else: - res.eval_status = True - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [content] - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [content] return res @@ -94,9 +89,9 @@ def _validate_isbn13(cls, isbn: str) -> bool: return total % 10 == 0 @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() - res.eval_details.label = [QualityLabel.QUALITY_GOOD] + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) + res.label = [QualityLabel.QUALITY_GOOD] content = input_data.content content = str(content).replace('-', '') @@ -104,20 +99,17 @@ def eval(cls, input_data: Data) -> ModelRes: if cls._validate_isbn10(content): pass else: - res.eval_status = True + res.status = True elif len(content) == 13: if cls._validate_isbn13(content): pass else: - res.eval_status = True + res.status = True else: - res.eval_status = True + res.status = True # add details - if res.eval_status: - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": [content] - } + if res.status: + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = [content] return res diff --git a/docs/en/CONTRIBUTING.md b/docs/en/CONTRIBUTING.md index bf2226ba..169c8913 100644 --- a/docs/en/CONTRIBUTING.md +++ b/docs/en/CONTRIBUTING.md @@ -178,35 +178,35 @@ Style configurations can be found in `setup.cfg` and `.pre-commit-config.yaml`. from typing import List, Optional from dingo.io.input import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail class ExampleRule: - """Example rule for demonstration purposes. + """Example rule for demonstration purposes. - This rule checks for specific patterns in text data. + This rule checks for specific patterns in text data. - Args: - pattern: Regular expression pattern to match - threshold: Minimum threshold for rule activation - """ + Args: + pattern: Regular expression pattern to match + threshold: Minimum threshold for rule activation + """ - def __init__(self, pattern: str, threshold: float = 0.5) -> None: - self.pattern = pattern - self.threshold = threshold + def __init__(self, pattern: str, threshold: float = 0.5) -> None: + self.pattern = pattern + self.threshold = threshold - def eval(self, input_data: Data) -> ModelRes: - """Evaluate input data against the rule. + def eval(self, input_data: Data) -> EvalDetail: + """Evaluate input data against the rule. - Args: - input_data: Input data to evaluate + Args: + input_data: Input data to evaluate - Returns: - ModelRes: Evaluation result - """ - res = ModelRes() - # Implementation here - return res + Returns: + EvalDetail: Evaluation result + """ + res = EvalDetail() + # Implementation here + return res ``` ## Contributing Guidelines @@ -227,24 +227,26 @@ class ExampleRule: 4. 
**Document the rule** with clear docstrings and examples Example: + ```python from dingo.model import Model from dingo.model.rule.base import BaseRule from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail + @Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) class CustomRule(BaseRule): - """Custom rule for specific quality check.""" + """Custom rule for specific quality check.""" - dynamic_config = EvaluatorRuleArgs(pattern=r'custom_pattern') + dynamic_config = EvaluatorRuleArgs(pattern=r'custom_pattern') - @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() - # Implementation - return res + @classmethod + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail() + # Implementation + return res ``` ### Adding New LLM Models diff --git a/examples/register/sdk_register_llm.py b/examples/register/sdk_register_llm.py index a06b57a8..c28ea179 100644 --- a/examples/register/sdk_register_llm.py +++ b/examples/register/sdk_register_llm.py @@ -1,12 +1,7 @@ -import json import os from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -from dingo.model.modelres import ModelRes -from dingo.model.response.response_class import ResponseScoreTypeNameReason -from dingo.utils import log -from dingo.utils.exception import ConvertJsonError OPENAI_MODEL = 'deepseek-chat' OPENAI_URL = 'https://api.deepseek.com/v1' diff --git a/examples/register/sdk_register_rule.py b/examples/register/sdk_register_rule.py index 31017af1..4b33f3de 100644 --- a/examples/register/sdk_register_rule.py +++ b/examples/register/sdk_register_rule.py @@ -2,8 +2,8 @@ from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model.model import Model -from dingo.model.modelres import ModelRes from dingo.model.rule.base import BaseRule @@ -13,19 +13,13 @@ class CommonPatternDemo(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern = "blue") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) matches = re.findall(cls.dynamic_config.pattern, input_data.content) if matches: - res.eval_status = True - # res.type = cls.metric_type - # res.name = cls.__name__ - # res.reason = matches - res.eval_details = { - "label": [f"{cls.metric_type}.{cls.__name__}"], - "metric": [cls.__name__], - "reason": matches - } + res.status = True + res.label = [f"{cls.metric_type}.{cls.__name__}"] + res.reason = matches return res diff --git a/test/scripts/exec/test_local.py b/test/scripts/exec/test_local.py index aa50ad42..5b9a1836 100644 --- a/test/scripts/exec/test_local.py +++ b/test/scripts/exec/test_local.py @@ -3,6 +3,7 @@ from dingo.config import InputArgs from dingo.exec import Executor, LocalExecutor from dingo.io import ResultInfo +from dingo.io.output.eval_detail import EvalDetail class TestLocal: @@ -15,11 +16,14 @@ def test_merge_result_info(self): }, eval_status = True, eval_details = { - "content": { - "label": ["QUALITY_BAD_EFFECTIVENESS-RuleColonEnd"], - "metric": ["RuleColonEnd"], - "reason": ["�I am 8 years old. ^I love apple because:"] - } + "content": [ + EvalDetail( + metric="RuleColonEnd", + status=True, + label=["QUALITY_BAD_EFFECTIVENESS-RuleColonEnd"], + reason=["�I am 8 years old. 
^I love apple because:"] + ) + ] } ) new_item2 = ResultInfo( @@ -29,11 +33,14 @@ def test_merge_result_info(self): }, eval_status = True, eval_details = { - "content": { - "label": ["QUALITY_BAD_EFFECTIVENESS-PromptContentChaos"], - "metric": ["PromptContentChaos"], - "reason": ["文本中包含不可见字符或乱码(如�和^),可能影响阅读理解。"] - } + "content": [ + EvalDetail( + metric="PromptContentChaos", + status=True, + label=["QUALITY_BAD_EFFECTIVENESS-PromptContentChaos"], + reason=["文本中包含不可见字符或乱码(如�和^),可能影响阅读理解。"] + ) + ] } ) @@ -46,13 +53,30 @@ def test_merge_result_info(self): new_existing_list = localexecutor.merge_result_info(existing_list, new_item1) new_existing_list = localexecutor.merge_result_info(new_existing_list, new_item2) assert len(new_existing_list) == 1 - assert len(new_existing_list[0].eval_details.get('content').label) == 2 - assert len(new_existing_list[0].eval_details.get('content').metric) == 2 - assert len(new_existing_list[0].eval_details.get('content').reason) == 2 - assert "QUALITY_BAD_EFFECTIVENESS-RuleColonEnd" in new_existing_list[0].eval_details.get('content').label - assert "QUALITY_BAD_EFFECTIVENESS-PromptContentChaos" in new_existing_list[0].eval_details.get('content').label - assert "�I am 8 years old. ^I love apple because:" in new_existing_list[0].eval_details.get('content').reason - assert "文本中包含不可见字符或乱码(如�和^),可能影响阅读理解。" in new_existing_list[0].eval_details.get('content').reason + + # 获取合并后的 content 字段的 EvalDetail 列表 + content_details = new_existing_list[0].eval_details.get('content') + assert len(content_details) == 2 + + # 收集所有的 label, metric, reason + all_labels = [] + all_metrics = [] + all_reasons = [] + for detail in content_details: + if detail.label: + all_labels.extend(detail.label) + if detail.metric: + all_metrics.append(detail.metric) + if detail.reason: + all_reasons.extend(detail.reason) + + assert len(all_labels) == 2 + assert len(all_metrics) == 2 + assert len(all_reasons) == 2 + assert "QUALITY_BAD_EFFECTIVENESS-RuleColonEnd" in all_labels + assert "QUALITY_BAD_EFFECTIVENESS-PromptContentChaos" in all_labels + assert "�I am 8 years old. 
^I love apple because:" in all_reasons + assert "文本中包含不可见字符或乱码(如�和^),可能影响阅读理解。" in all_reasons def test_all_labels_config(self): input_data = { diff --git a/test/scripts/io/input/test_continue.py b/test/scripts/io/input/test_continue.py index b734265c..f260fb54 100644 --- a/test/scripts/io/input/test_continue.py +++ b/test/scripts/io/input/test_continue.py @@ -1,16 +1,20 @@ import json import os.path +from pathlib import Path import pytest from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +ROOT_DIR = Path(__file__).parent.parent.parent.parent.parent + class TestContinue: def test_continue_local_jsonl(self): input_data = { - "input_path": "test/data/test_local_jsonl.jsonl", + "input_path": str(ROOT_DIR / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/test/scripts/io/input/test_write.py b/test/scripts/io/input/test_write.py index 044d6281..dc65a12a 100644 --- a/test/scripts/io/input/test_write.py +++ b/test/scripts/io/input/test_write.py @@ -1,16 +1,20 @@ import os import shutil +from pathlib import Path import pytest from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +ROOT_DIR = Path(__file__).parent.parent.parent.parent.parent + class TestWrite: def test_write_local_jsonl(self): input_data = { - "input_path": "test/data/test_local_jsonl.jsonl", + "input_path": str(ROOT_DIR / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl" diff --git a/test/scripts/model/rule/test_rule_common.py b/test/scripts/model/rule/test_rule_common.py index 4493c9f4..e872672e 100644 --- a/test/scripts/model/rule/test_rule_common.py +++ b/test/scripts/model/rule/test_rule_common.py @@ -1,7 +1,5 @@ -import pytest - from dingo.io import Data -from dingo.model.modelres import EvalDetail +from dingo.io.output.eval_detail import EvalDetail from dingo.model.rule.rule_common import RuleDocFormulaRepeat, RuleUnsafeWords @@ -10,21 +8,17 @@ def test_rule_doc_formula_repeat(self): data = Data(data_id="1",content="we are a $$x^2 + y^2 + z^2 == z^\\sqrt{4}\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots\\dots$$ , we are a $$x^2 + y^2 = z^2$$ ") res = RuleDocFormulaRepeat.eval(data) # print(res) - assert res.eval_status is True - if isinstance(res.eval_details, dict): - res.eval_details = EvalDetail(**res.eval_details) - assert res.eval_details.label == ["QUALITY_BAD_SIMILARITY.RuleDocFormulaRepeat"] - assert res.eval_details.metric == ["RuleDocFormulaRepeat"] - assert res.eval_details.reason == ["Formula has too many consecutive repeated characters, total repeat length: 130, found 1 repeat patterns"] + assert res.status is True + assert res.label == ["QUALITY_BAD_SIMILARITY.RuleDocFormulaRepeat"] + assert res.metric == "RuleDocFormulaRepeat" + assert res.reason == ["Formula has too many consecutive repeated characters, total repeat length: 130, found 1 repeat patterns"] def test_rule_unsafe_words(self): data = Data(data_id="", prompt="", content="java is good\n \n \n \n hello \n \n but python is better") r = RuleUnsafeWords r.dynamic_config.key_list = ['av', 'b', 'java'] tmp = r.eval(data) - assert tmp.eval_status is True - if isinstance(tmp.eval_details, dict): - tmp.eval_details = EvalDetail(**tmp.eval_details) - assert 'av' not in tmp.eval_details.reason - assert 'b' not in tmp.eval_details.reason - assert 'java' in tmp.eval_details.reason + assert tmp.status is True + assert 'av' 
not in tmp.reason + assert 'b' not in tmp.reason + assert 'java' in tmp.reason diff --git a/test/scripts/model/test_modelres.py b/test/scripts/model/test_modelres.py index efa6ee3e..b9a6211c 100644 --- a/test/scripts/model/test_modelres.py +++ b/test/scripts/model/test_modelres.py @@ -1,11 +1,9 @@ -import os -import re from typing import List from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail from dingo.model.model import Model -from dingo.model.modelres import ModelRes from dingo.model.rule.base import BaseRule @@ -15,21 +13,24 @@ class RegisterRuleColon(BaseRule): dynamic_config = EvaluatorRuleArgs(pattern = "blue") @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() + def eval(cls, input_data: Data) -> EvalDetail: + res = EvalDetail(metric=cls.__name__) content = input_data.content if len(content) <= 0: return res if content[-1] == ":": - res.eval_status = True + # res.eval_status = True # res.type = [cls.metric_type, 'TestType'] # res.name = [cls.__name__, 'TestName'] # res.reason = [content[-100:]] - res.eval_details = { - "label": [cls.metric_type, 'TestType'], - "metric": [cls.__name__], - "reason": [content[-100:]] - } + # res.eval_details = { + # "label": [cls.metric_type, 'TestType'], + # "metric": [cls.__name__], + # "reason": [content[-100:]] + # } + res.status = True + res.label = [cls.metric_type, 'TestType'] + res.reason = [content[-100:]] return res @@ -44,7 +45,7 @@ def test_type_name_list(self): res = RegisterRuleColon().eval(data) # print(res) - assert isinstance(res.eval_details.label, List) - assert isinstance(res.eval_details.reason, List) - assert len(res.eval_details.label) == 2 - assert 'TestType' in res.eval_details.label + assert isinstance(res.label, List) + assert isinstance(res.reason, List) + assert len(res.label) == 2 + assert 'TestType' in res.label
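
For orientation, a minimal sketch of the `EvalDetail`-based rule shape that the hunks above converge on. The rule name, registration group, and regex below are hypothetical; the result fields (`metric`, `status`, `label`, `reason`) and `QualityLabel.QUALITY_GOOD` follow the converted rules in this patch, so treat it as an illustration of the migration pattern rather than a definitive API reference.

```python
import re

from dingo.config.input_args import EvaluatorRuleArgs
from dingo.io import Data
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model.model import Model
from dingo.model.rule.base import BaseRule


@Model.rule_register('QUALITY_BAD_CUSTOM', ['default'])  # hypothetical registration group
class RuleTrailingColonSketch(BaseRule):
    """Hypothetical example rule: flags content that ends with a colon."""

    dynamic_config = EvaluatorRuleArgs(pattern=r':\s*$')

    @classmethod
    def eval(cls, input_data: Data) -> EvalDetail:
        # metric is set once on the result object instead of inside an eval_details dict
        res = EvalDetail(metric=cls.__name__)
        if re.search(cls.dynamic_config.pattern, input_data.content):
            # bad case: mark status and record the label/reason lists directly on the result
            res.status = True
            res.label = [f"{cls.metric_type}.{cls.__name__}"]
            res.reason = ["Content ends with a colon"]
        else:
            # good case: only the quality label is set
            res.label = [QualityLabel.QUALITY_GOOD]
        return res
```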