MigoXLab
diff --git a/docs/config.md b/docs/config.md
Lines changed: 182 additions & 83 deletions
@@ -31,21 +31,11 @@
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
 | source | str | "hugging_face" | Yes | 数据源类型，可选值：['hugging_face', 'local'] |
-| format | str | "json" | Yes | 数据格式，可选值：['json', 'jsonl', 'plaintext', 'listjson'] |
-| field | object | - | Yes | 字段映射配置 |
+| format | str | "json" | Yes | 数据格式，可选值：['json', 'jsonl', 'plaintext', 'listjson', 'image', 'multi_turn_dialog'] |
 | hf_config | object | - | No | HuggingFace 特定配置 |
-
-#### DatasetField 配置 (dataset.field)
-
-字段映射配置：
-
-| Parameter | Type | Default | Required | Description |
-|-----------|------|---------|----------|-------------|
-| id | str | "" | Depends | ID 字段名，多级用 '.' 分隔 |
-| prompt | str | "" | Depends | prompt 字段名，多级用 '.' 分隔 |
-| content | str | "" | Yes | 内容字段名，多级用 '.' 分隔 |
-| context | str | "" | Depends | 上下文字段名，多级用 '.' 分隔 |
-| image | str | "" | Depends | 图像字段名，多级用 '.' 分隔 |
+| s3_config | object | - | No | S3 存储配置 |
+| sql_config | object | - | No | SQL 数据库配置 |
+| excel_config | object | - | No | Excel 文件配置 |
 
 #### DatasetHFConfig 配置 (dataset.hf_config)
 
@@ -62,9 +52,6 @@ HuggingFace 特定配置：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
-| eval_group | str | "" | Yes | 评估模型组 |
-| rule_list | list | [] | Depends | 规则函数列表 |
-| prompt_list | list | [] | Depends | prompt 列表 |
 | start_index | int | 0 | No | 开始检查的数据索引 |
 | end_index | int | -1 | No | 结束检查的数据索引 |
 | max_workers | int | 1 | No | 最大并发工作线程数 |
@@ -78,41 +65,71 @@ HuggingFace 特定配置：
 
 | Parameter  | Type | Default | Required | Description |
 |------------|------|---------|----------|-------------|
-| bad        | bool | false   | No       | 是否保存错误结果    |
+| bad        | bool | true    | No       | 是否保存错误结果    |
 | good       | bool | false   | No       | 是否保存正确结果    |
 | all_labels | bool | false   | No       | 是否保存所有标签    |
 | raw        | bool | false   | No       | 是否保存原始数据    |
 
 ### Evaluator 配置 (evaluator)
 
-评估器相关配置：
+评估器配置采用数组形式，支持多个评估管道（EvalPipline）：
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| evaluator | array | [] | Yes | 评估管道数组 |
+
+#### EvalPipline 配置 (evaluator[])
+
+每个评估管道包含字段映射和评估器列表：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
-| rule_config | object | {} | Depends | 规则配置 |
-| llm_config | object | {} | Depends | LLM 配置 |
+| fields | object | {} | Yes | 字段映射配置，将数据字段映射到评估器需要的字段 |
+| evals | array | [] | Yes | 评估器列表 |
+
+**fields 字段映射说明**：
+
+| 映射字段 | Description |
+|----------|-------------|
+| id | 数据 ID 字段名 |
+| prompt | prompt/问题字段名 |
+| content | 内容字段名（必需） |
+| context | 上下文字段名 |
+| image | 图像字段名 |
+| reference | 参考答案字段名 |
 
-#### EvaluatorRuleArgs 配置 (evaluator.rule_config.[rule_name])
+#### EvalPiplineConfig 配置 (evaluator[].evals[])
 
-规则配置：
+单个评估器配置：
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| name | str | - | Yes | 评估器名称（Rule 或 LLM 类名） |
+| config | object | null | No | 评估器配置参数 |
+
+#### Rule 评估器配置 (evaluator[].evals[].config)
+
+规则类评估器的配置参数：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
 | threshold | float | null | No | 规则决策阈值 |
 | pattern | str | null | No | 匹配模式字符串 |
 | key_list | list | null | No | 匹配关键词列表 |
-| refer_path | list | null | No | 参考文件路径或小模型路径 |
+| refer_path | list | null | No | 参考文件路径或模型路径 |
+| parameters | object | null | No | 其他自定义参数 |
 
-#### EvaluatorLLMArgs 配置 (evaluator.llm_config.[llm_name])
+#### LLM 评估器配置 (evaluator[].evals[].config)
 
-LLM 配置：
+LLM 类评估器的配置参数：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
 | model | str | null | No | 使用的模型名称 |
-| key | str | null | No | API 密钥 |
-| api_url | str | null | No | API URL |
+| key | str | null | Yes | API 密钥 |
+| api_url | str | null | Yes | API URL |
 | parameters | object | null | No | LLM 调参配置 |
+| embedding_config | object | null | No | Embedding 模型配置 |
 
 ##### LLM Parameters 配置
 
@@ -128,70 +145,138 @@ LLM 调参配置：
 
 ## 配置文件示例
 
+### 基础示例（仅使用规则评估器）
+
 ```json
 {
   "task_name": "dingo",
-  "input_path": "test/data/test_local_json.json",
+  "input_path": "test/data/test_local_jsonl.jsonl",
   "output_path": "outputs/",
   "log_level": "WARNING",
   "use_browser": false,
 
   "dataset": {
-    "source": "hugging_face",
-    "format": "json",
-    "field": {
-      "id": "",
-      "prompt": "",
-      "content": "",
-      "context": "",
-      "image": ""
-    },
-    "hf_config": {
-      "huggingface_split": "",
-      "huggingface_config_name": null
-    }
+    "source": "local",
+    "format": "jsonl"
   },
 
   "executor": {
-    "eval_group": "",
-    "rule_list": [],
-    "prompt_list": [],
     "start_index": 0,
     "end_index": -1,
     "max_workers": 1,
     "batch_size": 1,
-    "multi_turn_mode": null,
     "result_save": {
-      "bad": false,
-      "good": false,
-      "raw": false
+      "bad": true,
+      "good": false
     }
   },
 
-  "evaluator": {
-    "rule_config": {
-      "rule_name": {
-        "threshold": 0.5,
-        "pattern": ".*",
-        "key_list": ["key1", "key2"],
-        "refer_path": ["path/to/reference"]
-      }
-    },
-    "llm_config": {
-      "openai": {
-        "model": "gpt-3.5-turbo",
-        "key": "your-api-key",
-        "api_url": "https://api.openai.com/v1/chat/completions",
-        "parameters": {
-          "temperature": 1,
-          "top_p": 1,
-          "max_tokens": 4000,
-          "presence_penalty": 0,
-          "frequency_penalty": 0
-        }
-      }
+  "evaluator": [
+    {
+      "fields": {"content": "content"},
+      "evals": [
+        {"name": "RuleColonEnd"},
+        {"name": "RuleAbnormalChar"}
+      ]
     }
-  }
+  ]
+}
+```
+
+### 使用 LLM 评估器
+
+```json
+{
+  "task_name": "llm_evaluation",
+  "input_path": "test/data/test_local_jsonl.jsonl",
+  "output_path": "outputs/",
+
+  "dataset": {
+    "source": "local",
+    "format": "jsonl"
+  },
+
+  "executor": {
+    "result_save": {
+      "bad": true,
+      "good": true
+    }
+  },
+
+  "evaluator": [
+    {
+      "fields": {"content": "content"},
+      "evals": [
+        {"name": "LLMTextQualityV4", "config": {
+          "model": "deepseek-chat",
+          "key": "your-api-key",
+          "api_url": "https://api.deepseek.com/v1"
+        }}
+      ]
+    }
+  ]
+}
+```
+
+### 混合使用规则和 LLM 评估器
+
+```json
+{
+  "task_name": "mixed_evaluation",
+  "input_path": "test/data/test_local_jsonl.jsonl",
+
+  "dataset": {
+    "source": "local",
+    "format": "jsonl"
+  },
+
+  "executor": {
+    "max_workers": 4,
+    "batch_size": 10,
+    "result_save": {
+      "bad": true,
+      "good": true
+    }
+  },
+
+  "evaluator": [
+    {
+      "fields": {"content": "content"},
+      "evals": [
+        {"name": "RuleColonEnd"},
+        {"name": "RuleAbnormalChar"},
+        {"name": "LLMTextQualityV4", "config": {
+          "model": "deepseek-chat",
+          "key": "your-api-key",
+          "api_url": "https://api.deepseek.com/v1"
+        }}
+      ]
+    }
+  ]
+}
+```
+
+### 多字段评估示例
+
+```json
+{
+  "task_name": "multi_field_evaluation",
+  "input_path": "path/to/your/data.jsonl",
+  "dataset": {
+    "source": "local",
+    "format": "jsonl"
+  },
+  "evaluator": [
+    {
+      "fields": {"prompt": "question", "content": "answer", "context": "context"},
+      "evals": [
+        {"name": "LLMHallucination", "config": {
+          "key": "your-api-key",
+          "api_url": "https://api.openai.com/v1"
+        }}
+      ]
+    }
+  ]
 }
 ```
 
@@ -204,20 +289,34 @@ dingo --input config.json
 
 ### SDK 方式
 ```python
-from dingo import InputArgs, run
-
-# 从文件加载配置
-config = InputArgs.parse_file("config.json")
-run(config)
+from dingo.config import InputArgs
+from dingo.exec import Executor
 
-# 或从字典创建配置
-config_dict = {
+# 从字典创建配置
+input_data = {
     "task_name": "my_task",
-    "input_path": "data.json",
-    # ... 其他配置
+    "input_path": "data.jsonl",
+    "dataset": {
+        "source": "local",
+        "format": "jsonl"
+    },
+    "executor": {
+        "result_save": {"bad": True, "good": True}
+    },
+    "evaluator": [
+        {
+            "fields": {"content": "content"},
+            "evals": [
+                {"name": "RuleColonEnd"}
+            ]
+        }
+    ]
 }
-config = InputArgs(**config_dict)
-run(config)
+
+input_args = InputArgs(**input_data)
+executor = Executor.exec_map["local"](input_args)
+result = executor.execute()
+print(result)
 ```
 
 ## 多轮对话模式