MigoXLab
diff --git a/docs/config.md b/docs/config.md
Lines changed: 83 additions & 182 deletions
@@ -31,11 +31,21 @@
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
 | source | str | "hugging_face" | Yes | 数据源类型，可选值：['hugging_face', 'local'] |
-| format | str | "json" | Yes | 数据格式，可选值：['json', 'jsonl', 'plaintext', 'listjson', 'image', 'multi_turn_dialog'] |
+| format | str | "json" | Yes | 数据格式，可选值：['json', 'jsonl', 'plaintext', 'listjson'] |
+| field | object | - | Yes | 字段映射配置 |
 | hf_config | object | - | No | HuggingFace 特定配置 |
-| s3_config | object | - | No | S3 存储配置 |
-| sql_config | object | - | No | SQL 数据库配置 |
-| excel_config | object | - | No | Excel 文件配置 |
+
+#### DatasetField 配置 (dataset.field)
+
+字段映射配置，用于指定数据集中各字段对应的字段名（嵌套字段用 '.' 分隔各级）：
+
+| Parameter | Type | Default | Required | Description |
+|-----------|------|---------|----------|-------------|
+| id | str | "" | Depends | ID 字段名，多级用 '.' 分隔 |
+| prompt | str | "" | Depends | prompt 字段名，多级用 '.' 分隔 |
+| content | str | "" | Yes | 内容字段名，多级用 '.' 分隔 |
+| context | str | "" | Depends | 上下文字段名，多级用 '.' 分隔 |
+| image | str | "" | Depends | 图像字段名，多级用 '.' 分隔 |
 
 #### DatasetHFConfig 配置 (dataset.hf_config)
 
@@ -52,6 +62,9 @@ HuggingFace 特定配置：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
+| eval_group | str | "" | Yes | 评估模型组 |
+| rule_list | list | [] | Depends | 规则函数列表 |
+| prompt_list | list | [] | Depends | prompt 列表 |
 | start_index | int | 0 | No | 开始检查的数据索引 |
 | end_index | int | -1 | No | 结束检查的数据索引 |
 | max_workers | int | 1 | No | 最大并发工作线程数 |
@@ -65,71 +78,41 @@ HuggingFace 特定配置：
 
 | Parameter  | Type | Default | Required | Description |
 |------------|------|---------|----------|-------------|
-| bad        | bool | true    | No       | 是否保存错误结果    |
+| bad        | bool | false   | No       | 是否保存错误结果    |
 | good       | bool | false   | No       | 是否保存正确结果    |
 | all_labels | bool | false   | No       | 是否保存所有标签    |
 | raw        | bool | false   | No       | 是否保存原始数据    |
 
 ### Evaluator 配置 (evaluator)
 
-评估器配置采用数组形式，支持多个评估管道（EvalPipline）：
-
-| Parameter | Type | Default | Required | Description |
-|-----------|------|---------|----------|-------------|
-| evaluator | array | [] | Yes | 评估管道数组 |
-
-#### EvalPipline 配置 (evaluator[])
-
-每个评估管道包含字段映射和评估器列表：
+评估器相关配置：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
-| fields | object | {} | Yes | 字段映射配置，将数据字段映射到评估器需要的字段 |
-| evals | array | [] | Yes | 评估器列表 |
-
-**fields 字段映射说明**：
-
-| 映射字段 | Description |
-|----------|-------------|
-| id | 数据 ID 字段名 |
-| prompt | prompt/问题字段名 |
-| content | 内容字段名（必需） |
-| context | 上下文字段名 |
-| image | 图像字段名 |
-| reference | 参考答案字段名 |
+| rule_config | object | {} | Depends | 规则配置 |
+| llm_config | object | {} | Depends | LLM 配置 |
 
-#### EvalPiplineConfig 配置 (evaluator[].evals[])
+#### EvaluatorRuleArgs 配置 (evaluator.rule_config.[rule_name])
 
-单个评估器配置：
-
-| Parameter | Type | Default | Required | Description |
-|-----------|------|---------|----------|-------------|
-| name | str | - | Yes | 评估器名称（Rule 或 LLM 类名） |
-| config | object | null | No | 评估器配置参数 |
-
-#### Rule 评估器配置 (config)
-
-规则类评估器的配置参数：
+规则配置：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
 | threshold | float | null | No | 规则决策阈值 |
 | pattern | str | null | No | 匹配模式字符串 |
 | key_list | list | null | No | 匹配关键词列表 |
-| refer_path | list | null | No | 参考文件路径或模型路径 |
-| parameters | object | null | No | 其他自定义参数 |
+| refer_path | list | null | No | 参考文件路径或小模型路径 |
 
-#### LLM 评估器配置 (config)
+#### EvaluatorLLMArgs 配置 (evaluator.llm_config.[llm_name])
 
-LLM 类评估器的配置参数：
+LLM 配置：
 
 | Parameter | Type | Default | Required | Description |
 |-----------|------|---------|----------|-------------|
 | model | str | null | No | 使用的模型名称 |
-| key | str | null | Yes | API 密钥 |
-| api_url | str | null | Yes | API URL |
+| key | str | null | No | API 密钥 |
+| api_url | str | null | No | API URL |
 | parameters | object | null | No | LLM 调参配置 |
-| embedding_config | object | null | No | Embedding 模型配置 |
 
 ##### LLM Parameters 配置
 
@@ -145,138 +128,70 @@ LLM 调参配置：
 
 ## 配置文件示例
 
-### 基础示例（仅使用规则评估器）
-
 ```json
 {
   "task_name": "dingo",
-  "input_path": "test/data/test_local_jsonl.jsonl",
+  "input_path": "test/data/test_local_json.json",
   "output_path": "outputs/",
   "log_level": "WARNING",
   "use_browser": false,
 
   "dataset": {
-    "source": "local",
-    "format": "jsonl"
+    "source": "local",
+    "format": "json",
+    "field": {
+      "id": "",
+      "prompt": "",
+      "content": "",
+      "context": "",
+      "image": ""
+    },
+    "hf_config": {
+      "huggingface_split": "",
+      "huggingface_config_name": null
+    }
   },
 
   "executor": {
+    "eval_group": "",
+    "rule_list": [],
+    "prompt_list": [],
     "start_index": 0,
     "end_index": -1,
     "max_workers": 1,
     "batch_size": 1,
+    "multi_turn_mode": null,
     "result_save": {
-      "bad": true,
-      "good": false
+      "bad": false,
+      "good": false,
+      "raw": false
     }
   },
 
-  "evaluator": [
-    {
-      "fields": {"content": "content"},
-      "evals": [
-        {"name": "RuleColonEnd"},
-        {"name": "RuleAbnormalChar"}
-      ]
-    }
-  ]
-}
-```
-
-### 使用 LLM 评估器
-
-```json
-{
-  "task_name": "llm_evaluation",
-  "input_path": "test/data/test_local_jsonl.jsonl",
-  "output_path": "outputs/",
-
-  "dataset": {
-    "source": "local",
-    "format": "jsonl"
-  },
-
-  "executor": {
-    "result_save": {
-      "bad": true,
-      "good": true
-    }
-  },
-
-  "evaluator": [
-    {
-      "fields": {"content": "content"},
-      "evals": [
-        {"name": "LLMTextQualityV4", "config": {
-          "model": "deepseek-chat",
-          "key": "your-api-key",
-          "api_url": "https://api.deepseek.com/v1"
-        }}
-      ]
-    }
-  ]
-}
-```
-
-### 混合使用规则和 LLM 评估器
-
-```json
-{
-  "task_name": "mixed_evaluation",
-  "input_path": "test/data/test_local_jsonl.jsonl",
-
-  "dataset": {
-    "source": "local",
-    "format": "jsonl"
-  },
-
-  "executor": {
-    "max_workers": 4,
-    "batch_size": 10,
-    "result_save": {
-      "bad": true,
-      "good": true
-    }
-  },
-
-  "evaluator": [
-    {
-      "fields": {"content": "content"},
-      "evals": [
-        {"name": "RuleColonEnd"},
-        {"name": "RuleAbnormalChar"},
-        {"name": "LLMTextQualityV4", "config": {
-          "model": "deepseek-chat",
-          "key": "your-api-key",
-          "api_url": "https://api.deepseek.com/v1"
-        }}
-      ]
-    }
-  ]
-}
-```
-
-### 多字段评估示例
-
-```json
-{
-  "task_name": "multi_field_evaluation",
-  "input_path": "path/to/your/data.jsonl",
-  "dataset": {
-    "source": "local",
-    "format": "jsonl"
-  },
-  "evaluator": [
-    {
-      "fields": {"prompt": "question", "content": "answer", "context": "context"},
-      "evals": [
-        {"name": "LLMHallucination", "config": {
-          "key": "your-api-key",
-          "api_url": "https://api.openai.com/v1"
-        }}
-      ]
+  "evaluator": {
+    "rule_config": {
+      "rule_name": {
+        "threshold": 0.5,
+        "pattern": ".*",
+        "key_list": ["key1", "key2"],
+        "refer_path": ["path/to/reference"]
+      }
+    },
+    "llm_config": {
+      "openai": {
+        "model": "gpt-3.5-turbo",
+        "key": "your-api-key",
+        "api_url": "https://api.openai.com/v1/chat/completions",
+        "parameters": {
+          "temperature": 1,
+          "top_p": 1,
+          "max_tokens": 4000,
+          "presence_penalty": 0,
+          "frequency_penalty": 0
+        }
+      }
     }
-  ]
+  }
 }
 ```
 
@@ -289,34 +204,20 @@ dingo --input config.json
 
 ### SDK 方式
 ```python
-from dingo.config import InputArgs
-from dingo.exec import Executor
+from dingo import InputArgs, run
+
+# 从文件加载配置
+config = InputArgs.parse_file("config.json")
+run(config)
 
-# 从字典创建配置
-input_data = {
+# 或从字典创建配置
+config_dict = {
     "task_name": "my_task",
-    "input_path": "data.jsonl",
-    "dataset": {
-        "source": "local",
-        "format": "jsonl"
-    },
-    "executor": {
-        "result_save": {"bad": True, "good": True}
-    },
-    "evaluator": [
-        {
-            "fields": {"content": "content"},
-            "evals": [
-                {"name": "RuleColonEnd"}
-            ]
-        }
-    ]
+    "input_path": "data.json",
+    # ... 其他配置
 }
-
-input_args = InputArgs(**input_data)
-executor = Executor.exec_map["local"](input_args)
-result = executor.execute()
-print(result)
+config = InputArgs(**config_dict)
+run(config)
 ```
 
 ## 多轮对话模式