From 6509c4657f9cba55a9fab5b401b2812ce6af104b Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Mon, 10 Feb 2025 11:45:01 +0800 Subject: [PATCH 1/9] test --- .idea/.gitignore | 3 +++ .idea/dingo.iml | 17 +++++++++++++++++ .idea/inspectionProfiles/profiles_settings.xml | 6 ++++++ .idea/misc.xml | 4 ++++ .idea/modules.xml | 8 ++++++++ .idea/sonarlint/issuestore/index.pb | 0 .idea/sonarlint/securityhotspotstore/index.pb | 0 .idea/vcs.xml | 6 ++++++ 8 files changed, 44 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/dingo.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/sonarlint/issuestore/index.pb create mode 100644 .idea/sonarlint/securityhotspotstore/index.pb create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..26d33521 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/dingo.iml b/.idea/dingo.iml new file mode 100644 index 00000000..51951249 --- /dev/null +++ b/.idea/dingo.iml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..982ad46b --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..bfba23ca --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/sonarlint/issuestore/index.pb b/.idea/sonarlint/issuestore/index.pb new file mode 100644 index 00000000..e69de29b diff --git a/.idea/sonarlint/securityhotspotstore/index.pb b/.idea/sonarlint/securityhotspotstore/index.pb new file mode 100644 index 00000000..e69de29b diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file From ed3a397522edec50731dbc30b5f33cecda51aabb Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Mon, 10 Feb 2025 12:25:38 +0800 Subject: [PATCH 2/9] test --- .idea/.gitignore | 3 --- .idea/dingo.iml | 17 ----------------- .idea/inspectionProfiles/profiles_settings.xml | 6 ------ .idea/misc.xml | 4 ---- .idea/modules.xml | 8 -------- .idea/sonarlint/issuestore/index.pb | 0 .idea/sonarlint/securityhotspotstore/index.pb | 0 .idea/vcs.xml | 6 ------ 8 files changed, 44 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/dingo.iml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/sonarlint/issuestore/index.pb delete mode 100644 .idea/sonarlint/securityhotspotstore/index.pb delete mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 26d33521..00000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/.idea/dingo.iml b/.idea/dingo.iml deleted file mode 100644 index 51951249..00000000 --- a/.idea/dingo.iml 
+++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da..00000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 982ad46b..00000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index bfba23ca..00000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/sonarlint/issuestore/index.pb b/.idea/sonarlint/issuestore/index.pb deleted file mode 100644 index e69de29b..00000000 diff --git a/.idea/sonarlint/securityhotspotstore/index.pb b/.idea/sonarlint/securityhotspotstore/index.pb deleted file mode 100644 index e69de29b..00000000 diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7f..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 6040b69c4f3ba32f972956963a8b960ebe027a35 Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Mon, 17 Feb 2025 17:22:59 +0800 Subject: [PATCH 3/9] add kaoti prompt V3 --- .../model/prompt/prompt_text_quality_kaoti.py | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 dingo/model/prompt/prompt_text_quality_kaoti.py diff --git a/dingo/model/prompt/prompt_text_quality_kaoti.py b/dingo/model/prompt/prompt_text_quality_kaoti.py new file mode 100644 index 00000000..de370664 --- /dev/null +++ b/dingo/model/prompt/prompt_text_quality_kaoti.py @@ -0,0 +1,137 @@ +from dingo.model.model import Model +from dingo.model.prompt.base import BasePrompt + +@Model.prompt_register("TEXT_QUALITY_KAOTI", []) +class PromptTextQualityV3Kaoti(BasePrompt): + content = """ +# Role +You are an expert in language models and data quality assessment. + +# Background +The dataset is compiled from diverse sources, including social media platforms, news outlets, academic journals, and online forums. Some datasets contain image links, which may appear in the question stem or answer. If an image link is present, it is always considered valid, correct, and reasonable. + +# Goals +Your primary task is to detect formulas, tables, and other content in the text. The text consists of five parts: +1. **Question type information string**: `q_type` +2. **Question information string**: `q_main` +3. **Options information string**: `options` +4. **Answers information string**: `std_ans` +5. **Answer explanations string**: `answer_details` + +**Note**: +- If the question type is a multiple-choice question (including single-choice, multiple-choice, and true/false questions), the `options` field must contain content and cannot be left blank. +- For non-multiple-choice question types, the `options` field is allowed to be empty. +- If the text meets any of the following negative descriptions, it will be judged as low-quality data. + +# Criteria +## 1. Completeness +### 1.1 Error_Formula +Determine whether the formulas in the text can be correctly rendered by Markdown and adhere to the rendering style of MathJax or HTML, while maintaining consistency with the question and answers. 
Formula errors include, but are not limited to: +- LaTeX syntax errors +- Missing formula markers (`$`) +- Mathematical symbol errors +- Missing or excessive backslashes (`\`) +- Incorrect formula answers + +### 1.2 Error_Table +Check whether the table in the text is correct. Table errors include, but are not limited to: +- Inconsistent formatting within the table +- Unreasonable typesetting +- LaTeX or Markdown syntax errors +- Mathematical symbol errors +- Missing or excessive vertical bar symbols (`|`) +- Chaotic row and column structure +- Incorrect table content + +## 2. Effectiveness +### 2.1 Error_Split_Paragraph +Identify and mark any parts in the text that may affect coherence and readability due to unreasonable line breaks (`\n`). Key considerations: +- **Sentence integrity**: Check if sentences are unnecessarily broken into multiple lines. If a sentence should logically be a single unit but is broken by a line break (`\n`), pay attention to the lack of punctuation before and after the `\n` symbol, which is usually unreasonable. +- **Examples of incorrect usage**: + - "综上所述,我们可以确定选项\nB\"城乡社区治理\"最符合题目的要求" + - "所以,\n答案是C" + - "5.**开源工具\n**:包括各种开源的大数据工具,如Hadoop、Spark、Kafka等。" + - "其他选项\nA、C、D都与集成学习的基本原理不符。" + - "以上推理过程是根据试题集\n《22-23年理论》中的内容得出的。" + - "但对20世纪\n70年代以后的浮动汇率制时期的验证却显示出对购买力平价理论不利的结果。" + - "-C选项\n(一个U盘):U盘是存储信息的物理媒介,". + +**Note**: Since the data text is a test question, the `q_main` field is allowed to contain normal sentences separated by empty brackets `()` or underscores `__`. Pay special attention to unreasonable segmentation caused by the `\n` character. + +### 2.2 Error_Ans_Format +Ensure the quality of the answer analysis (`ans_detail`) by checking whether it is detailed, accurate, and in the expected format. Guidelines: +1. **Sensitive information**: Check if the analysis contains information about the source of the exam questions, the year, or other information that should not be disclosed. If present, mark it as low-quality. +2. **Conciseness**: Assess the level of detail in the analysis. If the analysis is too concise and lacks sufficient explanation, mark it as low-quality. + +### 2.3 Error_List_Number +Analyze the content in the `q_main` and `ans_detail` fields. If a list number appears, determine whether the numbers or letters are in the correct order. If the numbers are discontinuous, missing, or in the wrong format, indicate the specific location and provide modification suggestions. + +**Note**: You do not need to check the content itself, only the correctness of the numbers or letters. + +### 2.4 Error_Content_Position +Check the following fields for positional disorder (`q_type`, `q_main`, `options`, `std_ans`, `ans_detail`): +1. **Question type (`q_type`)**: Ensure it only describes the question type (e.g., "multiple choice", "fill in the blank") and does not include the question stem, options, answers, or answer analysis. +2. **Question stem (`q_main`)**: Ensure it only contains the main content of the question and does not include options, answers, or answer analysis. +3. **Options (`options`)**: Ensure it only contains the content of the question options (e.g., "A. Option one", "B. Option two") and does not include the question stem, answers, or answer analysis. +4. **Standard answer (`std_ans`)**: Ensure it only contains the identifier of the correct answer (e.g., "A", "B") and does not include the question stem, options, or answer analysis. + +**Rules for judgment**: +1. If the `q_main` field contains text in the format of options (e.g., "A. 
Option one"), it is considered mixed with options. +2. If the `options` field contains the question stem or answer content, it is considered mixed with the question stem or answer. +3. If the `std_ans` field is empty or contains question stem content, it is considered mixed with the question stem. + +### 2.5 Error_Options_Format_Content +Ensure the format and content of the `options` field are correct. Guidelines: +**Option format check**: +1. Mark options with redundant serial numbers as format errors. +2. Ensure there are no duplicate options. +3. Check for extra option punctuation (e.g., incorrect: "A. .张三"; correct: "B. 李四"). + +**Option content check**: +1. Ensure each option is independent and not combined with other options. +2. Mark options with incomplete or similar content as incorrectly formatted. + +## 3. Similarity +### 3.1 Error_Duplicate_Content +Identify consecutive repeated text or multiple occurrences of characters in the text. + +# Workflow +1. **Read and Evaluate**: Analyze the text based on the outlined negative criteria. +2. **Assign Type**: + - If no negative criteria are met, assign 'Good'. + - Otherwise, assign one of ['Completeness', 'Effectiveness', 'Similarity']. +3. **Assign Name**: + - 'Good' text gets 'None'. + - 'Completeness' text gets one of ["Error_Formula","Error_Table"]. + - 'Effectiveness' text gets one of ["Error_Split_Paragraph","Error_Ans_Format","Error_List_Number","Error_Content_Position","Error_Options_Format_Content"]. + - 'Similarity' text gets 'Error_Duplicate_Content'. +4. **Assign Score**: 'Good' = 1, others = 0. +5. **Provide Reason**: Clearly state the basis for evaluation. +6. **Return in JSON**: {"score": 0/1, "type": "", "name": "", "reason": ""}. + + +# Workflow +1. **Evaluate the text**: Carefully read and understand the provided text. Assess its quality based on the negative criteria. +2. **Assign a type**: + - If the text does not violate any negative criteria, the type must be `Good`. + - If the text violates any negative criteria, the type must be one of: `Completeness`, `Effectiveness`, or `Similarity`. +3. **Assign a name**: + - If the type is `Good`, the name must be `None`. + - If the type is `Completeness`, the name must be one of: `Error_Formula` or `Error_Table`. + - If the type is `Effectiveness`, the name must be one of: `Error_Split_Paragraph`, `Error_Ans_Format`, `Error_List_Number`, `Error_Content_Position`, or `Error_Options_Format_Content`. + - If the type is `Similarity`, the name must be `Error_Duplicate_Content`. +4. **Assign a score**: + - If the type is `Good`, the score is `1`. + - If the type is not `Good`, the score is `0`. +5. **Provide a reason**: Clearly explain the evaluation result. +6. **Return the results**: Output the results in JSON format: + ```json + {"score": 0/1, "type": "", "name": "", "reason": ""} + + +# Warning +Only output JSON format data, without any extraneous content. 
+ +# Input content +(Text to be evaluated goes here) +""" From 26e8be8a1828260d03ba49afa30ddf38de56737b Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Wed, 26 Feb 2025 11:02:23 +0800 Subject: [PATCH 4/9] add kaoti prompt V3 --- dingo/model/prompt/prompt_text_quality_kaoti.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/dingo/model/prompt/prompt_text_quality_kaoti.py b/dingo/model/prompt/prompt_text_quality_kaoti.py index de370664..0924c62c 100644 --- a/dingo/model/prompt/prompt_text_quality_kaoti.py +++ b/dingo/model/prompt/prompt_text_quality_kaoti.py @@ -95,20 +95,6 @@ class PromptTextQualityV3Kaoti(BasePrompt): ### 3.1 Error_Duplicate_Content Identify consecutive repeated text or multiple occurrences of characters in the text. -# Workflow -1. **Read and Evaluate**: Analyze the text based on the outlined negative criteria. -2. **Assign Type**: - - If no negative criteria are met, assign 'Good'. - - Otherwise, assign one of ['Completeness', 'Effectiveness', 'Similarity']. -3. **Assign Name**: - - 'Good' text gets 'None'. - - 'Completeness' text gets one of ["Error_Formula","Error_Table"]. - - 'Effectiveness' text gets one of ["Error_Split_Paragraph","Error_Ans_Format","Error_List_Number","Error_Content_Position","Error_Options_Format_Content"]. - - 'Similarity' text gets 'Error_Duplicate_Content'. -4. **Assign Score**: 'Good' = 1, others = 0. -5. **Provide Reason**: Clearly state the basis for evaluation. -6. **Return in JSON**: {"score": 0/1, "type": "", "name": "", "reason": ""}. - # Workflow 1. **Evaluate the text**: Carefully read and understand the provided text. Assess its quality based on the negative criteria. From c746778b1f626a78b14e022282c90bc7693dbade Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Feb 2025 03:05:33 +0000 Subject: [PATCH 5/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../model/prompt/prompt_text_quality_kaoti.py | 215 +++++++++--------- 1 file changed, 108 insertions(+), 107 deletions(-) diff --git a/dingo/model/prompt/prompt_text_quality_kaoti.py b/dingo/model/prompt/prompt_text_quality_kaoti.py index 0924c62c..5ea90d2b 100644 --- a/dingo/model/prompt/prompt_text_quality_kaoti.py +++ b/dingo/model/prompt/prompt_text_quality_kaoti.py @@ -1,116 +1,117 @@ from dingo.model.model import Model from dingo.model.prompt.base import BasePrompt + @Model.prompt_register("TEXT_QUALITY_KAOTI", []) class PromptTextQualityV3Kaoti(BasePrompt): content = """ -# Role -You are an expert in language models and data quality assessment. - -# Background -The dataset is compiled from diverse sources, including social media platforms, news outlets, academic journals, and online forums. Some datasets contain image links, which may appear in the question stem or answer. If an image link is present, it is always considered valid, correct, and reasonable. - -# Goals -Your primary task is to detect formulas, tables, and other content in the text. The text consists of five parts: -1. **Question type information string**: `q_type` -2. **Question information string**: `q_main` -3. **Options information string**: `options` -4. **Answers information string**: `std_ans` -5. **Answer explanations string**: `answer_details` - -**Note**: -- If the question type is a multiple-choice question (including single-choice, multiple-choice, and true/false questions), the `options` field must contain content and cannot be left blank. 
-- For non-multiple-choice question types, the `options` field is allowed to be empty. -- If the text meets any of the following negative descriptions, it will be judged as low-quality data. - -# Criteria -## 1. Completeness -### 1.1 Error_Formula -Determine whether the formulas in the text can be correctly rendered by Markdown and adhere to the rendering style of MathJax or HTML, while maintaining consistency with the question and answers. Formula errors include, but are not limited to: -- LaTeX syntax errors -- Missing formula markers (`$`) -- Mathematical symbol errors -- Missing or excessive backslashes (`\`) -- Incorrect formula answers - -### 1.2 Error_Table -Check whether the table in the text is correct. Table errors include, but are not limited to: -- Inconsistent formatting within the table -- Unreasonable typesetting -- LaTeX or Markdown syntax errors -- Mathematical symbol errors -- Missing or excessive vertical bar symbols (`|`) -- Chaotic row and column structure -- Incorrect table content - -## 2. Effectiveness -### 2.1 Error_Split_Paragraph -Identify and mark any parts in the text that may affect coherence and readability due to unreasonable line breaks (`\n`). Key considerations: -- **Sentence integrity**: Check if sentences are unnecessarily broken into multiple lines. If a sentence should logically be a single unit but is broken by a line break (`\n`), pay attention to the lack of punctuation before and after the `\n` symbol, which is usually unreasonable. -- **Examples of incorrect usage**: - - "综上所述,我们可以确定选项\nB\"城乡社区治理\"最符合题目的要求" - - "所以,\n答案是C" - - "5.**开源工具\n**:包括各种开源的大数据工具,如Hadoop、Spark、Kafka等。" - - "其他选项\nA、C、D都与集成学习的基本原理不符。" - - "以上推理过程是根据试题集\n《22-23年理论》中的内容得出的。" - - "但对20世纪\n70年代以后的浮动汇率制时期的验证却显示出对购买力平价理论不利的结果。" - - "-C选项\n(一个U盘):U盘是存储信息的物理媒介,". - -**Note**: Since the data text is a test question, the `q_main` field is allowed to contain normal sentences separated by empty brackets `()` or underscores `__`. Pay special attention to unreasonable segmentation caused by the `\n` character. - -### 2.2 Error_Ans_Format -Ensure the quality of the answer analysis (`ans_detail`) by checking whether it is detailed, accurate, and in the expected format. Guidelines: -1. **Sensitive information**: Check if the analysis contains information about the source of the exam questions, the year, or other information that should not be disclosed. If present, mark it as low-quality. -2. **Conciseness**: Assess the level of detail in the analysis. If the analysis is too concise and lacks sufficient explanation, mark it as low-quality. - -### 2.3 Error_List_Number -Analyze the content in the `q_main` and `ans_detail` fields. If a list number appears, determine whether the numbers or letters are in the correct order. If the numbers are discontinuous, missing, or in the wrong format, indicate the specific location and provide modification suggestions. - -**Note**: You do not need to check the content itself, only the correctness of the numbers or letters. - -### 2.4 Error_Content_Position -Check the following fields for positional disorder (`q_type`, `q_main`, `options`, `std_ans`, `ans_detail`): -1. **Question type (`q_type`)**: Ensure it only describes the question type (e.g., "multiple choice", "fill in the blank") and does not include the question stem, options, answers, or answer analysis. -2. **Question stem (`q_main`)**: Ensure it only contains the main content of the question and does not include options, answers, or answer analysis. -3. 
**Options (`options`)**: Ensure it only contains the content of the question options (e.g., "A. Option one", "B. Option two") and does not include the question stem, answers, or answer analysis. -4. **Standard answer (`std_ans`)**: Ensure it only contains the identifier of the correct answer (e.g., "A", "B") and does not include the question stem, options, or answer analysis. - -**Rules for judgment**: -1. If the `q_main` field contains text in the format of options (e.g., "A. Option one"), it is considered mixed with options. -2. If the `options` field contains the question stem or answer content, it is considered mixed with the question stem or answer. -3. If the `std_ans` field is empty or contains question stem content, it is considered mixed with the question stem. - -### 2.5 Error_Options_Format_Content -Ensure the format and content of the `options` field are correct. Guidelines: -**Option format check**: -1. Mark options with redundant serial numbers as format errors. -2. Ensure there are no duplicate options. -3. Check for extra option punctuation (e.g., incorrect: "A. .张三"; correct: "B. 李四"). - -**Option content check**: -1. Ensure each option is independent and not combined with other options. -2. Mark options with incomplete or similar content as incorrectly formatted. - -## 3. Similarity -### 3.1 Error_Duplicate_Content -Identify consecutive repeated text or multiple occurrences of characters in the text. - - -# Workflow -1. **Evaluate the text**: Carefully read and understand the provided text. Assess its quality based on the negative criteria. -2. **Assign a type**: - - If the text does not violate any negative criteria, the type must be `Good`. - - If the text violates any negative criteria, the type must be one of: `Completeness`, `Effectiveness`, or `Similarity`. -3. **Assign a name**: - - If the type is `Good`, the name must be `None`. - - If the type is `Completeness`, the name must be one of: `Error_Formula` or `Error_Table`. - - If the type is `Effectiveness`, the name must be one of: `Error_Split_Paragraph`, `Error_Ans_Format`, `Error_List_Number`, `Error_Content_Position`, or `Error_Options_Format_Content`. - - If the type is `Similarity`, the name must be `Error_Duplicate_Content`. -4. **Assign a score**: - - If the type is `Good`, the score is `1`. - - If the type is not `Good`, the score is `0`. -5. **Provide a reason**: Clearly explain the evaluation result. -6. **Return the results**: Output the results in JSON format: +# Role +You are an expert in language models and data quality assessment. + +# Background +The dataset is compiled from diverse sources, including social media platforms, news outlets, academic journals, and online forums. Some datasets contain image links, which may appear in the question stem or answer. If an image link is present, it is always considered valid, correct, and reasonable. + +# Goals +Your primary task is to detect formulas, tables, and other content in the text. The text consists of five parts: +1. **Question type information string**: `q_type` +2. **Question information string**: `q_main` +3. **Options information string**: `options` +4. **Answers information string**: `std_ans` +5. **Answer explanations string**: `answer_details` + +**Note**: +- If the question type is a multiple-choice question (including single-choice, multiple-choice, and true/false questions), the `options` field must contain content and cannot be left blank. +- For non-multiple-choice question types, the `options` field is allowed to be empty. 
+- If the text meets any of the following negative descriptions, it will be judged as low-quality data. + +# Criteria +## 1. Completeness +### 1.1 Error_Formula +Determine whether the formulas in the text can be correctly rendered by Markdown and adhere to the rendering style of MathJax or HTML, while maintaining consistency with the question and answers. Formula errors include, but are not limited to: +- LaTeX syntax errors +- Missing formula markers (`$`) +- Mathematical symbol errors +- Missing or excessive backslashes (`\`) +- Incorrect formula answers + +### 1.2 Error_Table +Check whether the table in the text is correct. Table errors include, but are not limited to: +- Inconsistent formatting within the table +- Unreasonable typesetting +- LaTeX or Markdown syntax errors +- Mathematical symbol errors +- Missing or excessive vertical bar symbols (`|`) +- Chaotic row and column structure +- Incorrect table content + +## 2. Effectiveness +### 2.1 Error_Split_Paragraph +Identify and mark any parts in the text that may affect coherence and readability due to unreasonable line breaks (`\n`). Key considerations: +- **Sentence integrity**: Check if sentences are unnecessarily broken into multiple lines. If a sentence should logically be a single unit but is broken by a line break (`\n`), pay attention to the lack of punctuation before and after the `\n` symbol, which is usually unreasonable. +- **Examples of incorrect usage**: + - "综上所述,我们可以确定选项\nB\"城乡社区治理\"最符合题目的要求" + - "所以,\n答案是C" + - "5.**开源工具\n**:包括各种开源的大数据工具,如Hadoop、Spark、Kafka等。" + - "其他选项\nA、C、D都与集成学习的基本原理不符。" + - "以上推理过程是根据试题集\n《22-23年理论》中的内容得出的。" + - "但对20世纪\n70年代以后的浮动汇率制时期的验证却显示出对购买力平价理论不利的结果。" + - "-C选项\n(一个U盘):U盘是存储信息的物理媒介,". + +**Note**: Since the data text is a test question, the `q_main` field is allowed to contain normal sentences separated by empty brackets `()` or underscores `__`. Pay special attention to unreasonable segmentation caused by the `\n` character. + +### 2.2 Error_Ans_Format +Ensure the quality of the answer analysis (`ans_detail`) by checking whether it is detailed, accurate, and in the expected format. Guidelines: +1. **Sensitive information**: Check if the analysis contains information about the source of the exam questions, the year, or other information that should not be disclosed. If present, mark it as low-quality. +2. **Conciseness**: Assess the level of detail in the analysis. If the analysis is too concise and lacks sufficient explanation, mark it as low-quality. + +### 2.3 Error_List_Number +Analyze the content in the `q_main` and `ans_detail` fields. If a list number appears, determine whether the numbers or letters are in the correct order. If the numbers are discontinuous, missing, or in the wrong format, indicate the specific location and provide modification suggestions. + +**Note**: You do not need to check the content itself, only the correctness of the numbers or letters. + +### 2.4 Error_Content_Position +Check the following fields for positional disorder (`q_type`, `q_main`, `options`, `std_ans`, `ans_detail`): +1. **Question type (`q_type`)**: Ensure it only describes the question type (e.g., "multiple choice", "fill in the blank") and does not include the question stem, options, answers, or answer analysis. +2. **Question stem (`q_main`)**: Ensure it only contains the main content of the question and does not include options, answers, or answer analysis. +3. **Options (`options`)**: Ensure it only contains the content of the question options (e.g., "A. Option one", "B. 
Option two") and does not include the question stem, answers, or answer analysis. +4. **Standard answer (`std_ans`)**: Ensure it only contains the identifier of the correct answer (e.g., "A", "B") and does not include the question stem, options, or answer analysis. + +**Rules for judgment**: +1. If the `q_main` field contains text in the format of options (e.g., "A. Option one"), it is considered mixed with options. +2. If the `options` field contains the question stem or answer content, it is considered mixed with the question stem or answer. +3. If the `std_ans` field is empty or contains question stem content, it is considered mixed with the question stem. + +### 2.5 Error_Options_Format_Content +Ensure the format and content of the `options` field are correct. Guidelines: +**Option format check**: +1. Mark options with redundant serial numbers as format errors. +2. Ensure there are no duplicate options. +3. Check for extra option punctuation (e.g., incorrect: "A. .张三"; correct: "B. 李四"). + +**Option content check**: +1. Ensure each option is independent and not combined with other options. +2. Mark options with incomplete or similar content as incorrectly formatted. + +## 3. Similarity +### 3.1 Error_Duplicate_Content +Identify consecutive repeated text or multiple occurrences of characters in the text. + + +# Workflow +1. **Evaluate the text**: Carefully read and understand the provided text. Assess its quality based on the negative criteria. +2. **Assign a type**: + - If the text does not violate any negative criteria, the type must be `Good`. + - If the text violates any negative criteria, the type must be one of: `Completeness`, `Effectiveness`, or `Similarity`. +3. **Assign a name**: + - If the type is `Good`, the name must be `None`. + - If the type is `Completeness`, the name must be one of: `Error_Formula` or `Error_Table`. + - If the type is `Effectiveness`, the name must be one of: `Error_Split_Paragraph`, `Error_Ans_Format`, `Error_List_Number`, `Error_Content_Position`, or `Error_Options_Format_Content`. + - If the type is `Similarity`, the name must be `Error_Duplicate_Content`. +4. **Assign a score**: + - If the type is `Good`, the score is `1`. + - If the type is not `Good`, the score is `0`. +5. **Provide a reason**: Clearly explain the evaluation result. +6. **Return the results**: Output the results in JSON format: ```json {"score": 0/1, "type": "", "name": "", "reason": ""} From 8f13a811825a1759489114edf53be14ec098419e Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Mon, 3 Mar 2025 14:43:21 +0800 Subject: [PATCH 6/9] add kaoti prompt md about dataset --- .../prompt/Kaoti_data_evaluated_by_prompt.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md diff --git a/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md b/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md new file mode 100644 index 00000000..b879f7fd --- /dev/null +++ b/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md @@ -0,0 +1,78 @@ +# Dataset Kaoti + +## Dataset Introduction +This dataset aims to evaluate the accuracy of the built-in kaoti prompt words in dingo, therefore, the test question data was selected to construct the test set. 
+
+| Field Name | Description |
+|--------------|-----------------------------------------------------------------------------|
+| id | DATA ID, without special meaning, users can modify it according to their own needs |
+| grade_class | The classification of students based on their academic grade levels |
+| major | Main area of knowledge and skills |
+| content | Data to be tested | |
+
+
+
+### Dataset Composition
+| Type | Count |
+|---------------------------------------------------------------------------------------|-------|
+| Positive Examples | 100 |
+| Negative Examples:<br>1. ineffectiveness<br>2. dissimilarity<br>3. incompleteness | 100 |
+
+
+## Prompt Introduction
+The built-in **PromptTextQualityV3Kaoti** is used as the prompt for this test.
+The specific prompt content can be found here: [Introduction to PromptTextQualityV3Kaoti](../../../dingo/model/prompt/prompt_text_quality_kaoti.py)
+The built-in prompt collection can be referred to: [Prompt Collection](../../../dingo/model/prompt) + +## Evaluation Results +### Concept Introduction +Both positive and negative examples will generate corresponding summary files after evaluation, so the results need to be defined and the concepts clarified. + +| Name | Description | +|-----------|-----------------------------------------------------------------------------| +| TP | True Positive: Number of positive examples evaluated as positive | +| FP | False Positive: Number of negative examples evaluated as positive | +| TN | True Negative: Number of negative examples evaluated as negative | +| FN | False Negative: Number of positive examples evaluated as negative | +| Precision | TP / (TP + FP) Ratio of positive examples among those evaluated as positive | +| Recall | TP / (TP + FN) Ratio of positive examples correctly evaluated as positive | +| F1 | 2 * Accuracy * Recall / (Accuracy + Recall) | + +### Result Display +| Dataset Name | TP | FP | TN | FN | Precision% | Recall% | F1 | +|--------------|-----|-----|-----|-----|------------|---------|------| +| redpajama | 86 | 15 | 85 | 14 | 85 | 86 | 0.856| +## Evaluation Method + +```python +from dingo.io import InputArgs +from dingo.exec import Executor + +input_data = { + "eval_group": "v2", + "input_path": "lulindong/prompt_v4_kaoti",# s3 path :qa-huawei + "save_data": True, + "save_correct": True, + "save_raw": True, + "max_workers": 10, + "batch_size": 10, + "data_format": "jsonl", + "column_content": "content", + "custom_config": + { + "prompt_list": ["PromptTextQualityV3Kaoti"], + "llm_config": + { + "detect_text_quality_detail": + { + "key": "Your Key", + "api_url": "Your Url", + } + } + } +} +input_args = InputArgs(**input_data) +executor = Executor.exec_map["local"](input_args) +result = executor.execute() +print(result) +``` From e7e550fed6a3c0a1b01ab6d64a0f43da6660b1ff Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Mon, 3 Mar 2025 15:10:31 +0800 Subject: [PATCH 7/9] add kaoti prompt md about dataset --- docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md b/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md index b879f7fd..5971a204 100644 --- a/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md +++ b/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md @@ -3,12 +3,12 @@ ## Dataset Introduction This dataset aims to evaluate the accuracy of the built-in kaoti prompt words in dingo, therefore, the test question data was selected to construct the test set. 
-| Field Name | Description | -|--------------|-----------------------------------------------------------------------------| -| id | DATA ID, without special meaning, users can modify it according to their own needs | -| grade_class | The classification of students based on their academic grade levels | -| major | Main area of knowledge and skills | -| content | Data to be tested | | +| Field Name | Description | +|--------------|------------------------------------------------------------------------------------| +| id | DATA id, without special meaning, users can modify it according to their own needs | +| grade_class | The classification of students based on their academic grade levels | +| major | Main area of knowledge and skills | +| content | Data to be tested | | From 6ca18f4a3028859766996977e7f106b0c142dc1c Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Tue, 4 Mar 2025 11:11:23 +0800 Subject: [PATCH 8/9] fix kaoti prompt md --- ...evaluated_by_prompt.md => kaoti_data_evaluated_by_prompt.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename docs/eval/prompt/{Kaoti_data_evaluated_by_prompt.md => kaoti_data_evaluated_by_prompt.md} (98%) diff --git a/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md b/docs/eval/prompt/kaoti_data_evaluated_by_prompt.md similarity index 98% rename from docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md rename to docs/eval/prompt/kaoti_data_evaluated_by_prompt.md index 5971a204..a00db617 100644 --- a/docs/eval/prompt/Kaoti_data_evaluated_by_prompt.md +++ b/docs/eval/prompt/kaoti_data_evaluated_by_prompt.md @@ -50,7 +50,7 @@ from dingo.exec import Executor input_data = { "eval_group": "v2", - "input_path": "lulindong/prompt_v4_kaoti",# s3 path :qa-huawei + "input_path": "/your/dataset/path",# s3 path :qa-huawei "save_data": True, "save_correct": True, "save_raw": True, From 66bc0cfddeff3f18d0eff84b743027f2f3ca363f Mon Sep 17 00:00:00 2001 From: pekopoke <1135796875@qq.com> Date: Tue, 4 Mar 2025 14:03:51 +0800 Subject: [PATCH 9/9] fix kaoti prompt md --- docs/eval/prompt/kaoti_data_evaluated_by_prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/eval/prompt/kaoti_data_evaluated_by_prompt.md b/docs/eval/prompt/kaoti_data_evaluated_by_prompt.md index a00db617..7b538674 100644 --- a/docs/eval/prompt/kaoti_data_evaluated_by_prompt.md +++ b/docs/eval/prompt/kaoti_data_evaluated_by_prompt.md @@ -49,7 +49,7 @@ from dingo.io import InputArgs from dingo.exec import Executor input_data = { - "eval_group": "v2", + "eval_group": "kaoti", "input_path": "/your/dataset/path",# s3 path :qa-huawei "save_data": True, "save_correct": True,
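
For reference alongside the evaluation doc added above, the sketch below shows one way the JSON verdicts that PromptTextQualityV3Kaoti asks the model to return ({"score": 0/1, "type": "", "name": "", "reason": ""}) could be tallied into the TP, FP, TN, FN, Precision, Recall, and F1 figures reported in the results table, with F1 taken as the harmonic mean of precision and recall. This is an illustrative sketch only, not dingo's internal implementation and not part of the patches; the function name and field handling are assumptions based on the schema the prompt itself specifies.

```python
import json


def summarize_verdicts(records):
    """Tally prompt verdicts against ground-truth labels.

    records: iterable of (is_positive_example, verdict_json) pairs, where
    verdict_json is the reply requested by PromptTextQualityV3Kaoti, e.g.
    '{"score": 1, "type": "Good", "name": "None", "reason": "..."}'.
    """
    tp = fp = tn = fn = 0
    for is_positive_example, verdict_json in records:
        verdict = json.loads(verdict_json)
        # A score of 1 means the text passed every negative-criteria check ("Good").
        predicted_good = verdict.get("score") == 1
        if predicted_good and is_positive_example:
            tp += 1
        elif predicted_good and not is_positive_example:
            fp += 1
        elif not predicted_good and not is_positive_example:
            tn += 1
        else:
            fn += 1
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return {"TP": tp, "FP": fp, "TN": tn, "FN": fn,
            "precision": precision, "recall": recall, "f1": f1}


# With the counts reported in the results table (TP=86, FP=15, TN=85, FN=14)
# this gives precision ~0.851, recall 0.86, and F1 ~0.856.
```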