Commit d23876a

Merge pull request #134 from seanpjlab/dev

feat: 1. Standardize dataman class name 2. update README.md: delete changelog and add deepwiki badge

2 parents c0411d4 + 2245c1c commit d23876a
File tree

8 files changed (+16 −15 lines)
README.md — 1 addition & 4 deletions

```diff
@@ -14,6 +14,7 @@
     <a href="https://github.com/DataEval/dingo/network/members"><img src="https://img.shields.io/github/forks/DataEval/dingo" alt="GitHub forks"></a>
     <a href="https://github.com/DataEval/dingo/issues"><img src="https://img.shields.io/github/issues/DataEval/dingo" alt="GitHub issues"></a>
     <a href="https://mseep.ai/app/dataeval-dingo"><img src="https://mseep.net/pr/dataeval-dingo-badge.png" alt="MseeP.ai Security Assessment Badge" height="20"></a>
+    <a href="https://deepwiki.com/MigoXLab/dingo"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
 </p>
 
 </div>
@@ -33,10 +34,6 @@
 </p>
 
 
-# Changelog
-
-- 2024/12/27: Project Initialization
-
 # Introduction
 
 Dingo is a data quality evaluation tool that helps you automatically detect data quality issues in your datasets. Dingo provides a variety of built-in rules and model evaluation methods, and also supports custom evaluation methods. Dingo supports commonly used text datasets and multimodal datasets, including pre-training datasets, fine-tuning datasets, and evaluation datasets. In addition, Dingo supports multiple usage methods, including local CLI and SDK, making it easy to integrate into various evaluation platforms, such as [OpenCompass](https://github.com/open-compass/opencompass).
```

README_ja.md — 1 addition & 4 deletions

```diff
@@ -14,6 +14,7 @@
     <a href="https://github.com/DataEval/dingo/network/members"><img src="https://img.shields.io/github/forks/DataEval/dingo" alt="GitHub forks"></a>
     <a href="https://github.com/DataEval/dingo/issues"><img src="https://img.shields.io/github/issues/DataEval/dingo" alt="GitHub issues"></a>
     <a href="https://mseep.ai/app/dataeval-dingo"><img src="https://mseep.net/pr/dataeval-dingo-badge.png" alt="MseeP.ai Security Assessment Badge" height="20"></a>
+    <a href="https://deepwiki.com/MigoXLab/dingo"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
 </p>
 
 </div>
@@ -33,10 +34,6 @@
 </p>
 
 
-# 更新履歴
-
-- 2024/12/27: プロジェクト初期化
-
 # はじめに
 
 Dingoは、データセット内のデータ品質問題を自動的に検出するデータ品質評価ツールです。Dingoは様々な組み込みルールとモデル評価手法を提供し、カスタム評価手法もサポートしています。Dingoは一般的に使用されるテキストデータセットとマルチモーダルデータセット(事前学習データセット、ファインチューニングデータセット、評価データセットを含む)をサポートしています。さらに、DingoはローカルCLIやSDKなど複数の使用方法をサポートし、[OpenCompass](https://github.com/open-compass/opencompass)などの様々な評価プラットフォームに簡単に統合できます。
```

README_zh-CN.md — 1 addition & 3 deletions

```diff
@@ -14,6 +14,7 @@
     <a href="https://github.com/DataEval/dingo/network/members"><img src="https://img.shields.io/github/forks/DataEval/dingo" alt="GitHub 分支"></a>
     <a href="https://github.com/DataEval/dingo/issues"><img src="https://img.shields.io/github/issues/DataEval/dingo" alt="GitHub 问题"></a>
     <a href="https://mseep.ai/app/dataeval-dingo"><img src="https://mseep.net/pr/dataeval-dingo-badge.png" alt="MseeP.ai 安全评估徽章" height="20"></a>
+    <a href="https://deepwiki.com/MigoXLab/dingo"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
 </p>
 
 
@@ -30,9 +31,6 @@
 
 </div>
 
-# Changelog
-
-- 2024/12/27: Project Initialization
 
 # 介绍
 
```
app_gradio/app.py — 1 addition & 0 deletions

```diff
@@ -236,6 +236,7 @@ def get_data_column_mapping():
     'LLMText3HHonest': ['content'],
     'LLMClassifyTopic': ['content'],
     'LLMClassifyQR': ['content'],
+    'LLMDatamanAssessment': ['content'],
     'VLMImageRelevant': ['prompt', 'content'],
 
     # rule
```
Lines changed: 4 additions & 2 deletions

```diff
@@ -3,17 +3,19 @@
 from dingo.model import Model
 from dingo.model.llm.base_openai import BaseOpenAI
 from dingo.model.modelres import ModelRes
+from dingo.model.prompt.prompt_dataman_assessment import PromptDataManAssessment
 from dingo.model.response.response_class import ResponseScoreTypeNameReason
 from dingo.utils import log
 from dingo.utils.exception import ConvertJsonError
 
 
-@Model.llm_register("dataman_assessment")
-class DatamanAssessment(BaseOpenAI):
+@Model.llm_register("LLMDatamanAssessment")
+class LLMDatamanAssessment(BaseOpenAI):
     """
     Implementation of DataMan assessment using OpenAI API.
     Evaluates text based on 14 quality standards and assigns a domain type.
     """
+    prompt = PromptDataManAssessment
 
     @classmethod
     def process_response(cls, response: str) -> ModelRes:
```
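The rename matters because Dingo looks up evaluators by the string key passed to `@Model.llm_register`, so the key must change everywhere the old name appeared (the Gradio column mapping, `prompt_register`, and the example config). A minimal sketch of how such a string-keyed registry decorator typically works — this is an illustration of the pattern, not Dingo's actual implementation:

```python
# Hypothetical sketch of a string-keyed class registry, in the spirit
# of Model.llm_register (not Dingo's actual code).
class Model:
    llm_models = {}  # registered name -> evaluator class

    @classmethod
    def llm_register(cls, name):
        def decorator(model_cls):
            cls.llm_models[name] = model_cls  # store class under the key
            return model_cls                  # class is returned unchanged
        return decorator


@Model.llm_register("LLMDatamanAssessment")
class LLMDatamanAssessment:
    pass


# Lookup is by the registered string, which is why renaming
# "dataman_assessment" -> "LLMDatamanAssessment" must be applied
# consistently across every file that references the key.
print(Model.llm_models["LLMDatamanAssessment"] is LLMDatamanAssessment)  # → True
```

Registering under the class's own name (rather than a lowercase alias) is what "standardize dataman class name" in the commit title refers to: the registry key, class name, and config key now all match.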

dingo/model/prompt/prompt_dataman_assessment.py — 1 addition & 1 deletion

```diff
@@ -82,7 +82,7 @@
 """
 
 
-@Model.prompt_register("DATAMAN_ASSESSMENT", [], ['dataman_assessment'])
+@Model.prompt_register("DATAMAN_ASSESSMENT", [], ['LLMDatamanAssessment'])
 class PromptDataManAssessment(BasePrompt):
 
     # Metadata for documentation generation
```

docs/metrics.md — 6 additions & 0 deletions

```diff
@@ -55,3 +55,9 @@ This document provides comprehensive information about all quality metrics used
 | `QUALITY_BAD_IMG_RELEVANCE` | RuleImageTextSimilarity | Evaluates semantic similarity between image and text content using CLIP model | [Learning Transferable Visual Representations with Natural Language Supervision](https://arxiv.org/abs/2103.00020) (Radford et al., 2021) | N/A |
 | `QUALITY_BAD_IMG_SIMILARITY` | RuleImageRepeat | Detects duplicate images using PHash and CNN methods to ensure data diversity | [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) (Krizhevsky et al., 2012) | N/A |
 
+### Text Generation
+
+| Type | Metric | Description | Paper Source | Evaluation Results |
+|------|--------|-------------|--------------|-------------------|
+| `PromptLongVideoQa` | PromptLongVideoQa | Generate video-related question-answer pairs based on the summarized information of the input long video. | [VRBench: A Benchmark for Multi-Step Reasoning in Long Narrative Videos](https://arxiv.org/abs/2506.108572) (Jiashuo Yu et al., 2025) | N/A |
+
```
examples/dataman/dataman.py — 1 addition & 1 deletion

```diff
@@ -21,7 +21,7 @@
     },
     "evaluator": {
         "llm_config": {
-            "dataman_assessment": {
+            "LLMDatamanAssessment": {
                 "key": "enter your key, such as:EMPTY",
                 "api_url": "enter your local llm api url, such as:http://127.0.0.1:8080/v1",
             }
```
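The key under `llm_config` has to match the name registered with `Model.llm_register`, otherwise the evaluator is never resolved — which is exactly why this example needed updating alongside the class rename. A hedged sketch of that constraint (the `registered` set and validation loop are illustrative, not Dingo's API):

```python
# Illustrative check: every evaluator named in llm_config must be a
# registered name. "registered" stands in for Dingo's real registry.
config = {
    "evaluator": {
        "llm_config": {
            "LLMDatamanAssessment": {            # must equal the registered key
                "key": "EMPTY",                  # placeholder API key
                "api_url": "http://127.0.0.1:8080/v1",
            }
        }
    }
}

registered = {"LLMDatamanAssessment"}  # names known to the registry

for name in config["evaluator"]["llm_config"]:
    # With the old key "dataman_assessment" this lookup would fail.
    assert name in registered, f"unknown evaluator: {name}"
```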
