diff --git a/dingo/model/llm/hhh/llm_text_3h.py b/dingo/model/llm/hhh/llm_text_3h.py index 919d6bca..1887cc59 100644 --- a/dingo/model/llm/hhh/llm_text_3h.py +++ b/dingo/model/llm/hhh/llm_text_3h.py @@ -13,7 +13,7 @@ class LLMText3H(BaseOpenAI): def build_messages(cls, input_data): question = input_data.prompt response = input_data.content - prompt_content = cls.prompt.content % (question, response) + prompt_content = cls.prompt % (question, response) messages = [{"role": "user", "content": prompt_content}] @@ -38,15 +38,21 @@ def process_response(cls, response: str) -> EvalDetail: result = EvalDetail(metric=cls.__name__) + # Get the quality dimension name from class name + # e.g., LLMText3HHelpful -> HELPFUL + class_prefix = "LLMText3H" + if cls.__name__.startswith(class_prefix): + quality_name = cls.__name__[len(class_prefix):].upper() + else: + quality_name = cls.__name__.upper() + # eval_status if response_model.score == 1: - tmp_name = cls.prompt.__name__[8:].upper() - result.label = [f"{QualityLabel.QUALITY_GOOD}.{tmp_name}"] + result.label = [f"{QualityLabel.QUALITY_GOOD}.{quality_name}"] result.reason = [response_model.reason] if response_model.reason else ["Response meets quality criteria"] else: result.status = True - tmp_name = "NOT_" + cls.prompt.__name__[8:].upper() - result.label = [f"QUALITY_BAD.{tmp_name}"] + result.label = [f"QUALITY_BAD.NOT_{quality_name}"] result.reason = [response_model.reason] if response_model.reason else ["Response fails quality criteria"] return result diff --git a/docs/artimuse.md b/docs/artimuse.md index f71b6191..f5d92e23 100644 --- a/docs/artimuse.md +++ b/docs/artimuse.md @@ -24,7 +24,7 @@ RuleImageArtimuse 基于 ArtiMuse 在线服务对输入图片进行美学质量 ## 核心方法 -### `eval(cls, input_data: Data) -> ModelRes` +### `eval(cls, input_data: Data) -> EvalDetail` 这是规则的主要评估方法,接收包含图像 URL 的 `Data` 对象,返回评估结果。 @@ -50,20 +50,19 @@ RuleImageArtimuse 基于 ArtiMuse 在线服务对输入图片进行美学质量 #### 返回值 -返回 `ModelRes` 对象,包含以下属性: +返回 `EvalDetail` 对象,包含以下属性: -- `eval_status`: 布尔值,表示图像质量是否不合格(低于阈值) -- `type`: 评估结果类型("Artimuse_Succeeded" 或 "Artimuse_Fail") -- `name`: 评估结果名称("BadImage" 或 "GoodImage" 或 "Exception") +- `metric`: 指标名称("RuleImageArtimuse") +- `status`: 布尔值,表示图像质量是否不合格(低于阈值)(True=不合格, False=合格) +- `label`: 质量标签列表(如 ["Artimuse_Succeeded.BadImage"] 或 ["QUALITY_GOOD"]) - `reason`: 包含详细评估信息或异常信息的数组(字符串化 JSON) ## 异常处理 -当评估过程中发生异常时,返回的 `ModelRes` 对象将包含: +当评估过程中发生异常时,返回的 `EvalDetail` 对象将包含: -- `eval_status`: `False` -- `type`: `"Artimuse_Fail"` -- `name`: `"Exception"` +- `status`: `False` +- `label`: `["Artimuse_Fail.Exception"]` - `reason`: 包含异常信息的数组 ## 使用示例 diff --git a/docs/ats_resume_guide.md b/docs/ats_resume_guide.md index f137c36a..a3254230 100644 --- a/docs/ats_resume_guide.md +++ b/docs/ats_resume_guide.md @@ -16,6 +16,12 @@ ATS 工具套件用于: 分析简历与 JD 的匹配度,输出加权匹配分数和详细分析报告。 +**核心功能:** +- 语义匹配(不仅是字符串匹配) +- 同义词自动识别(如 k8s → Kubernetes) +- 负向约束识别(Excluded 技能警告) +- 基于证据的匹配(引用简历原文) + **输入字段:** | 字段 | 类型 | 必需 | 说明 | |------|------|------|------| @@ -26,8 +32,17 @@ ATS 工具套件用于: | 字段 | 类型 | 说明 | |------|------|------| | `score` | float | 匹配分数 (0.0-1.0) | -| `error_status` | bool | 是否低于阈值 (默认 0.6) | -| `reason` | List[str] | 详细分析报告 | +| `status` | bool | 是否低于阈值 (True=低于,False=通过) | +| `reason` | List[str] | 详细分析报告(文本格式) | + +**内置同义词映射 (SYNONYM_MAP):** +``` +k8s → Kubernetes, js → JavaScript, ts → TypeScript +py → Python, tf → TensorFlow, pt → PyTorch +nodejs → Node.js, postgres → PostgreSQL +aws → Amazon Web Services, gcp → Google Cloud Platform +ml → Machine Learning, dl → Deep Learning, nlp → NLP +``` 
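+
+**归一化示意(仅为说明同义词映射的用法;`normalize_keyword` 为演示用的假设函数,非库内 API):**
+```python
+SYNONYM_MAP = {"k8s": "Kubernetes", "js": "JavaScript", "py": "Python"}
+
+def normalize_keyword(keyword: str) -> str:
+    # 按小写形式查表,未命中则原样返回
+    return SYNONYM_MAP.get(keyword.lower(), keyword)
+
+print(normalize_keyword("K8S"))  # Kubernetes
+```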
### 2. LLMResumeOptimizer(简历优化器) @@ -80,6 +95,8 @@ jd = """ match_data = Data(data_id='test_1', content=resume, prompt=jd) match_result = LLMKeywordMatcher.eval(match_data) print(f"匹配分数: {match_result.score}") +print(f"是否通过: {'通过' if not match_result.status else '未通过'}") +print(f"分析报告: {match_result.reason[0]}") # Step 2: 简历优化 optimize_data = Data( @@ -89,18 +106,23 @@ optimize_data = Data( context='{"match_details": {"missing": [{"skill": "Docker", "importance": "Required"}]}}' ) opt_result = LLMResumeOptimizer.eval(optimize_data) -print(f"优化结果: {opt_result.reason[0]}") +print(f"优化摘要: {opt_result.reason[0]}") +print(f"完整结果: {opt_result.optimized_content}") ``` ## 📊 匹配分数计算 -### 权重分配 +### 权重公式 + +``` +score = (Required_Matched × 2 + Nice_Matched × 1) / (Required_Total × 2 + Nice_Total × 1) +``` | 类别 | 权重 | 说明 | |------|------|------| -| Required (必需) | 0.7 | 缺失会显著降低分数 | -| Nice-to-have (加分) | 0.3 | 缺失影响较小 | -| Excluded (排除) | -0.1 | 存在会扣分 | +| Required (必需) | ×2 | 缺失会显著降低分数 | +| Nice-to-have (加分) | ×1 | 缺失影响较小 | +| Excluded (排除) | 不计分 | 仅生成警告,不影响分数 | ### 阈值配置 @@ -156,16 +178,31 @@ Nice-to-have (Missing): Kubernetes ### ResumeOptimizer 输出 -结果同样存放在 `result.reason[0]` 中,JSON 格式: +**`reason[0]`**: 人类可读的摘要文本 +**`optimized_content`**: 完整的 JSON 优化结果 ```python # 访问方式 result = LLMResumeOptimizer.eval(data) -import json -output = json.loads(result.reason[0]) + +# 摘要文本 +print(result.reason[0]) + +# 完整 JSON 结果 +opt = result.optimized_content +print(opt.get('optimization_summary')) +print(opt.get('section_changes')) ``` **`reason[0]` 内容示例:** +``` +Overall: 优化了专业技能板块 +Keywords Added: Docker +Associative: Kubernetes (了解概念) +Sections Modified: 专业技能 +``` + +**`optimized_content` 结构:** ```json { "optimization_summary": { diff --git a/docs/document_ocr.md b/docs/document_ocr.md index 30f7176e..9f0206e8 100644 --- a/docs/document_ocr.md +++ b/docs/document_ocr.md @@ -22,9 +22,7 @@ Dingo 提供了一种基于LLM的文档OCR解析质量评估工具,可帮助 dingo/ ├── model/ │ ├── llm/ - │ │ └── vlm_document_parsing.py # 评估器实现 - │ └── prompt/ - │ └── prompt_mineru_recognize.py # 评估提示词 + │ │ └── llm_document_parsing_ocr.py # 评估器实现(含内嵌Prompt) │── examples/ │ └── document_parser/ │ └── document_parsing_quality_ocr.py # 单条评估示例 @@ -75,11 +73,11 @@ input_data = { #### 输出结果格式 ```python -# result 是 ModelRes 对象,包含以下字段: -result.type # 错误问题一级标签: prompt中定义的一级错误大类 -result.name # 错误问题二级标签: 一级错误大类对应的详细错误标签 List[str] -result.eval_status # 错误状态: False 或 True -result.reason # 评估原因: List[str] +# result 是 EvalDetail 对象,包含以下字段: +result.metric # 指标名称: "LLMMinerURecognizeQuality" +result.label # 错误标签列表: ["error_category1.error_category2.error_label1.error_label2"] +result.status # 错误状态: False (默认值) +result.reason # 评估原因: List[str],包含完整的JSON分析结果 ``` diff --git a/docs/document_parsing_quality_guide.md b/docs/document_parsing_quality_guide.md index 9732074d..e9ca048b 100644 --- a/docs/document_parsing_quality_guide.md +++ b/docs/document_parsing_quality_guide.md @@ -1,4 +1,4 @@ -# VLMDocumentParsingQuality 文档解析评估工具 使用文档 +# VLMDocumentParsing 文档解析评估工具 使用文档 Dingo 提供了一种基于VLM的文档解析质量评估与可视化工具,可帮助您: - 评估文档解析模型输出质量 @@ -6,7 +6,7 @@ Dingo 提供了一种基于VLM的文档解析质量评估与可视化工具, ## 工具介绍 -### VLMDocumentParsingQuality:文档解析评估工具 +### VLMDocumentParsing:文档解析评估工具 #### 功能说明 该工具用于评估文档解析模型效果,具体功能包括: @@ -22,9 +22,8 @@ Dingo 提供了一种基于VLM的文档解析质量评估与可视化工具, dingo/ ├── model/ │ ├── llm/ - │ │ └── vlm_document_parsing.py # 评估器实现 - │ └── prompt/ - │ └── prompt_document_parsing.py # 评估提示词 + │ │ └── mineru/ + │ │ └── vlm_document_parsing.py # 评估器实现(含内嵌Prompt) │── examples/ │ └── document_parser/ │ └── 
vlm_document_parser_quality.py # 单条评估示例 @@ -64,7 +63,7 @@ input_data = { }, "evaluator": { "llm_config": { - "VLMDocumentParsingQuality": { + "VLMDocumentParsing": { "key": "", "api_url": "", } @@ -76,11 +75,11 @@ input_data = { #### 输出结果格式 ```python -# result 是 ModelRes 对象,包含以下字段: -result.type # 错误问题一级标签: prompt中定义的一级错误大类 -result.name # 错误问题二级标签: 一级错误大类对应的详细错误标签 List[str] -result.eval_status # 错误状态: False 或 True -result.reason # 评估原因: List[str] +# result 是 EvalDetail 对象,包含以下字段: +result.metric # 指标名称: "VLMDocumentParsing" +result.label # 错误标签列表: ["公式相关问题.行内公式漏检", "表格相关问题.单元格内容错误"] +result.status # 错误状态: False (默认值,该类不设置) +result.reason # 评估原因: List[str],包含完整的JSON分析结果 ``` @@ -114,7 +113,7 @@ if __name__ == '__main__': }, "evaluator": { "llm_config": { - "VLMDocumentParsingQuality": { + "VLMDocumentParsing": { "key": "", "api_url": "", } diff --git a/docs/factcheck_guide.md b/docs/factcheck_guide.md index c11d52f4..4112707f 100644 --- a/docs/factcheck_guide.md +++ b/docs/factcheck_guide.md @@ -64,14 +64,10 @@ data = Data( # 执行评估 result = LLMFactCheckPublic.eval(data) -# 查看结果 -print(f"Factual ratio: {result.score:.2%}") -print(f"Reason: {result.reason}") -print("\nDetailed results:") -for claim in result.raw_resp["results"]: - print(f"\nClaim: {claim.claim}") - print(f"Answer: {claim.answer}") - print(f"Reasoning: {claim.reasoning}") +# 查看结果 (返回 EvalDetail 对象) +print(f"是否通过: {'通过' if not result.status else '未通过'}") +print(f"标签: {result.label}") +print(f"详细原因: {result.reason[0]}") ``` ### 场景二:评估数据集 @@ -143,13 +139,10 @@ rag_data = { data = Data(**rag_data) result = LLMFactCheckPublic.eval(data) -# 分析结果 -print(f"Factual consistency: {result.score:.2%}") -for claim in result.raw_resp["results"]: - if claim.answer != "true": - print(f"\nPotential hallucination:") - print(f"Claim: {claim.claim}") - print(f"Evidence: {claim.reasoning}") +# 分析结果 (返回 EvalDetail 对象) +print(f"是否通过: {'通过' if not result.status else '未通过'}") +print(f"标签: {result.label}") +print(f"详细原因: {result.reason[0]}") ``` ### 场景四:多轮对话监控 @@ -173,9 +166,10 @@ for turn in conversation: data = Data(**turn) result = LLMFactCheckPublic.eval(data) print(f"\nTurn {turn['data_id']}:") - print(f"Factual ratio: {result.score:.2%}") - if result.score < LLMFactCheckPublic.threshold: + print(f"是否通过: {'通过' if not result.status else '未通过'}") + if result.status: print("Warning: Potential misinformation detected!") + print(f"详情: {result.reason[0]}") ``` ## 最佳实践 @@ -241,30 +235,16 @@ dingo/ ### 评估结果格式 ```python -ModelRes( - score=0.85, # 事实性得分 - threshold=0.8, # 判断阈值 - reason=["Found 10 claims: 8 true, 1 false, 1 unsure..."], - raw_resp={ - "claims": ["claim1", "claim2", ...], - "results": [ - FactCheckResult( - claim="...", - answer="true", - reasoning="...", - supporting_evidence=[...] - ), - ... - ], - "metrics": { - "factual_ratio": 0.85, - "true_count": 8, - "false_count": 1, - "unsure_count": 1, - "total_claims": 10 - } - } +# LLMFactCheckPublic 返回 EvalDetail 对象 +EvalDetail( + metric="LLMFactCheckPublic", # 指标名称 + status=False, # 是否未通过 (False=通过, True=未通过) + label=["QUALITY_GOOD.FACTUALITY_CHECK_PASSED"], # 质量标签 + reason=["Found 10 claims: 8 true, 1 false, 1 unsure. Factual ratio: 80.00%"] ) + +# reason[0] 包含完整的评估摘要,格式示例: +# "Found 10 claims: 8 true, 1 false, 1 unsure. 
Factual ratio: 80.00%" ``` ## 参考资料 diff --git a/docs/hallucination_guide.md b/docs/hallucination_guide.md index 50ecb9a6..2ca58899 100644 --- a/docs/hallucination_guide.md +++ b/docs/hallucination_guide.md @@ -88,8 +88,7 @@ data = Data( result = RuleHallucinationHHEM.eval(data) # 查看结果 -print(f"是否检测到幻觉: {result.eval_status}") -print(f"HHEM 分数: {getattr(result, 'score', 'N/A')}") +print(f"是否检测到幻觉: {result.status}") # True=检测到幻觉, False=未检测到 print(f"详细分析: {result.reason[0]}") ``` @@ -122,9 +121,8 @@ data = Data( result = LLMHallucination.eval(data) # 查看结果 -print(f"是否检测到幻觉: {result.eval_status}") -print(f"幻觉分数: {getattr(result, 'score', 'N/A')}") -print(f"详细原因: {result.reason[0]}") +print(f"是否检测到幻觉: {result.status}") # True=检测到幻觉, False=未检测到 +print(f"详细原因: {result.reason[0]}") # 包含幻觉分数等详细信息 ``` ## 📊 批量数据集评估 @@ -280,21 +278,30 @@ results = RuleHallucinationHHEM.batch_evaluate(data_list) # 批量更高效 ## 📊 输出结果解析 -### ModelRes 字段说明 +### RuleHallucinationHHEM (EvalDetail) 字段说明 ```python -result = RuleHallucinationHHEM.eval(data) # 或 LLMHallucination.eval(data) +result = RuleHallucinationHHEM.eval(data) -# 标准字段 -result.eval_status # bool: 是否检测到幻觉 -result.type # str: 质量类型标识 -result.name # str: 检测结果名称 +# 标准字段 (EvalDetail) +result.metric # str: 指标名称 ("RuleHallucinationHHEM") +result.status # bool: 是否检测到幻觉 (True=有幻觉, False=无幻觉) +result.label # List[str]: 质量标签 (如 ["QUALITY_BAD_HALLUCINATION.HALLUCINATION_DETECTED"]) result.reason # List[str]: 详细分析原因 +``` + +### LLMHallucination (EvalDetail) 字段说明 + +```python +result = LLMHallucination.eval(data) -# 扩展字段 -result.score # float: 幻觉分数 (0.0-1.0) -result.verdict_details # List[str]: 每个上下文的判断详情(GPT 模式) -result.consistency_scores # List[float]: HHEM 原始一致性分数(HHEM 模式) +# 标准字段 (EvalDetail) +result.metric # str: 指标名称 ("LLMHallucination") +result.status # bool: 是否检测到幻觉 (True=有幻觉, False=无幻觉) +result.label # List[str]: 质量标签 + # 有幻觉: ["QUALITY_BAD_HALLUCINATION.HALLUCINATION_DETECTED"] + # 无幻觉: ["QUALITY_GOOD.NO_HALLUCINATION"] +result.reason # List[str]: 详细分析原因(包含幻觉分数信息) ``` ### 典型输出示例 @@ -357,7 +364,7 @@ def monitor_rag_response(question, generated_answer, retrieved_docs): result = RuleHallucinationHHEM.eval(data) # 本地、快速、免费 - if result.eval_status: + if result.status: logger.warning(f"检测到幻觉: {result.reason[0]}") # 触发人工审核或回答重生成 ``` @@ -387,7 +394,7 @@ def filter_hallucinated_responses(responses_with_context): # 使用本地HHEM进行快速检测 result = RuleHallucinationHHEM.eval(data) - if not result.eval_status: # 无幻觉 + if not result.status: # 无幻觉 clean_responses.append(item) else: log_quality_issue(item, result.reason[0]) @@ -427,7 +434,7 @@ class RAGWithHallucinationDetection: hallucination_result = self.detector.eval(data) # 4. 
根据检测结果决定是否返回答案 - if hallucination_result.eval_status: + if hallucination_result.status: self.log_hallucination(question, generated_answer, hallucination_result) return { "answer": None, diff --git a/docs/html_extract_compare_v2.md b/docs/html_extract_compare_v2.md index 6637fae2..c0d92242 100644 --- a/docs/html_extract_compare_v2.md +++ b/docs/html_extract_compare_v2.md @@ -81,20 +81,20 @@ data = Data( ## 输出结果格式 ```python -# result 是 ModelRes 对象,包含以下字段: -result.type # 判断类型: "TOOL_ONE_BETTER" / "TOOL_EQUAL" / "TOOL_TWO_BETTER" -result.name # 判断名称: "Judgement_A" / "Judgement_B" / "Judgement_C" -result.eval_status # 错误状态: False (A/B) 或 True (C) +# result 是 EvalDetail 对象,包含以下字段: +result.metric # 指标名称: "LLMHtmlExtractCompareV2" +result.label # 判断标签: ["TOOL_ONE_BETTER.Judgement_A"] 等 +result.status # 错误状态: False (A/B) 或 True (C) result.reason # 推理过程: List[str] ``` ### 结果映射 -| 判断结果 | `result.type` | `result.name` | `result.eval_status` | 含义 | -|----------|---------------|---------------|----------------------|------| -| A | TOOL_ONE_BETTER | Judgement_A | False | 工具A提取的信息更完整 | -| B | TOOL_EQUAL | Judgement_B | False | 两个工具提取的信息量相同 | -| C | TOOL_TWO_BETTER | Judgement_C | True | 工具B提取的信息更完整 | +| 判断结果 | `result.label` | `result.status` | 含义 | +|----------|----------------|-----------------|------| +| A | ["TOOL_ONE_BETTER.Judgement_A"] | False | 工具A提取的信息更完整 | +| B | ["TOOL_EQUAL.Judgement_B"] | False | 两个工具提取的信息量相同 | +| C | ["TOOL_TWO_BETTER.Judgement_C"] | True | 工具B提取的信息更完整 | ## 使用示例 @@ -125,7 +125,7 @@ data = Data( result = evaluator.eval(data) # 查看结果 -print(f"判断: {result.type}") +print(f"判断: {result.label}") print(f"推理: {result.reason[0]}") ``` diff --git a/docs/image_lable_check_guide.md b/docs/image_lable_check_guide.md index cfa3fbd2..7b3818f0 100644 --- a/docs/image_lable_check_guide.md +++ b/docs/image_lable_check_guide.md @@ -235,39 +235,28 @@ if __name__ == '__main__': #### RuleImageLabelOverlap 输出结果格式: ```python -ModelRes( - name="RuleImageLabelOverlap" or "GOOD_IMG_LABEL", - type="IMG_LABEL_OVERLAP" or "NO_LABEL_OVERLAP", - eval_status=True/False, # 是否存在符合阈值的重叠 - reason=[json.dumps({ - "id": data_id, - "has_overlap": True/False, - "overlap_stats": { - "full_overlap_pairs": 完全重叠框数量, - "partial_overlap_pairs": 部分重叠框数量, - "total_boxes": 总边界框数 - }, - "visualization_path": 图像保存路径 - })] +EvalDetail( + metric="RuleImageLabelOverlap", + status=True/False, # 是否存在符合阈值的重叠 + label=["LabelOverlap_Fail.RuleImageLabelOverlap"], # 存在重叠时设置 + reason=["重叠检测:完全重叠=N,部分重叠=M"] # 重叠统计信息 ) ``` #### RuleImageLabelVisualization 输出结果格式: ```python -ModelRes( - name="RuleImageLabelVisualization" or "NO_LABEL_DATA", - type="IMG_LABEL_VISUALIZATION" or "NO_IMG_LABEL_VISUALIZATION", - eval_status=True/False, # 是否发生错误 - reason=[json.dumps({ - "id": data_id, - "visualization_status": "success", - "original_image_path": 原始图像路径, - "visualization_path": 可视化图像路径, - "label_stats": { - "total_labels": 总标注数, - "top_level_labels": 顶层标注数 - } - })] +EvalDetail( + metric="RuleImageLabelVisualization", + status=False, # 成功时为False + label=None, # 成功时不设置label + reason=None # 成功时不设置reason +) +# 错误时: +EvalDetail( + metric="RuleImageLabelVisualization", + status=False, + label=["LabelVisualization_Fail.错误类型"], # 如ParseError, InvalidAnnotationType等 + reason=["错误描述信息"] ) ``` diff --git a/docs/image_quality_check_guide.md b/docs/image_quality_check_guide.md index 2521b0ab..9c096455 100644 --- a/docs/image_quality_check_guide.md +++ b/docs/image_quality_check_guide.md @@ -432,54 +432,64 @@ if __name__ == '__main__': ### 10.2 输出结果格式 -#### 
RuleImageValid 输出结果格式: +所有图像规则返回 `EvalDetail` 对象,包含以下字段: ```python -ModelRes( - name="RuleImageValid", - type="QUALITY_BAD_IMG_EFFECTIVENESS", - eval_status=True/False, # 是否为无效图像 - reason=["Image is not valid: all white or black"] # 错误原因 +EvalDetail( + metric="RuleImageValid", # 指标名称 + status=True/False, # 是否未通过 (True=未通过, False=通过) + label=["QUALITY_BAD_IMG_EFFECTIVENESS.RuleImageValid"], # 质量标签 + reason=["Image is not valid: all white or black"] # 详细原因 ) ``` -#### RuleImageSizeValid 输出结果格式: +#### RuleImageValid 输出结果示例: ```python -ModelRes( - name="RuleImageSizeValid", - type="QUALITY_BAD_IMG_EFFECTIVENESS", - eval_status=True/False, # 图像尺寸是否无效 - reason=["Image size is not valid, the ratio of width to height: 比值"] # 错误原因 +EvalDetail( + metric="RuleImageValid", + status=True, # 是否为无效图像 + label=["QUALITY_BAD_IMG_EFFECTIVENESS.RuleImageValid"], + reason=["Image is not valid: all white or black"] ) ``` -#### RuleImageQuality 输出结果格式: +#### RuleImageSizeValid 输出结果示例: ```python -ModelRes( - name="RuleImageQuality", - type="QUALITY_BAD_IMG_EFFECTIVENESS", - eval_status=True/False, # 图像质量是否不满足要求 - reason=["Image quality is not satisfied, ratio: 评分值"] # 错误原因 +EvalDetail( + metric="RuleImageSizeValid", + status=True, # 图像尺寸是否无效 + label=["QUALITY_BAD_IMG_EFFECTIVENESS.RuleImageSizeValid"], + reason=["Image size is not valid, the ratio of width to height: 比值"] ) ``` -#### RuleImageRepeat 输出结果格式: +#### RuleImageQuality 输出结果示例: ```python -ModelRes( - name="RuleImageRepeat", - type="QUALITY_BAD_IMG_SIMILARITY", - eval_status=True/False, # 是否存在重复图像 +EvalDetail( + metric="RuleImageQuality", + status=True, # 图像质量是否不满足要求 + label=["QUALITY_BAD_IMG_EFFECTIVENESS.RuleImageQuality"], + reason=["Image quality is not satisfied, ratio: 评分值"] +) +``` + +#### RuleImageRepeat 输出结果示例: +```python +EvalDetail( + metric="RuleImageRepeat", + status=True, # 是否存在重复图像 + label=["QUALITY_BAD_IMG_SIMILARITY.RuleImageRepeat"], reason=["图像1 -> [重复图像列表]", ..., {"duplicate_ratio": 重复率}] ) ``` -#### RuleImageTextSimilarity 输出结果格式: +#### RuleImageTextSimilarity 输出结果示例: ```python -ModelRes( - name="RuleImageTextSimilarity", - type="QUALITY_BAD_IMG_RELEVANCE", - eval_status=True/False, # 图像与文本相似度是否不足 - reason=["Image quality is not satisfied, ratio: 相似度值"] # 错误原因 +EvalDetail( + metric="RuleImageTextSimilarity", + status=True, # 图像与文本相似度是否不足 + label=["QUALITY_BAD_IMG_RELEVANCE.RuleImageTextSimilarity"], + reason=["Image quality is not satisfied, ratio: 相似度值"] ) ``` diff --git a/docs/layout_quality_guide.md b/docs/layout_quality_guide.md index a28b3dbd..3210b5b5 100644 --- a/docs/layout_quality_guide.md +++ b/docs/layout_quality_guide.md @@ -21,9 +21,7 @@ Dingo 提供了一种基于VLM的Layout布局检测质量评估,可帮助您 dingo/ ├── model/ │ ├── llm/ - │ │ └── vlm_layout_quality.py # 评估器实现 - │ └── prompt/ - │ └── prompt_layout_quality.py # 评估提示词 + │ │ └── vlm_layout_quality.py # 评估器实现(含内嵌Prompt) │── examples/ │ └── document_parser/ │ └── vlm_layout_quality.py # 评估示例 @@ -36,7 +34,7 @@ dingo/ ``` ##### 评估提示词 -我们的评估效果依赖于精心设计的 Prompt。其核心思想是: +我们的评估效果依赖于精心设计的 Prompt(内嵌在 `vlm_layout_quality.py` 中)。其核心思想是: 1. Layout布局检测元素列别,我们基于Mineru的输出类型,来设定提示词。 2. 
分层错误标签:我们将布局检测问题分为5个大类:检测遗漏错误、检测不准错误、类别错误、阅读顺序错、其他错误。 @@ -79,11 +77,11 @@ input_data = { #### 输出结果格式 ```python -# result 是 ModelRes 对象,包含以下字段: -result.type # 错误问题一级标签: prompt中定义错误类别 -result.name # 错误描述: 错误列别对应的详细错描述 -result.eval_status # 错误状态: False 或 True -result.reason # 评估原因: List[str] +# result 是 EvalDetail 对象,包含以下字段: +result.metric # 指标名称: "VLMLayoutQuality" +result.label # 错误标签列表: 从JSON响应中提取的eval_details字段列表 +result.status # 错误状态: False (默认值,该类不设置) +result.reason # 评估原因: List[str],包含完整的JSON分析结果 ``` diff --git a/examples/3h/3h_eval.py b/examples/3h/3h_eval.py index a0c7b0fe..08d78941 100644 --- a/examples/3h/3h_eval.py +++ b/examples/3h/3h_eval.py @@ -4,10 +4,14 @@ from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': - OPENAI_MODEL = 'deepseek-chat' - OPENAI_URL = 'https://api.deepseek.com/v1' - OPENAI_KEY = os.getenv("OPENAI_KEY") + # Configure LLM (set your API key via environment variable OPENAI_KEY) + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o") + OPENAI_URL = os.getenv("OPENAI_URL", "https://api.openai.com/v1") + OPENAI_KEY = os.getenv("OPENAI_KEY", "YOUR_API_KEY") # Set OPENAI_KEY env var common_config = { "model": OPENAI_MODEL, "key": OPENAI_KEY, @@ -15,7 +19,7 @@ } input_data = { - "input_path": str(Path("test/data/test_3h_jsonl.jsonl")), + "input_path": str(PROJECT_ROOT / "test/data/test_3h_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl" diff --git a/examples/artimuse/artimuse.py b/examples/artimuse/artimuse.py index c8ee0d91..673d09ab 100644 --- a/examples/artimuse/artimuse.py +++ b/examples/artimuse/artimuse.py @@ -1,9 +1,14 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_imgae_artimuse.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_imgae_artimuse.jsonl"), "dataset": { "source": "local", "format": "jsonl" diff --git a/examples/ats_resume/sdk_keyword_matcher.py b/examples/ats_resume/sdk_keyword_matcher.py index a2d9be42..29132cb7 100644 --- a/examples/ats_resume/sdk_keyword_matcher.py +++ b/examples/ats_resume/sdk_keyword_matcher.py @@ -20,15 +20,17 @@ - Missing required/nice-to-have skills """ +import os + from dingo.config.input_args import EvaluatorLLMArgs from dingo.io.input import Data from dingo.model.llm.llm_keyword_matcher import LLMKeywordMatcher -# Configure LLM +# Configure LLM (set your API key via environment variable OPENAI_KEY) LLMKeywordMatcher.dynamic_config = EvaluatorLLMArgs( - key='sk-xxx', # Replace with your API key - api_url='https://api.deepseek.com', - model='deepseek-chat', + key=os.getenv("OPENAI_KEY", "YOUR_API_KEY"), # Replace with your API key or set OPENAI_KEY env var + api_url=os.getenv("OPENAI_URL", "https://api.openai.com/v1"), + model=os.getenv("OPENAI_MODEL", "gpt-4o"), ) @@ -74,7 +76,7 @@ def example_1_basic_matching(): result = LLMKeywordMatcher.eval(data) print(f"Match Score: {getattr(result, 'score', 'N/A')}") - print(f"Error Status: {result.error_status}") + print(f"Status: {result.status}") # True = has issues, False = passed print(f"Reason:\n{result.reason[0]}") print() @@ -121,7 +123,7 @@ def example_2_english_resume(): result = LLMKeywordMatcher.eval(data) print(f"Match Score: {getattr(result, 'score', 'N/A')}") - print(f"Error Status: {result.error_status}") + print(f"Status: {result.status}") # True 
= has issues, False = passed print(f"Reason:\n{result.reason[0]}") print() @@ -155,7 +157,7 @@ def example_3_low_match(): result = LLMKeywordMatcher.eval(data) print(f"Match Score: {getattr(result, 'score', 'N/A')}") - print(f"Error Status: {result.error_status}") # Should be True (low match) + print(f"Status: {result.status}") # True = has issues (low match), False = passed print(f"Reason:\n{result.reason[0]}") print() diff --git a/examples/ats_resume/sdk_resume_optimizer.py b/examples/ats_resume/sdk_resume_optimizer.py index 53fbf6a6..a3f78d5a 100644 --- a/examples/ats_resume/sdk_resume_optimizer.py +++ b/examples/ats_resume/sdk_resume_optimizer.py @@ -18,15 +18,17 @@ - Section-by-section changes """ +import os + from dingo.config.input_args import EvaluatorLLMArgs from dingo.io.input import Data from dingo.model.llm.llm_resume_optimizer import LLMResumeOptimizer -# Configure LLM +# Configure LLM (从环境变量读取) LLMResumeOptimizer.dynamic_config = EvaluatorLLMArgs( - key='sk-xxx', # Replace with your API key - api_url='https://api.deepseek.com', - model='deepseek-chat', + key=os.getenv("OPENAI_API_KEY", ""), + api_url=os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com"), + model=os.getenv("OPENAI_MODEL", "deepseek-chat"), ) diff --git a/examples/audio/audioSnr.py b/examples/audio/audioSnr.py index 8be62d11..3544e663 100644 --- a/examples/audio/audioSnr.py +++ b/examples/audio/audioSnr.py @@ -1,12 +1,14 @@ -import os from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": str(Path("test/data/test_audio_snr.jsonl")), + "input_path": str(PROJECT_ROOT / "test/data/test_audio_snr.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/classify/sdk_QR_classification.py b/examples/classify/sdk_QR_classification.py index 1c528f2b..1cd60330 100644 --- a/examples/classify/sdk_QR_classification.py +++ b/examples/classify/sdk_QR_classification.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def classify_QR(): input_data = { - "input_path": "../../test/data/test_imgQR_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_imgQR_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/classify/sdk_topic_classifcation.py b/examples/classify/sdk_topic_classifcation.py index b95591c0..2055a5f4 100644 --- a/examples/classify/sdk_topic_classifcation.py +++ b/examples/classify/sdk_topic_classifcation.py @@ -1,10 +1,23 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + +# Configure LLM (set your API key via environment variable OPENAI_KEY) +LLM_CONFIG = { + "key": os.getenv("OPENAI_KEY", "YOUR_API_KEY"), + "api_url": os.getenv("OPENAI_URL", "https://api.openai.com/v1"), + "model": os.getenv("OPENAI_MODEL", "gpt-4o") +} + def classify_topic(): input_data = { - "input_path": "../../test/data/test_sft_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_sft_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl" @@ -19,7 +32,7 @@ def classify_topic(): { "fields": {"content": "question"}, "evals": [ - {"name": "LLMClassifyTopic", "config": {"key": "", "api_url": ""}} + {"name": "LLMClassifyTopic", "config": LLM_CONFIG} ] 
} ] diff --git a/examples/compare/compare_code.py b/examples/compare/compare_code.py index 14c21c97..942533c8 100644 --- a/examples/compare/compare_code.py +++ b/examples/compare/compare_code.py @@ -1,8 +1,13 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + input_data = { - 'input_path': '../../test/data/compare/WebMainBench_test_1011_dataset_with_results_clean.jsonl', + 'input_path': str(PROJECT_ROOT / 'test/data/compare/WebMainBench_test_1011_dataset_with_results_clean.jsonl'), 'dataset': { 'source': 'local', 'format': 'jsonl', diff --git a/examples/compare/compare_math.py b/examples/compare/compare_math.py index b027fb4a..46f1fba6 100644 --- a/examples/compare/compare_math.py +++ b/examples/compare/compare_math.py @@ -1,8 +1,13 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + input_data = { - 'input_path': '../../test/data/compare/WebMainBench_test_1011_dataset_with_results_clean.jsonl', + 'input_path': str(PROJECT_ROOT / 'test/data/compare/WebMainBench_test_1011_dataset_with_results_clean.jsonl'), 'dataset': { 'source': 'local', 'format': 'jsonl', diff --git a/examples/compare/compare_table.py b/examples/compare/compare_table.py index 9d9f2426..3b5bb3ce 100644 --- a/examples/compare/compare_table.py +++ b/examples/compare/compare_table.py @@ -1,8 +1,13 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + input_data = { - 'input_path': '../../test/data/compare/WebMainBench_test_1011_dataset_with_results_clean_llm_webkit_html.jsonl', + 'input_path': str(PROJECT_ROOT / 'test/data/compare/WebMainBench_test_1011_dataset_with_results_clean_llm_webkit_html.jsonl'), 'dataset': { 'source': 'local', 'format': 'jsonl', diff --git a/examples/compare/html_extract_compare_v1.py b/examples/compare/html_extract_compare_v1.py index b69041bd..a45ed890 100644 --- a/examples/compare/html_extract_compare_v1.py +++ b/examples/compare/html_extract_compare_v1.py @@ -1,9 +1,14 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/compare/old_new_compare_10000.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/compare/old_new_compare_10000.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/compare/html_extract_compare_v2_example_dataset.py b/examples/compare/html_extract_compare_v2_example_dataset.py index 6197d466..f4449b2c 100644 --- a/examples/compare/html_extract_compare_v2_example_dataset.py +++ b/examples/compare/html_extract_compare_v2_example_dataset.py @@ -27,6 +27,9 @@ from dingo.config.input_args import InputArgs from dingo.exec.base import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + # API 配置 OPENAI_MODEL = 'deepseek-chat' OPENAI_URL = os.getenv("OPENAI_BASE_URL") @@ -53,7 +56,7 @@ def evaluate_html_extract_compare_dataset(): # 配置参数 input_data = { "task_name": "html_extract_compare_v2_evaluation", - "input_path": str(Path("test/data/html_extract_compare_test.jsonl")), + "input_path": str(PROJECT_ROOT / "test/data/html_extract_compare_test.jsonl"), "output_path": "output/html_extract_compare_evaluation/", # "log_level": 
"INFO", diff --git a/examples/continue/continue.py b/examples/continue/continue.py index 5fbe9e8a..889b5495 100644 --- a/examples/continue/continue.py +++ b/examples/continue/continue.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def exec_first(): input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl" @@ -35,7 +40,7 @@ def exec_first(): def exec_second(): input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/core/score.py b/examples/core/score.py index c886db58..c3502bb7 100644 --- a/examples/core/score.py +++ b/examples/core/score.py @@ -5,9 +5,10 @@ from dingo.model.llm.llm_text_quality_model_base import LLMTextQualityModelBase from dingo.model.rule.rule_common import RuleEnterAndSpace -OPENAI_MODEL = 'deepseek-chat' -OPENAI_URL = 'https://api.deepseek.com/v1' -OPENAI_KEY = os.getenv("OPENAI_KEY") +# Configure LLM (set your API key via environment variable OPENAI_KEY) +OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o") +OPENAI_URL = os.getenv("OPENAI_URL", "https://api.openai.com/v1") +OPENAI_KEY = os.getenv("OPENAI_KEY", "YOUR_API_KEY") # Set OPENAI_KEY env var def llm(): diff --git a/examples/custom/sdk_custom_llm.py b/examples/custom/sdk_custom_llm.py index 8c61810d..ba7ea604 100644 --- a/examples/custom/sdk_custom_llm.py +++ b/examples/custom/sdk_custom_llm.py @@ -1,9 +1,22 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + +# Configure LLM (set your API key via environment variable OPENAI_KEY) +LLM_CONFIG = { + "key": os.getenv("OPENAI_KEY", "YOUR_API_KEY"), + "api_url": os.getenv("OPENAI_URL", "https://api.openai.com/v1"), + "model": os.getenv("OPENAI_MODEL", "gpt-4o") +} + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -18,7 +31,7 @@ { "fields": {"content": "content"}, "evals": [ - {"name": "LLMTextRepeat", "config": {"key": "", "api_url": ""}}, + {"name": "LLMTextRepeat", "config": LLM_CONFIG}, ] } ] diff --git a/examples/custom/sdk_custom_rule.py b/examples/custom/sdk_custom_rule.py index e155fb88..79563564 100644 --- a/examples/custom/sdk_custom_rule.py +++ b/examples/custom/sdk_custom_rule.py @@ -1,9 +1,14 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_local_json.json", + "input_path": str(PROJECT_ROOT / "test/data/test_local_json.json"), "dataset": { "source": "local", "format": "json", diff --git a/examples/dataman/dataman.py b/examples/dataman/dataman.py index bd51de64..1849815c 100644 --- a/examples/dataman/dataman.py +++ b/examples/dataman/dataman.py @@ -1,9 +1,22 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + +# Configure LLM 
(set your API key via environment variable OPENAI_KEY) +LLM_CONFIG = { + "key": os.getenv("OPENAI_KEY", "YOUR_API_KEY"), + "api_url": os.getenv("OPENAI_URL", "https://api.openai.com/v1"), + "model": os.getenv("OPENAI_MODEL", "gpt-4o") +} + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_dataman_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_dataman_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -20,7 +33,7 @@ { "fields": {"content": "content"}, "evals": [ - {"name": "LLMDatamanAssessment", "config": {"key": "", "api_url": ""}}, + {"name": "LLMDatamanAssessment", "config": LLM_CONFIG}, ] } ] diff --git a/examples/document_parser/document_parsing_quality_ocr.py b/examples/document_parser/document_parsing_quality_ocr.py index 5a61b621..705df288 100644 --- a/examples/document_parser/document_parsing_quality_ocr.py +++ b/examples/document_parser/document_parsing_quality_ocr.py @@ -1,9 +1,19 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") + input_data = { - "input_path": "../../test/data/test_document_OCR_recognize.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_document_OCR_recognize.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -18,7 +28,7 @@ { "fields": {"id": "id", "content": "pred_content", "prompt": "gt_markdown"}, "evals": [ - {"name": "LLMMinerURecognizeQuality", "config": {"key": "", "api_url": ""}}, + {"name": "LLMMinerURecognizeQuality", "config": {"key": OPENAI_KEY, "api_url": OPENAI_URL, "model": OPENAI_MODEL}}, ] } ] diff --git a/examples/document_parser/document_parsing_quality_ocr_train.py b/examples/document_parser/document_parsing_quality_ocr_train.py index de42b46c..918d9772 100644 --- a/examples/document_parser/document_parsing_quality_ocr_train.py +++ b/examples/document_parser/document_parsing_quality_ocr_train.py @@ -1,9 +1,14 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "test/data/test_document_OCR_recognize.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_document_OCR_recognize.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/document_parser/vlm_document_parser_quality.py b/examples/document_parser/vlm_document_parser_quality.py index ac8b60ef..83c96f2b 100644 --- a/examples/document_parser/vlm_document_parser_quality.py +++ b/examples/document_parser/vlm_document_parser_quality.py @@ -1,9 +1,14 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_img_md.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_img_md.jsonl"), "dataset": { "source": "local", "format": "image", diff --git a/examples/document_parser/vlm_layout_quality.py b/examples/document_parser/vlm_layout_quality.py index b165223f..d1219919 100644 --- a/examples/document_parser/vlm_layout_quality.py +++ b/examples/document_parser/vlm_layout_quality.py @@ -1,9 +1,14 @@ +from 
pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_layout_quality.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_layout_quality.jsonl"), "dataset": { "source": "local", "format": "image", diff --git a/examples/factcheck/dataset_factcheck_evaluation.py b/examples/factcheck/dataset_factcheck_evaluation.py index 479d83fa..dd3d72d3 100644 --- a/examples/factcheck/dataset_factcheck_evaluation.py +++ b/examples/factcheck/dataset_factcheck_evaluation.py @@ -16,9 +16,9 @@ # Force import factuality evaluation modules from dingo.model.llm.llm_factcheck_public import LLMFactCheckPublic -OPENAI_MODEL = 'deepseek-chat' -OPENAI_URL = 'https://api.deepseek.com/v1' -OPENAI_KEY = os.getenv("OPENAI_KEY") +OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") +OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") +OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") def evaluate_factuality_jsonl_dataset(): @@ -92,9 +92,8 @@ def evaluate_single_data_example(): result = evaluator.eval(test_data) print("\n=== Evaluation Result ===") - print(f"Error Status: {result.eval_status}") - print(f"Type: {result.type}") - print(f"Name: {result.name}") + print(f"Error Status: {result.status}") + print(f"Label: {result.label}") print(f"Reason: {result.reason}") diff --git a/examples/hallucination/sdk_hallucination_detection.py b/examples/hallucination/sdk_hallucination_detection.py index d74119b2..40f292c5 100644 --- a/examples/hallucination/sdk_hallucination_detection.py +++ b/examples/hallucination/sdk_hallucination_detection.py @@ -8,15 +8,17 @@ against provided reference contexts. 
""" +import os + from dingo.config.input_args import EvaluatorLLMArgs from dingo.io.input import Data from dingo.model.llm.llm_hallucination import LLMHallucination -# Configure LLM +# Configure LLM (set your API key via environment variable OPENAI_KEY) LLMHallucination.dynamic_config = EvaluatorLLMArgs( - key='sk-xxx', - api_url='https://api.deepseek.com', - model='deepseek-chat', + key=os.getenv("OPENAI_KEY", "YOUR_API_KEY"), # Replace with your API key or set OPENAI_KEY env var + api_url=os.getenv("OPENAI_URL", "https://api.openai.com/v1"), + model=os.getenv("OPENAI_MODEL", "gpt-4o"), ) @@ -34,10 +36,8 @@ def example_1_basic_hallucination_detection(): result = LLMHallucination.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"Reason: {result.reason[0]}") print(f"Score: {getattr(result, 'score', 'N/A')}") print() @@ -57,10 +57,8 @@ def example_2_no_hallucination(): result = LLMHallucination.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"Reason: {result.reason[0]}") print(f"Score: {getattr(result, 'score', 'N/A')}") print() @@ -86,14 +84,9 @@ def example_3_multiple_contexts(): result = LLMHallucination.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"Score: {getattr(result, 'score', 'N/A')}") - # print(f"Verdict Details:") - # for detail in getattr(result, 'verdict_details', []): - # print(f" - {detail}") print() @@ -118,10 +111,8 @@ def example_4_rag_scenario(): result = LLMHallucination.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"Score: {getattr(result, 'score', 'N/A')}") print("Detailed Analysis:") print(result.reason[0]) @@ -141,16 +132,14 @@ def example_5_missing_context(): result = LLMHallucination.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"Reason: {result.reason[0]}") print() def example_6_clear_hallucination(): - """Example 6: Clear hallucination case that triggers eval_status=True""" + """Example 6: Clear hallucination case that triggers status=True""" print("=== Example 6: Clear Hallucination (Error Triggered) ===") # Create a case where the response clearly contradicts multiple contexts @@ -170,17 +159,10 @@ def example_6_clear_hallucination(): result = LLMHallucination.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: 
{result.eval_details}") - print(f"Score: {getattr(result, 'score', 'N/A')}") - print("Detailed Analysis:") - print(result.reason[0]) - # if hasattr(result, 'verdict_details'): - # print("Verdict Details:") - # for detail in result.verdict_details: - # print(f" - {detail}") + print(f"Error Status: {result.status}") + print(f"Label: {result.label}") + print(f"Detailed Analysis:") + print(result.reason[0] if result.reason else "N/A") print() diff --git a/examples/hallucination/sdk_rule_hhem_detection.py b/examples/hallucination/sdk_rule_hhem_detection.py index 5302a880..576fbdc6 100644 --- a/examples/hallucination/sdk_rule_hhem_detection.py +++ b/examples/hallucination/sdk_rule_hhem_detection.py @@ -33,14 +33,12 @@ def example_1_basic_rule_hhem_detection(): result = RuleHallucinationHHEM.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Error Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"HHEM Score: {getattr(result, 'score', 'N/A'):.3f}") print(f"Threshold: {RuleHallucinationHHEM.dynamic_config.threshold}") print("\nDetailed Analysis:") - print(result.reason[0]) + print(result.reason[0] if result.reason else "N/A") print() @@ -61,13 +59,11 @@ def example_2_no_hallucination_rule(): result = RuleHallucinationHHEM.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Error Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"HHEM Score: {getattr(result, 'score', 'N/A'):.3f}") print("\nDetailed Analysis:") - print(result.reason[0]) + print(result.reason[0] if result.reason else "N/A") print() @@ -91,13 +87,11 @@ def example_3_complex_scenario_rule(): result = RuleHallucinationHHEM.eval(data) - print(f"Error Status: {result.eval_status}") - # print(f"Type: {result.type}") - # print(f"Name: {result.name}") - print(f"Type: {result.eval_details}") + print(f"Error Status: {result.status}") # True = hallucination detected, False = no hallucination + print(f"Label: {result.label}") print(f"HHEM Score: {getattr(result, 'score', 'N/A'):.3f}") print("\nDetailed Analysis:") - print(result.reason[0]) + print(result.reason[0] if result.reason else "N/A") print() @@ -156,7 +150,7 @@ def example_5_batch_evaluation_rule(): print("Batch Rule-based Evaluation Results:") for i, result in enumerate(results): - print(f" Item {i + 1}: Error={result.eval_status}, Score={getattr(result, 'score', 'N/A'):.3f}") + print(f" Item {i + 1}: Error={result.status}, Score={getattr(result, 'score', 'N/A'):.3f}") print() @@ -182,7 +176,7 @@ def example_6_threshold_comparison_rule(): RuleHallucinationHHEM.dynamic_config.threshold = threshold result = RuleHallucinationHHEM.eval(data) - print(f"Threshold {threshold}: Error={result.eval_status}, Score={getattr(result, 'score', 'N/A'):.3f}") + print(f"Threshold {threshold}: Error={result.status}, Score={getattr(result, 'score', 'N/A'):.3f}") # Restore original threshold RuleHallucinationHHEM.dynamic_config.threshold = original_threshold @@ -211,7 +205,7 @@ def example_7_performance_benchmark_rule(): end_time = time.time() print(f"Rule-based HHEM Inference Time: {end_time - start_time:.3f} seconds") - print(f"Result: Error={result.eval_status}, Score={getattr(result, 'score', 'N/A'):.3f}") + 
print(f"Result: Error={result.status}, Score={getattr(result, 'score', 'N/A'):.3f}") print(f"Model Info: Local HHEM-2.1-Open (Rule-based)") print() diff --git a/examples/image/sdk_image.py b/examples/image/sdk_image.py index 483e0097..d01f94d8 100644 --- a/examples/image/sdk_image.py +++ b/examples/image/sdk_image.py @@ -1,13 +1,22 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def image(): input_data = { - "input_path": "../../test/data/test_local_img.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_img.jsonl"), "dataset": { "source": "local", "format": "image", + "field": { + "id": "id", + "image": "img" + } }, "executor": { "result_save": { diff --git a/examples/image/sdk_image_label_overlap.py b/examples/image/sdk_image_label_overlap.py index 39f63265..064156f9 100644 --- a/examples/image/sdk_image_label_overlap.py +++ b/examples/image/sdk_image_label_overlap.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def image_label_overlap(): input_data = { - "input_path": "../../test/data/img_label/test_img_label_overlap.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/img_label/test_img_label_overlap.jsonl"), "dataset": { "source": "local", "format": "image", diff --git a/examples/image/sdk_image_label_visualization.py b/examples/image/sdk_image_label_visualization.py index 98753d59..4a11d153 100644 --- a/examples/image/sdk_image_label_visualization.py +++ b/examples/image/sdk_image_label_visualization.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def image_label_overlap(): input_data = { - "input_path": "../../test/data/img_label/test_img_label_visualization.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/img_label/test_img_label_visualization.jsonl"), "dataset": { "source": "local", "format": "image", diff --git a/examples/image/sdk_image_relevant.py b/examples/image/sdk_image_relevant.py index 11e95a0c..67e2e35a 100644 --- a/examples/image/sdk_image_relevant.py +++ b/examples/image/sdk_image_relevant.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def image_relevant(): input_data = { - "input_path": "../../test/data/test_img_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_img_jsonl.jsonl"), "output_path": "output/hallucination_evaluation/", "dataset": { "source": "local", diff --git a/examples/image/sdk_image_repeat.py b/examples/image/sdk_image_repeat.py index 24bd0cd5..080a5a2c 100644 --- a/examples/image/sdk_image_repeat.py +++ b/examples/image/sdk_image_repeat.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def image_repeat(): input_data = { - "input_path": "../../test/data/test_img_repeat.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_img_repeat.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/image/sdk_image_text_similar.py b/examples/image/sdk_image_text_similar.py index 6fcf0ae4..15b0ee45 100644 --- a/examples/image/sdk_image_text_similar.py +++ 
b/examples/image/sdk_image_text_similar.py @@ -1,10 +1,15 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + def image_text_similar(): input_data = { - "input_path": "../../test/data/test_img_text.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_img_text.jsonl"), "dataset": { "source": "local", "format": "image", diff --git a/examples/llm_and_rule/llm_and_rule_mix.py b/examples/llm_and_rule/llm_and_rule_mix.py index ea35f088..baa21809 100644 --- a/examples/llm_and_rule/llm_and_rule_mix.py +++ b/examples/llm_and_rule/llm_and_rule_mix.py @@ -1,15 +1,19 @@ import os +from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': - OPENAI_MODEL = 'deepseek-chat' - OPENAI_URL = 'https://api.deepseek.com/v1' - OPENAI_KEY = os.getenv("OPENAI_KEY") + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/llm_and_rule/llm_local.py b/examples/llm_and_rule/llm_local.py index 76adefe0..29666e18 100644 --- a/examples/llm_and_rule/llm_local.py +++ b/examples/llm_and_rule/llm_local.py @@ -4,6 +4,9 @@ from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") @@ -16,7 +19,7 @@ if __name__ == '__main__': input_data = { - "input_path": str(Path("test/data/test_local_jsonl.jsonl")), + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/llm_and_rule/llm_remote.py b/examples/llm_and_rule/llm_remote.py index d05c43db..5a4eb78b 100644 --- a/examples/llm_and_rule/llm_remote.py +++ b/examples/llm_and_rule/llm_remote.py @@ -1,9 +1,19 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") + input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -18,7 +28,7 @@ { "fields": {"content": "content"}, "evals": [ - {"name": "LLMTextRepeat", "config": {"model": "deepseek-chat", "key": "", "api_url": "https://api.deepseek.com/v1"}} + {"name": "LLMTextRepeat", "config": {"model": OPENAI_MODEL, "key": OPENAI_KEY, "api_url": OPENAI_URL}} ] } ] diff --git a/examples/llm_and_rule/only_llm.py b/examples/llm_and_rule/only_llm.py index 1cb17c3c..4d160225 100644 --- a/examples/llm_and_rule/only_llm.py +++ b/examples/llm_and_rule/only_llm.py @@ -1,15 +1,19 @@ import os +from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor +# 
获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': - OPENAI_MODEL = 'deepseek-chat' - OPENAI_URL = 'http://10.140.54.48:29990/v1' - OPENAI_KEY = "EMPTY" + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/llm_and_rule/only_rule.py b/examples/llm_and_rule/only_rule.py index 5d6e62aa..9c08b612 100644 --- a/examples/llm_and_rule/only_rule.py +++ b/examples/llm_and_rule/only_rule.py @@ -1,11 +1,14 @@ -import os +from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/long_video/llm_generate_qa.py b/examples/long_video/llm_generate_qa.py index 2297df1c..207e3864 100644 --- a/examples/long_video/llm_generate_qa.py +++ b/examples/long_video/llm_generate_qa.py @@ -1,9 +1,19 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") + input_data = { - "input_path": "../../test/data/test_long_video_qa.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_long_video_qa.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -18,7 +28,7 @@ { "fields": {"id": "video_id", "content": "summary"}, "evals": [ - {"name": "LLMLongVideoQa", "config": {"key": "", "api_url": ""}} + {"name": "LLMLongVideoQa", "config": {"key": OPENAI_KEY, "api_url": OPENAI_URL, "model": OPENAI_MODEL}} ] } ] diff --git a/examples/meta_rater/sdk_meta_rater_evaluation.py b/examples/meta_rater/sdk_meta_rater_evaluation.py index 4434014e..7597b156 100644 --- a/examples/meta_rater/sdk_meta_rater_evaluation.py +++ b/examples/meta_rater/sdk_meta_rater_evaluation.py @@ -1,9 +1,19 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") + input_data = { - "input_path": "../../test/data/test_meta_rater.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_meta_rater.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -18,10 +28,10 @@ { "fields": {"content": "content"}, "evals": [ - {"name": "LLMMetaRaterEvaluation", "config": {"key": "", "api_url": ""}}, - {"name": "PromptMetaRaterReadability", "config": {"key": "", "api_url": ""}}, - {"name": "PromptMetaRaterReasoning", "config": {"key": "", "api_url": ""}}, - {"name": "PromptMetaRaterCleanliness", "config": {"key": "", "api_url": ""}}, + {"name": "LLMMetaRaterEvaluation", "config": {"key": OPENAI_KEY, 
"api_url": OPENAI_URL, "model": OPENAI_MODEL}}, + {"name": "PromptMetaRaterReadability", "config": {"key": OPENAI_KEY, "api_url": OPENAI_URL, "model": OPENAI_MODEL}}, + {"name": "PromptMetaRaterReasoning", "config": {"key": OPENAI_KEY, "api_url": OPENAI_URL, "model": OPENAI_MODEL}}, + {"name": "PromptMetaRaterCleanliness", "config": {"key": OPENAI_KEY, "api_url": OPENAI_URL, "model": OPENAI_MODEL}}, ] } ] diff --git a/examples/multi_turn_dialogues/sdk_mtbench101_llm.py b/examples/multi_turn_dialogues/sdk_mtbench101_llm.py index 9aeed06c..123c32b1 100644 --- a/examples/multi_turn_dialogues/sdk_mtbench101_llm.py +++ b/examples/multi_turn_dialogues/sdk_mtbench101_llm.py @@ -1,12 +1,16 @@ import os +from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': - OPENAI_MODEL = 'deepseek-chat' - OPENAI_URL = 'https://api.deepseek.com/v1' - OPENAI_KEY = os.getenv("OPENAI_KEY") + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") common_config = { "model": OPENAI_MODEL, "key": OPENAI_KEY, @@ -14,7 +18,7 @@ } input_data = { - "input_path": "../../test/data/test_mtbench101_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_mtbench101_jsonl.jsonl"), "dataset": { "source": "local", "format": "multi_turn_dialog", diff --git a/examples/multi_turn_dialogues/sdk_mtbench101_rule_all.py b/examples/multi_turn_dialogues/sdk_mtbench101_rule_all.py index 6b6327a3..fa7c4d00 100644 --- a/examples/multi_turn_dialogues/sdk_mtbench101_rule_all.py +++ b/examples/multi_turn_dialogues/sdk_mtbench101_rule_all.py @@ -1,9 +1,14 @@ +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': input_data = { - "input_path": "../../test/data/test_mtbench101_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_mtbench101_jsonl.jsonl"), "dataset": { "source": "local", "format": "multi_turn_dialog", diff --git a/examples/rag/dataset_rag_eval_baseline.py b/examples/rag/dataset_rag_eval_baseline.py index bd1cc791..7eac9731 100644 --- a/examples/rag/dataset_rag_eval_baseline.py +++ b/examples/rag/dataset_rag_eval_baseline.py @@ -32,6 +32,9 @@ from dingo.exec import Executor from dingo.io.output.summary_model import SummaryModel +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + # 配置(从环境变量读取) OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") @@ -39,7 +42,7 @@ EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-large") # 数据文件路径 -INPUT_DATA_PATH = str(Path("test/data/fiqa.jsonl")) # 或 "test/data/ragflow_eval_data_50.jsonl" +INPUT_DATA_PATH = str(PROJECT_ROOT / "test/data/fiqa.jsonl") # 或 "test/data/ragflow_eval_data_50.jsonl" def print_metrics_summary(summary: SummaryModel): diff --git a/examples/rag/sdk_rag_eval.py b/examples/rag/sdk_rag_eval.py index c6ef154f..64c1487f 100644 --- a/examples/rag/sdk_rag_eval.py +++ b/examples/rag/sdk_rag_eval.py @@ -17,10 +17,10 @@ from dingo.model.llm.rag.llm_rag_context_relevancy import LLMRAGContextRelevancy from dingo.model.llm.rag.llm_rag_faithfulness import LLMRAGFaithfulness -# 配置(从环境变量读取,或直接设置) +# 配置(从环境变量读取) OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") OPENAI_URL = 
os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com") -OPENAI_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY") +OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") def test_faithfulness(): diff --git a/examples/register/sdk_register_llm.py b/examples/register/sdk_register_llm.py index c28ea179..647c77be 100644 --- a/examples/register/sdk_register_llm.py +++ b/examples/register/sdk_register_llm.py @@ -1,11 +1,15 @@ import os +from pathlib import Path from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -OPENAI_MODEL = 'deepseek-chat' -OPENAI_URL = 'https://api.deepseek.com/v1' -OPENAI_KEY = os.getenv("OPENAI_KEY") +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + +OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") +OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") +OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") common_config = { "model": OPENAI_MODEL, @@ -30,7 +34,7 @@ class LlmTextQualityRegister(BaseOpenAI): from dingo.exec import Executor input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", diff --git a/examples/register/sdk_register_rule.py b/examples/register/sdk_register_rule.py index 4b33f3de..30626a32 100644 --- a/examples/register/sdk_register_rule.py +++ b/examples/register/sdk_register_rule.py @@ -24,11 +24,16 @@ def eval(cls, input_data: Data) -> EvalDetail: if __name__ == '__main__': + from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor + # 获取项目根目录 + PROJECT_ROOT = Path(__file__).parent.parent.parent + input_data = { - "input_path": "../../test/data/test_local_json.json", + "input_path": str(PROJECT_ROOT / "test/data/test_local_json.json"), "dataset": { "source": "local", "format": "json", diff --git a/examples/security/text_security_politics.py b/examples/security/text_security_politics.py index 0d0cd9bd..2737a1c3 100644 --- a/examples/security/text_security_politics.py +++ b/examples/security/text_security_politics.py @@ -1,9 +1,19 @@ +import os +from pathlib import Path + from dingo.config import InputArgs from dingo.exec import Executor +# 获取项目根目录 +PROJECT_ROOT = Path(__file__).parent.parent.parent + if __name__ == '__main__': + OPENAI_MODEL = os.getenv("OPENAI_MODEL", "deepseek-chat") + OPENAI_URL = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1") + OPENAI_KEY = os.getenv("OPENAI_API_KEY", "") + input_data = { - "input_path": "../../test/data/test_local_jsonl.jsonl", + "input_path": str(PROJECT_ROOT / "test/data/test_local_jsonl.jsonl"), "dataset": { "source": "local", "format": "jsonl", @@ -18,7 +28,7 @@ { "fields": {"content": "content"}, "evals": [ - {"name": "LLMSecurityPolitics", "config": {"key": "", "api_url": ""}} + {"name": "LLMSecurityPolitics", "config": {"key": OPENAI_KEY, "api_url": OPENAI_URL, "model": OPENAI_MODEL}} ], } ]