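feat: add English prompts for consistency evaluation

Rename the existing Chinese prompt constants with a _ZH suffix, add _EN
counterparts, and expose both through the language-keyed
CONSISTENCY_EVALUATION_PROMPT mapping ("zh" / "en"). Entity extraction now
selects its prompt via detect_main_language(chunk.content); the three
conflict-detection prompts still default to "zh".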

CHERRY-ui8 · CHERRY-ui8 · commit 920ee7052e8c · 2025-12-30T01:59:55.000+08:00
diff --git a/graphgen/models/evaluator/kg/consistency_evaluator.py b/graphgen/models/evaluator/kg/consistency_evaluator.py
@@ -6,12 +6,9 @@
 from graphgen.bases import BaseGraphStorage, BaseKVStorage, BaseLLMWrapper
 from graphgen.bases.datatypes import Chunk
 from graphgen.templates.evaluation.kg.consistency_evaluation import (
-    ENTITY_DESCRIPTION_CONFLICT_PROMPT,
-    ENTITY_EXTRACTION_PROMPT,
-    ENTITY_TYPE_CONFLICT_PROMPT,
-    RELATION_CONFLICT_PROMPT,
+    CONSISTENCY_EVALUATION_PROMPT,
 )
-from graphgen.utils import logger
+from graphgen.utils import detect_main_language, logger
 
 
 class ConsistencyEvaluator:
@@ -194,7 +191,9 @@ def _extract_entity_from_chunk(
             # Clean entity_id: remove surrounding quotes if present
             clean_entity_id = self._clean_entity_id(entity_id)
 
-            prompt = ENTITY_EXTRACTION_PROMPT.format(
+            # Detect language and get appropriate prompt
+            lang = detect_main_language(chunk.content)
+            prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_EXTRACTION"].format(
                 entity_name=clean_entity_id,
                 chunk_content=chunk.content[:2000]
                 if chunk.content
@@ -270,7 +269,10 @@ def _check_entity_type_consistency(
                 if entity_type
             ]
 
-            prompt = ENTITY_TYPE_CONFLICT_PROMPT.format(
+            # Use default language (zh) for conflict detection prompts
+            # as they compare multiple chunks which may have different languages
+            lang = "zh"  # Default to Chinese, can be made configurable
+            prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_TYPE_CONFLICT"].format(
                 entity_name=entity_id, type_extractions="\n".join(type_list)
             )
 
@@ -313,7 +315,9 @@ def _check_entity_description_consistency(
                 for chunk_id, description in valid_descriptions.items()
             ]
 
-            prompt = ENTITY_DESCRIPTION_CONFLICT_PROMPT.format(
+            # Use default language (zh) for conflict detection prompts
+            lang = "zh"  # Default to Chinese, can be made configurable
+            prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_DESCRIPTION_CONFLICT"].format(
                 entity_name=entity_id, descriptions="\n".join(desc_list)
             )
 
@@ -351,7 +355,9 @@ def _check_relation_consistency(
                 if relation
             ]
 
-            prompt = RELATION_CONFLICT_PROMPT.format(
+            # Use default language (zh) for conflict detection prompts
+            lang = "zh"  # Default to Chinese, can be made configurable
+            prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["RELATION_CONFLICT"].format(
                 source_entity=src_id,
                 target_entity=dst_id,
                 relation_descriptions="\n".join(rel_list),
diff --git a/graphgen/templates/evaluation/kg/consistency_evaluation.py b/graphgen/templates/evaluation/kg/consistency_evaluation.py
@@ -1,4 +1,4 @@
-ENTITY_TYPE_CONFLICT_PROMPT = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中被提取为不同的类型，是否存在语义冲突。
+ENTITY_TYPE_CONFLICT_PROMPT_ZH = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中被提取为不同的类型，是否存在语义冲突。
 
 实体名称：{entity_name}
 
@@ -21,7 +21,30 @@
 }}
 """
 
-ENTITY_DESCRIPTION_CONFLICT_PROMPT = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中的描述是否存在语义冲突。
+ENTITY_TYPE_CONFLICT_PROMPT_EN = """You are a Knowledge Graph Consistency Assessment Expert. Your task is to determine whether there are semantic conflicts when the same entity is extracted as different types in different text blocks.
+
+Entity Name: {entity_name}
+
+Type extraction results from different text blocks:
+{type_extractions}
+
+Preset entity type list (for reference):
+concept, date, location, keyword, organization, person, event, work, nature, artificial, science, technology, mission, gene
+
+Please determine whether these types have semantic conflicts (i.e., whether they describe the same category of things, or if there are contradictions).
+Note: If types are just different expressions of the same concept (such as concept and keyword), it may not be considered a serious conflict.
+
+Please return in JSON format:
+{{
+    "has_conflict": <true/false>,
+    "conflict_severity": <float between 0-1, where 0 means no conflict, 1 means severe conflict>,
+    "conflict_reasoning": "<reasoning for conflict judgment>",
+    "conflicting_types": ["<pairs of conflicting types>"],
+    "recommended_type": "<if there is a conflict, the recommended correct type (must be one of the preset types)>"
+}}
+"""
+
+ENTITY_DESCRIPTION_CONFLICT_PROMPT_ZH = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中的描述是否存在语义冲突。
 
 实体名称：{entity_name}
 
@@ -40,7 +63,26 @@
 }}
 """
 
-RELATION_CONFLICT_PROMPT = """你是一个知识图谱一致性评估专家。你的任务是判断同一对实体在不同文本块中的关系描述是否存在语义冲突。
+ENTITY_DESCRIPTION_CONFLICT_PROMPT_EN = """You are a Knowledge Graph Consistency Assessment Expert. Your task is to determine whether there are semantic conflicts in the descriptions of the same entity across different text blocks.
+
+Entity Name: {entity_name}
+
+Descriptions from different text blocks:
+{descriptions}
+
+Please determine whether these descriptions have semantic conflicts (i.e., whether they describe the same entity, or if there is contradictory information).
+
+Please return in JSON format:
+{{
+    "has_conflict": <true/false>,
+    "conflict_severity": <float between 0-1>,
+    "conflict_reasoning": "<reasoning for conflict judgment>",
+    "conflicting_descriptions": ["<pairs of conflicting descriptions>"],
+    "conflict_details": "<specific conflict content>"
+}}
+"""
+
+RELATION_CONFLICT_PROMPT_ZH = """你是一个知识图谱一致性评估专家。你的任务是判断同一对实体在不同文本块中的关系描述是否存在语义冲突。
 
 实体对：{source_entity} -> {target_entity}
 
@@ -58,7 +100,25 @@
 }}
 """
 
-ENTITY_EXTRACTION_PROMPT = """从以下文本块中提取指定实体的类型和描述。
+RELATION_CONFLICT_PROMPT_EN = """You are a Knowledge Graph Consistency Assessment Expert. Your task is to determine whether there are semantic conflicts in the relation descriptions of the same entity pair across different text blocks.
+
+Entity Pair: {source_entity} -> {target_entity}
+
+Relation descriptions from different text blocks:
+{relation_descriptions}
+
+Please determine whether these relation descriptions have semantic conflicts.
+
+Please return in JSON format:
+{{
+    "has_conflict": <true/false>,
+    "conflict_severity": <float between 0-1>,
+    "conflict_reasoning": "<reasoning for conflict judgment>",
+    "conflicting_relations": ["<pairs of conflicting relation descriptions>"]
+}}
+"""
+
+ENTITY_EXTRACTION_PROMPT_ZH = """从以下文本块中提取指定实体的类型和描述。
 
 **重要**：你只需要提取指定的实体，不要提取其他实体。
 
@@ -96,7 +156,55 @@
 }}
 """
 
+ENTITY_EXTRACTION_PROMPT_EN = """Extract the type and description of the specified entity from the following text block.
+
+**Important**: You should only extract the specified entity, do not extract other entities.
+
+Entity Name: {entity_name}
+
+Text Block:
+{chunk_content}
+
+Please find and extract the following information for **this entity only** (entity name: {entity_name}) from the text block:
+
+1. entity_type: Entity type, must be one of the following preset types (lowercase):
+   - concept: concept
+   - date: date
+   - location: location
+   - keyword: keyword
+   - organization: organization
+   - person: person
+   - event: event
+   - work: work
+   - nature: nature
+   - artificial: artificial
+   - science: science
+   - technology: technology
+   - mission: mission
+   - gene: gene
+
+   If the type cannot be determined, please use "concept" as the default value.
+
+2. description: Entity description (briefly describe the role and characteristics of this entity in the text)
+
+Please return in JSON format:
+{{
+    "entity_type": "<entity type (must be one of the preset types above)>",
+    "description": "<entity description>"
+}}
+"""
+
 CONSISTENCY_EVALUATION_PROMPT = {
-    "en": "",
-    "zh": ""
+    "zh": {
+        "ENTITY_TYPE_CONFLICT": ENTITY_TYPE_CONFLICT_PROMPT_ZH,
+        "ENTITY_DESCRIPTION_CONFLICT": ENTITY_DESCRIPTION_CONFLICT_PROMPT_ZH,
+        "RELATION_CONFLICT": RELATION_CONFLICT_PROMPT_ZH,
+        "ENTITY_EXTRACTION": ENTITY_EXTRACTION_PROMPT_ZH,
+    },
+    "en": {
+        "ENTITY_TYPE_CONFLICT": ENTITY_TYPE_CONFLICT_PROMPT_EN,
+        "ENTITY_DESCRIPTION_CONFLICT": ENTITY_DESCRIPTION_CONFLICT_PROMPT_EN,
+        "RELATION_CONFLICT": RELATION_CONFLICT_PROMPT_EN,
+        "ENTITY_EXTRACTION": ENTITY_EXTRACTION_PROMPT_EN,
+    },
 }