Skip to content

Commit 920ee70

Browse files
committed
feat: add English prompts for consistency evaluation
1 parent 69c4560 commit 920ee70

File tree

2 files changed: +129 additions, -15 deletions (lines changed)

graphgen/models/evaluator/kg/consistency_evaluator.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,9 @@
66
from graphgen.bases import BaseGraphStorage, BaseKVStorage, BaseLLMWrapper
77
from graphgen.bases.datatypes import Chunk
88
from graphgen.templates.evaluation.kg.consistency_evaluation import (
9-
ENTITY_DESCRIPTION_CONFLICT_PROMPT,
10-
ENTITY_EXTRACTION_PROMPT,
11-
ENTITY_TYPE_CONFLICT_PROMPT,
12-
RELATION_CONFLICT_PROMPT,
9+
CONSISTENCY_EVALUATION_PROMPT,
1310
)
14-
from graphgen.utils import logger
11+
from graphgen.utils import detect_main_language, logger
1512

1613

1714
class ConsistencyEvaluator:
@@ -194,7 +191,9 @@ def _extract_entity_from_chunk(
194191
# Clean entity_id: remove surrounding quotes if present
195192
clean_entity_id = self._clean_entity_id(entity_id)
196193

197-
prompt = ENTITY_EXTRACTION_PROMPT.format(
194+
# Detect language and get appropriate prompt
195+
lang = detect_main_language(chunk.content)
196+
prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_EXTRACTION"].format(
198197
entity_name=clean_entity_id,
199198
chunk_content=chunk.content[:2000]
200199
if chunk.content
@@ -270,7 +269,10 @@ def _check_entity_type_consistency(
270269
if entity_type
271270
]
272271

273-
prompt = ENTITY_TYPE_CONFLICT_PROMPT.format(
272+
# Use default language (zh) for conflict detection prompts
273+
# as they compare multiple chunks which may have different languages
274+
lang = "zh" # Default to Chinese, can be made configurable
275+
prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_TYPE_CONFLICT"].format(
274276
entity_name=entity_id, type_extractions="\n".join(type_list)
275277
)
276278

@@ -313,7 +315,9 @@ def _check_entity_description_consistency(
313315
for chunk_id, description in valid_descriptions.items()
314316
]
315317

316-
prompt = ENTITY_DESCRIPTION_CONFLICT_PROMPT.format(
318+
# Use default language (zh) for conflict detection prompts
319+
lang = "zh" # Default to Chinese, can be made configurable
320+
prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_DESCRIPTION_CONFLICT"].format(
317321
entity_name=entity_id, descriptions="\n".join(desc_list)
318322
)
319323

@@ -351,7 +355,9 @@ def _check_relation_consistency(
351355
if relation
352356
]
353357

354-
prompt = RELATION_CONFLICT_PROMPT.format(
358+
# Use default language (zh) for conflict detection prompts
359+
lang = "zh" # Default to Chinese, can be made configurable
360+
prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["RELATION_CONFLICT"].format(
355361
source_entity=src_id,
356362
target_entity=dst_id,
357363
relation_descriptions="\n".join(rel_list),

graphgen/templates/evaluation/kg/consistency_evaluation.py

Lines changed: 114 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
ENTITY_TYPE_CONFLICT_PROMPT = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中被提取为不同的类型,是否存在语义冲突。
1+
ENTITY_TYPE_CONFLICT_PROMPT_ZH = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中被提取为不同的类型,是否存在语义冲突。
22
33
实体名称:{entity_name}
44
@@ -21,7 +21,30 @@
2121
}}
2222
"""
2323

24-
ENTITY_DESCRIPTION_CONFLICT_PROMPT = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中的描述是否存在语义冲突。
24+
ENTITY_TYPE_CONFLICT_PROMPT_EN = """You are a Knowledge Graph Consistency Assessment Expert. Your task is to determine whether there are semantic conflicts when the same entity is extracted as different types in different text blocks.
25+
26+
Entity Name: {entity_name}
27+
28+
Type extraction results from different text blocks:
29+
{type_extractions}
30+
31+
Preset entity type list (for reference):
32+
concept, date, location, keyword, organization, person, event, work, nature, artificial, science, technology, mission, gene
33+
34+
Please determine whether these types have semantic conflicts (i.e., whether they describe the same category of things, or if there are contradictions).
35+
Note: If types are just different expressions of the same concept (such as concept and keyword), it may not be considered a serious conflict.
36+
37+
Please return in JSON format:
38+
{{
39+
"has_conflict": <true/false>,
40+
"conflict_severity": <float between 0-1, where 0 means no conflict, 1 means severe conflict>,
41+
"conflict_reasoning": "<reasoning for conflict judgment>",
42+
"conflicting_types": ["<pairs of conflicting types>"],
43+
"recommended_type": "<if there is a conflict, the recommended correct type (must be one of the preset types)>"
44+
}}
45+
"""
46+
47+
ENTITY_DESCRIPTION_CONFLICT_PROMPT_ZH = """你是一个知识图谱一致性评估专家。你的任务是判断同一个实体在不同文本块中的描述是否存在语义冲突。
2548
2649
实体名称:{entity_name}
2750
@@ -40,7 +63,26 @@
4063
}}
4164
"""
4265

43-
RELATION_CONFLICT_PROMPT = """你是一个知识图谱一致性评估专家。你的任务是判断同一对实体在不同文本块中的关系描述是否存在语义冲突。
66+
ENTITY_DESCRIPTION_CONFLICT_PROMPT_EN = """You are a Knowledge Graph Consistency Assessment Expert. Your task is to determine whether there are semantic conflicts in the descriptions of the same entity across different text blocks.
67+
68+
Entity Name: {entity_name}
69+
70+
Descriptions from different text blocks:
71+
{descriptions}
72+
73+
Please determine whether these descriptions have semantic conflicts (i.e., whether they describe the same entity, or if there is contradictory information).
74+
75+
Please return in JSON format:
76+
{{
77+
"has_conflict": <true/false>,
78+
"conflict_severity": <float between 0-1>,
79+
"conflict_reasoning": "<reasoning for conflict judgment>",
80+
"conflicting_descriptions": ["<pairs of conflicting descriptions>"],
81+
"conflict_details": "<specific conflict content>"
82+
}}
83+
"""
84+
85+
RELATION_CONFLICT_PROMPT_ZH = """你是一个知识图谱一致性评估专家。你的任务是判断同一对实体在不同文本块中的关系描述是否存在语义冲突。
4486
4587
实体对:{source_entity} -> {target_entity}
4688
@@ -58,7 +100,25 @@
58100
}}
59101
"""
60102

61-
ENTITY_EXTRACTION_PROMPT = """从以下文本块中提取指定实体的类型和描述。
103+
RELATION_CONFLICT_PROMPT_EN = """You are a Knowledge Graph Consistency Assessment Expert. Your task is to determine whether there are semantic conflicts in the relation descriptions of the same entity pair across different text blocks.
104+
105+
Entity Pair: {source_entity} -> {target_entity}
106+
107+
Relation descriptions from different text blocks:
108+
{relation_descriptions}
109+
110+
Please determine whether these relation descriptions have semantic conflicts.
111+
112+
Please return in JSON format:
113+
{{
114+
"has_conflict": <true/false>,
115+
"conflict_severity": <float between 0-1>,
116+
"conflict_reasoning": "<reasoning for conflict judgment>",
117+
"conflicting_relations": ["<pairs of conflicting relation descriptions>"]
118+
}}
119+
"""
120+
121+
ENTITY_EXTRACTION_PROMPT_ZH = """从以下文本块中提取指定实体的类型和描述。
62122
63123
**重要**:你只需要提取指定的实体,不要提取其他实体。
64124
@@ -96,7 +156,55 @@
96156
}}
97157
"""
98158

159+
ENTITY_EXTRACTION_PROMPT_EN = """Extract the type and description of the specified entity from the following text block.
160+
161+
**Important**: You should only extract the specified entity, do not extract other entities.
162+
163+
Entity Name: {entity_name}
164+
165+
Text Block:
166+
{chunk_content}
167+
168+
Please find and extract the following information for **this entity only** (entity name: {entity_name}) from the text block:
169+
170+
1. entity_type: Entity type, must be one of the following preset types (lowercase):
171+
- concept: concept
172+
- date: date
173+
- location: location
174+
- keyword: keyword
175+
- organization: organization
176+
- person: person
177+
- event: event
178+
- work: work
179+
- nature: nature
180+
- artificial: artificial
181+
- science: science
182+
- technology: technology
183+
- mission: mission
184+
- gene: gene
185+
186+
If the type cannot be determined, please use "concept" as the default value.
187+
188+
2. description: Entity description (briefly describe the role and characteristics of this entity in the text)
189+
190+
Please return in JSON format:
191+
{{
192+
"entity_type": "<entity type (must be one of the preset types above)>",
193+
"description": "<entity description>"
194+
}}
195+
"""
196+
99197
CONSISTENCY_EVALUATION_PROMPT = {
100-
"en": "",
101-
"zh": ""
198+
"zh": {
199+
"ENTITY_TYPE_CONFLICT": ENTITY_TYPE_CONFLICT_PROMPT_ZH,
200+
"ENTITY_DESCRIPTION_CONFLICT": ENTITY_DESCRIPTION_CONFLICT_PROMPT_ZH,
201+
"RELATION_CONFLICT": RELATION_CONFLICT_PROMPT_ZH,
202+
"ENTITY_EXTRACTION": ENTITY_EXTRACTION_PROMPT_ZH,
203+
},
204+
"en": {
205+
"ENTITY_TYPE_CONFLICT": ENTITY_TYPE_CONFLICT_PROMPT_EN,
206+
"ENTITY_DESCRIPTION_CONFLICT": ENTITY_DESCRIPTION_CONFLICT_PROMPT_EN,
207+
"RELATION_CONFLICT": RELATION_CONFLICT_PROMPT_EN,
208+
"ENTITY_EXTRACTION": ENTITY_EXTRACTION_PROMPT_EN,
209+
},
102210
}

0 commit comments

Comments
 (0)