
Commit 98968e6

refactor: remove KGQualityEvaluator and restructure KG evaluation integration
1 parent 084cb08 commit 98968e6
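
In practice the refactor swaps the old single-facade API for a small container class plus free functions. A minimal before/after sketch assembled from the diffs below; the constructor keywords and default values ("cache", "kuzu", "rocksdb") are taken from the changed files, and this is not verbatim project code:

```python
# Before this commit: one facade class owned construction and all evaluation methods.
# from graphgen.models import KGQualityEvaluator
# evaluator = KGQualityEvaluator(
#     working_dir="cache", graph_backend="kuzu", kv_backend="rocksdb"
# )
# results = evaluator.evaluate_all()

# After this commit: a KGEvaluators container plus module-level functions.
from graphgen.operators.evaluate.evaluate_kg import KGEvaluators, evaluate_all

evaluators = KGEvaluators(working_dir="cache", graph_backend="kuzu", kv_backend="rocksdb")
results = evaluate_all(evaluators)
# results == {"accuracy": ..., "consistency": ..., "structure": ...}
```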

File tree

7 files changed (+80 -143 lines)

graphgen/models/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 from .evaluator import (
-    KGQualityEvaluator,
     LengthEvaluator,
     MTLDEvaluator,
     RewardEvaluator,

graphgen/models/evaluator/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -2,6 +2,5 @@
 from .kg import (
     AccuracyEvaluator,
     ConsistencyEvaluator,
-    KGQualityEvaluator,
     StructureEvaluator,
 )

graphgen/models/evaluator/kg/README.md

Lines changed: 3 additions & 2 deletions
@@ -4,12 +4,13 @@ This module provides comprehensive quality evaluation for knowledge graphs built
 
 ## Module Structure
 
-The evaluation functionality has been split into modular components:
+The evaluation functionality is organized into modular components:
 
 - **`accuracy_evaluator.py`**: Entity/relation extraction quality evaluation using LLM-as-a-Judge
 - **`consistency_evaluator.py`**: Attribute value conflict detection
 - **`structure_evaluator.py`**: Graph structural robustness metrics
-- **`kg_quality_evaluator.py`**: Main evaluator class that integrates all modules
+
+The evaluation components are integrated in `graphgen/operators/evaluate/evaluate_kg.py`, which provides functions to create and use these evaluators.
 
 ## Features

graphgen/models/evaluator/kg/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -9,12 +9,10 @@
 
 from .accuracy_evaluator import AccuracyEvaluator
 from .consistency_evaluator import ConsistencyEvaluator
-from .kg_quality_evaluator import KGQualityEvaluator
 from .structure_evaluator import StructureEvaluator
 
 __all__ = [
     "AccuracyEvaluator",
     "ConsistencyEvaluator",
-    "KGQualityEvaluator",
     "StructureEvaluator",
 ]
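
With `KGQualityEvaluator` gone, the package exports only the three concrete evaluators. A sketch of driving one of them directly, mirroring the wiring that the new `evaluate_kg.py` (further down in this commit) performs; the backend and namespace values are the defaults shown in that file, not the only options:

```python
from graphgen.common import init_llm, init_storage
from graphgen.models.evaluator.kg import AccuracyEvaluator

# Storage and LLM wiring copied from the new KGEvaluators constructor.
graph_storage = init_storage(backend="kuzu", working_dir="cache", namespace="graph")
chunk_storage = init_storage(backend="rocksdb", working_dir="cache", namespace="chunk")
llm_client = init_llm("synthesizer")

accuracy_evaluator = AccuracyEvaluator(
    graph_storage=graph_storage,
    chunk_storage=chunk_storage,
    llm_client=llm_client,
)
# Per the diff, each evaluator exposes a plain .evaluate() method.
accuracy_results = accuracy_evaluator.evaluate()
```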

graphgen/models/evaluator/kg/kg_quality_evaluator.py

Lines changed: 0 additions & 79 deletions
This file was deleted.

graphgen/operators/evaluate/evaluate_kg.py

Lines changed: 58 additions & 42 deletions
@@ -2,70 +2,86 @@
 
 from dotenv import load_dotenv
 
-from graphgen.models import KGQualityEvaluator
+from graphgen.bases import BaseGraphStorage, BaseKVStorage, BaseLLMWrapper
+from graphgen.common import init_llm, init_storage
+from graphgen.models.evaluator.kg.accuracy_evaluator import AccuracyEvaluator
+from graphgen.models.evaluator.kg.consistency_evaluator import ConsistencyEvaluator
+from graphgen.models.evaluator.kg.structure_evaluator import StructureEvaluator
 from graphgen.utils import logger
 
 # Load environment variables
 load_dotenv()
 
 
-def evaluate_accuracy(evaluator: KGQualityEvaluator) -> Dict[str, Any]:
-    """Evaluate accuracy of entity and relation extraction.
-
-    Args:
-        evaluator: KGQualityEvaluator instance
+class KGEvaluators:
+    def __init__(
+        self,
+        working_dir: str = "cache",
+        graph_backend: str = "kuzu",
+        kv_backend: str = "rocksdb",
+        **kwargs
+    ):
+        # Initialize storage
+        self.graph_storage: BaseGraphStorage = init_storage(
+            backend=graph_backend, working_dir=working_dir, namespace="graph"
+        )
+        self.chunk_storage: BaseKVStorage = init_storage(
+            backend=kv_backend, working_dir=working_dir, namespace="chunk"
+        )
 
-    Returns:
-        Dictionary containing entity_accuracy and relation_accuracy metrics.
-    """
+        # Initialize LLM client
+        self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
+
+        # Initialize individual evaluators
+        self.accuracy_evaluator = AccuracyEvaluator(
+            graph_storage=self.graph_storage,
+            chunk_storage=self.chunk_storage,
+            llm_client=self.llm_client,
+        )
+
+        self.consistency_evaluator = ConsistencyEvaluator(
+            graph_storage=self.graph_storage,
+            chunk_storage=self.chunk_storage,
+            llm_client=self.llm_client,
+        )
+
+        # Structure evaluator doesn't need chunk_storage or llm_client
+        structure_params = kwargs.get("structure_params", {})
+        self.structure_evaluator = StructureEvaluator(
+            graph_storage=self.graph_storage,
+            **structure_params
+        )
+
+        logger.info("KG evaluators initialized")
+
+
+def evaluate_accuracy(evaluators: KGEvaluators) -> Dict[str, Any]:
     logger.info("Running accuracy evaluation...")
-    results = evaluator.evaluate_accuracy()
+    results = evaluators.accuracy_evaluator.evaluate()
     logger.info("Accuracy evaluation completed")
     return results
 
 
-def evaluate_consistency(evaluator: KGQualityEvaluator) -> Dict[str, Any]:
-    """Evaluate consistency by detecting semantic conflicts.
-
-    Args:
-        evaluator: KGQualityEvaluator instance
-
-    Returns:
-        Dictionary containing consistency metrics including conflict_rate and conflicts.
-    """
+def evaluate_consistency(evaluators: KGEvaluators) -> Dict[str, Any]:
     logger.info("Running consistency evaluation...")
-    results = evaluator.evaluate_consistency()
+    results = evaluators.consistency_evaluator.evaluate()
     logger.info("Consistency evaluation completed")
     return results
 
 
-def evaluate_structure(evaluator: KGQualityEvaluator) -> Dict[str, Any]:
-    """Evaluate structural robustness of the graph.
-
-    Args:
-        evaluator: KGQualityEvaluator instance
-
-    Returns:
-        Dictionary containing structural metrics including noise_ratio, largest_cc_ratio, etc.
-    """
+def evaluate_structure(evaluators: KGEvaluators) -> Dict[str, Any]:
     logger.info("Running structural robustness evaluation...")
-    results = evaluator.evaluate_structure()
+    results = evaluators.structure_evaluator.evaluate()
    logger.info("Structural robustness evaluation completed")
    return results
 
 
-def evaluate_all(evaluator: KGQualityEvaluator) -> Dict[str, Any]:
-    """Run all evaluations (accuracy, consistency, structure).
-
-    Args:
-        evaluator: KGQualityEvaluator instance
-
-    Returns:
-        Dictionary containing all evaluation results with keys: accuracy, consistency, structure.
-    """
+def evaluate_all(evaluators: KGEvaluators) -> Dict[str, Any]:
     logger.info("Running all evaluations...")
-    results = evaluator.evaluate_all()
+    results = {
+        "accuracy": evaluate_accuracy(evaluators),
+        "consistency": evaluate_consistency(evaluators),
+        "structure": evaluate_structure(evaluators),
+    }
     logger.info("All evaluations completed")
     return results
-
-
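
A short usage sketch of the new entry points. `structure_params` is forwarded verbatim to `StructureEvaluator`; it is left empty here because the accepted keys are not shown in this diff:

```python
from graphgen.operators.evaluate.evaluate_kg import (
    KGEvaluators,
    evaluate_consistency,
    evaluate_structure,
)

# KGEvaluators builds graph/chunk storage and the "synthesizer" LLM client internally.
evaluators = KGEvaluators(
    working_dir="cache",
    graph_backend="kuzu",
    kv_backend="rocksdb",
    structure_params={},  # forwarded to StructureEvaluator; keys not documented in this diff
)

consistency_results = evaluate_consistency(evaluators)
structure_results = evaluate_structure(evaluators)
```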

graphgen/operators/evaluate/evaluate_service.py

Lines changed: 19 additions & 16 deletions
@@ -1,10 +1,15 @@
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 import pandas as pd
 
-from graphgen.bases import BaseLLMWrapper, BaseOperator, QAPair
-from graphgen.common import init_llm
-from graphgen.models import KGQualityEvaluator
+from graphgen.bases import BaseOperator, QAPair
+from graphgen.operators.evaluate.evaluate_kg import (
+    KGEvaluators,
+    evaluate_accuracy,
+    evaluate_all,
+    evaluate_consistency,
+    evaluate_structure,
+)
 from graphgen.utils import logger, run_concurrent
 
 
@@ -23,7 +28,6 @@ def __init__(
         **kwargs
     ):
         super().__init__(working_dir=working_dir, op_name="evaluate_service")
-        self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
         self.metrics = metrics or []
         self.kwargs = kwargs
         self.graph_backend = graph_backend
@@ -35,7 +39,7 @@ def __init__(
 
         # Initialize evaluators
         self.qa_evaluators = {}
-        self.kg_evaluator = None
+        self.kg_evaluators: Optional[KGEvaluators] = None
 
         self._init_evaluators()
 
@@ -65,16 +69,15 @@ def _init_evaluators(self):
             else:
                 raise ValueError(f"Unknown QA metric: {metric}")
 
-        # Initialize KG evaluator if KG metrics are specified
+        # Initialize KG evaluators if KG metrics are specified
         if self.kg_metrics:
             kg_params = self.kwargs.get("kg_params", {})
-            self.kg_evaluator = KGQualityEvaluator(
+            self.kg_evaluators = KGEvaluators(
                 working_dir=self.working_dir,
                 graph_backend=self.graph_backend,
                 kv_backend=self.kv_backend,
                 **kg_params
             )
-            logger.info("KG evaluator initialized")
 
@@ -140,17 +143,17 @@ def _evaluate_qa(self, items: list[dict[str, Any]]) -> list[dict[str, Any]]:
         return results
 
     def _evaluate_kg(self) -> Dict[str, Any]:
-        if not self.kg_evaluator:
-            logger.warning("No KG evaluator initialized, skipping KG evaluation")
+        if not self.kg_evaluators:
+            logger.warning("No KG evaluators initialized, skipping KG evaluation")
             return {}
 
         results = {}
 
         # Map metric names to evaluation functions
         kg_metric_map = {
-            "kg_accuracy": self.kg_evaluator.evaluate_accuracy,
-            "kg_consistency": self.kg_evaluator.evaluate_consistency,
-            "kg_structure": self.kg_evaluator.evaluate_structure,
+            "kg_accuracy": evaluate_accuracy,
+            "kg_consistency": evaluate_consistency,
+            "kg_structure": evaluate_structure,
         }
 
         # Run KG evaluations based on metrics
@@ -159,7 +162,7 @@ def _evaluate_kg(self) -> Dict[str, Any]:
                 logger.info("Running %s evaluation...", metric)
                 metric_key = metric.replace("kg_", "")  # Remove "kg_" prefix
                 try:
-                    results[metric_key] = kg_metric_map[metric]()
+                    results[metric_key] = kg_metric_map[metric](self.kg_evaluators)
                 except Exception as e:
                     logger.error("Error in %s evaluation: %s", metric, str(e))
                     results[metric_key] = {"error": str(e)}
@@ -169,7 +172,7 @@ def _evaluate_kg(self) -> Dict[str, Any]:
         # If no valid metrics were found, run all evaluations
         if not results:
             logger.info("No valid KG metrics found, running all evaluations")
-            results = self.kg_evaluator.evaluate_all()
+            results = evaluate_all(self.kg_evaluators)
 
         return results
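
For context, a standalone sketch of the dispatch that `_evaluate_kg` now performs, with the service's error handling omitted; the requested metric list is an illustrative example, not a required configuration:

```python
from graphgen.operators.evaluate.evaluate_kg import (
    KGEvaluators,
    evaluate_accuracy,
    evaluate_all,
    evaluate_consistency,
    evaluate_structure,
)

# Same metric-name mapping the service builds.
kg_metric_map = {
    "kg_accuracy": evaluate_accuracy,
    "kg_consistency": evaluate_consistency,
    "kg_structure": evaluate_structure,
}

kg_evaluators = KGEvaluators(working_dir="cache", graph_backend="kuzu", kv_backend="rocksdb")
requested_metrics = ["kg_accuracy", "kg_structure"]  # example selection

results = {}
for metric in requested_metrics:
    if metric in kg_metric_map:
        # Strip the "kg_" prefix for the result key, as the service does.
        results[metric.replace("kg_", "")] = kg_metric_map[metric](kg_evaluators)

if not results:
    # Fall back to running everything when no recognized KG metric was requested.
    results = evaluate_all(kg_evaluators)
```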
