-
Notifications
You must be signed in to change notification settings - Fork 75
Expand file tree
/
Copy pathontology.py
More file actions
454 lines (371 loc) · 15.8 KB
/
ontology.py
File metadata and controls
454 lines (371 loc) · 15.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
import json
import logging
from falkordb import Graph
from .entity import Entity
from .relation import Relation
from typing import Optional, Union
from graphrag_sdk.source import AbstractSource
from graphrag_sdk.models import GenerativeModel
from .attribute import Attribute, AttributeType
# Module-level logger named after this module; used by the helpers and the Ontology class below.
logger = logging.getLogger(__name__)
def _process_attributes_from_graph(attributes: list[list[list[str]]]) -> list[Attribute]:
    """
    Processes the attributes extracted from the graph and converts them into the SDK convention.

    The first element of every row is unpacked as a ``[name, type_name]`` pair;
    any further elements of the row are ignored. Rows whose type name cannot be
    converted by ``AttributeType.from_string`` are skipped.

    Args:
        attributes (list[list[list[str]]]): The attributes extracted from the graph.

    Returns:
        processed_attributes (list[Attribute]): The processed attributes.
    """
    processed_attributes = []
    for row in attributes:
        attr_name, type_name = row[0]
        try:
            attr_type = AttributeType.from_string(type_name)
        except Exception:
            # Best-effort conversion: an unrecognised type only drops that one
            # attribute. `Exception` (rather than a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate.
            logger.debug(
                "Skipping attribute %r: unsupported type %r", attr_name, type_name
            )
            continue
        processed_attributes.append(Attribute(attr_name, attr_type))
    return processed_attributes
class Ontology(object):
    """
    Represents an ontology, which is a collection of entities and relations.

    Attributes:
        entities (list[Entity]): The list of entities in the ontology.
        relations (list[Relation]): The list of relations in the ontology.
    """

    def __init__(self, entities: Optional[list[Entity]] = None, relations: Optional[list[Relation]] = None):
        """
        Initialize the Ontology class.

        Args:
            entities (Optional[list[Entity]]): List of Entity objects. Defaults to None.
            relations (Optional[list[Relation]]): List of Relation objects. Defaults to None.
        """
        # `None` (or an empty list) yields a fresh list per instance.
        self.entities = entities or []
        self.relations = relations or []

    @staticmethod
    def from_sources(
        sources: list[AbstractSource],
        model: GenerativeModel,
        boundaries: Optional[str] = None,
        hide_progress: bool = False,
    ) -> "Ontology":
        """
        Create an Ontology object from a list of sources.

        Args:
            sources (list[AbstractSource]): A list of AbstractSource objects representing the sources.
            model (GenerativeModel): The generative model to use.
            boundaries (Optional[str]): The boundaries for the ontology.
            hide_progress (bool): Whether to hide the progress bar.

        Returns:
            The created Ontology object.
        """
        # Import here to avoid circular import
        from graphrag_sdk.steps.create_ontology_step import CreateOntologyStep

        step = CreateOntologyStep(
            sources=sources,
            ontology=Ontology(),
            model=model,
            hide_progress=hide_progress,
        )

        return step.run(boundaries=boundaries)

    @staticmethod
    def from_json(txt: Union[dict, str]) -> "Ontology":
        """
        Creates an Ontology object from a JSON representation.

        Args:
            txt (Union[dict, str]): The JSON representation of the ontology. It can be either a dictionary or a string.

        Returns:
            The Ontology object created from the JSON representation.

        Raises:
            ValueError: If `txt` is a string that is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
            KeyError: If the "entities" or "relations" key is missing.
        """
        data = txt if isinstance(txt, dict) else json.loads(txt)
        return Ontology(
            [Entity.from_json(entity) for entity in data["entities"]],
            [Relation.from_json(relation) for relation in data["relations"]],
        )

    @staticmethod
    def from_schema_graph(graph: Graph) -> "Ontology":
        """
        Creates an Ontology object from a given schema graph.

        Args:
            graph (Graph): The graph object representing the ontology.

        Returns:
            The Ontology object created from the graph.
        """
        ontology = Ontology()

        entities = graph.query("MATCH (n) RETURN n").result_set
        for entity in entities:
            ontology.add_entity(Entity.from_graph(entity[0]))

        # Flatten the node rows once instead of once per relation.
        nodes = [node for row in entities for node in row]
        for relation in graph.query("MATCH ()-[r]->() RETURN r").result_set:
            ontology.add_relation(Relation.from_graph(relation[0], nodes))

        return ontology

    @staticmethod
    def from_kg_graph(graph: Graph, sample_size: Optional[int] = 100) -> "Ontology":
        """
        Constructs an Ontology object from a given Knowledge Graph.

        This function queries the provided knowledge graph to extract:
        1. Entities and their attributes.
        2. Relationships between entities and their attributes.

        Args:
            graph (Graph): The graph object representing the knowledge graph.
            sample_size (Optional[int]): The maximum number of attributes to sample for each entity and relationship. Defaults to 100.

        Returns:
            Ontology: The Ontology object constructed from the Knowledge Graph.
        """
        ontology = Ontology()

        # Retrieve all node labels and edge types from the graph.
        n_labels = graph.call_procedure("db.labels").result_set
        e_types = graph.call_procedure("db.relationshipTypes").result_set

        # Each result row carries the label / type name in its first column.
        labels = [row[0] for row in n_labels]

        # NOTE(review): labels and edge types are interpolated into the queries
        # unquoted, so names needing escaping would presumably break the query.

        # Extract attributes for each node label, limited by the specified sample size.
        for label in labels:
            attributes = graph.query(
                f"""MATCH (a:{label}) call {{ with a return [k in keys(a) | [k, typeof(a[k])]] as types }}
                WITH types limit {sample_size} unwind types as kt RETURN kt, count(1) ORDER BY kt[0]""").result_set
            attributes = _process_attributes_from_graph(attributes)
            ontology.add_entity(Entity(label, attributes))

        # Extract attributes for each edge type, limited by the specified sample size.
        for e_type in e_types:
            e_t = e_type[0]
            attributes = graph.query(
                f"""MATCH ()-[a:{e_t}]->() call {{ with a return [k in keys(a) | [k, typeof(a[k])]] as types }}
                WITH types limit {sample_size} unwind types as kt RETURN kt, count(1) ORDER BY kt[0]""").result_set
            attributes = _process_attributes_from_graph(attributes)

            # Probe every (source label, target label) pair, self-pairs included.
            for s_l in labels:
                for t_l in labels:
                    # Check if a relationship exists between the source and target entity labels
                    result_set = graph.query(f"MATCH (s:{s_l})-[a:{e_t}]->(t:{t_l}) return a limit 1").result_set
                    if len(result_set) > 0:
                        ontology.add_relation(Relation(e_t, s_l, t_l, attributes))

        return ontology

    @staticmethod
    def from_ttl(path: str) -> "Ontology":
        """
        Creates an Ontology object from a TTL/Turtle RDF schema file.

        The file is parsed with rdflib and the resulting RDF graph is handed to
        ``RDFOntologyExtractor``, whose ``extract()`` result is returned.

        Args:
            path (str): Path to the TTL file containing the RDF schema

        Returns:
            Ontology: The Ontology object extracted from the RDF schema

        Raises:
            ImportError: If rdflib or the RDF extractor module cannot be imported
            ValueError: If the file cannot be opened or parsed as Turtle
                (wraps FileNotFoundError, OSError and SyntaxError). The
                extractor may raise further errors of its own.
        """
        try:
            from rdflib import Graph as RDFGraph
            from graphrag_sdk.rdf_extractor import RDFOntologyExtractor
        except ImportError as e:
            raise ImportError(
                f"Required packages not found: {e}. "
                "Please ensure rdflib is installed with `pip install rdflib`"
            ) from e

        # Parse the RDF graph from TTL file
        try:
            graph = RDFGraph()
            graph.parse(path, format="turtle")
        except (FileNotFoundError, OSError, SyntaxError) as e:
            # Chain the original error so the root cause stays in the traceback.
            raise ValueError(
                f"Failed to parse TTL file: {e}. "
                "Please ensure the file is valid TTL format."
            ) from e

        # Extract ontology using RDFOntologyExtractor
        extractor = RDFOntologyExtractor(graph)
        ontology = extractor.extract()

        logger.info(f"Extracted ontology from TTL file: {len(ontology.entities)} entities, {len(ontology.relations)} relations")

        return ontology

    def add_entity(self, entity: Entity) -> None:
        """
        Adds an entity to the ontology.

        Args:
            entity (Entity): The entity object to be added.
        """
        self.entities.append(entity)

    def add_relation(self, relation: Relation) -> None:
        """
        Adds a relation to the ontology.

        Args:
            relation (Relation): The relation to be added.
        """
        self.relations.append(relation)

    def to_json(self) -> dict:
        """
        Converts the ontology object to a JSON representation.

        Returns:
            A dictionary with the keys "entities" and "relations", each holding
            the `to_json()` output of the corresponding items.
        """
        return {
            "entities": [entity.to_json() for entity in self.entities],
            "relations": [relation.to_json() for relation in self.relations],
        }

    def merge_with(self, o: "Ontology"):
        """
        Merges the given ontology `o` into the current ontology (in place).

        Items are matched by label only. Unknown labels are appended; for known
        labels the incoming item is merged into the first existing item with
        that label.

        Args:
            o (Ontology): The ontology to merge with.

        Returns:
            The merged ontology (this instance).
        """
        # Merge entities
        for entity in o.entities:
            existing_entity = next(
                (n for n in self.entities if n.label == entity.label), None
            )
            if existing_entity is None:
                # Entity does not exist in self, add it
                self.entities.append(entity)
                logger.debug(f"Adding entity {entity.label}")
            else:
                # Entity exists in self, merge attributes
                existing_entity.merge(entity)

        # Merge relations
        for relation in o.relations:
            existing_relation = next(
                (e for e in self.relations if e.label == relation.label), None
            )
            if existing_relation is None:
                # Relation does not exist in self, add it
                self.relations.append(relation)
                logger.debug(f"Adding relation {relation.label}")
            else:
                # Relation exists in self, merge attributes
                existing_relation.combine(relation)

        return self

    def discard_entities_without_relations(self):
        """
        Discards entities that do not have any relations in the ontology.

        An entity is kept when its label is the source or target label of at
        least one relation.

        Returns:
            The updated ontology object after discarding entities without relations.
        """
        # Labels that take part in at least one relation, collected in one pass.
        connected_labels = set()
        for relation in self.relations:
            connected_labels.add(relation.source.label)
            connected_labels.add(relation.target.label)

        entities_to_discard = [
            entity.label
            for entity in self.entities
            if entity.label not in connected_labels
        ]

        self.entities = [
            entity for entity in self.entities if entity.label in connected_labels
        ]
        # Relations need no filtering: a discarded label is by construction not
        # the source or target of any relation.

        if len(entities_to_discard) > 0:
            logger.info(f"Discarded entities: {', '.join(entities_to_discard)}")

        return self

    def discard_relations_without_entities(self):
        """
        Discards relations that have entities not present in the ontology.

        Each relation is judged on its own endpoints: it is dropped only when
        its source or target label has no matching entity. Other relations
        that merely share its label are kept.

        Returns:
            The current instance of the Ontology class.
        """
        known_labels = {entity.label for entity in self.entities}

        kept_relations = []
        relations_to_discard = []
        for relation in self.relations:
            if (
                relation.source.label in known_labels
                and relation.target.label in known_labels
            ):
                kept_relations.append(relation)
            else:
                relations_to_discard.append(relation.label)

        self.relations = kept_relations

        if len(relations_to_discard) > 0:
            logger.info(f"Discarded relations: {', '.join(relations_to_discard)}")

        return self

    def validate_entities(self) -> bool:
        """
        Validates the entities in the ontology.

        This method checks for entities without unique attributes and logs a warning if any are found.

        Returns:
            True if all entities have unique attributes, False otherwise.
        """
        # Check for entities without unique attributes
        entities_without_unique_attributes = [
            entity.label
            for entity in self.entities
            if len(entity.get_unique_attributes()) == 0
        ]
        if len(entities_without_unique_attributes) > 0:
            # `Logger.warn` is a deprecated alias; `warning` is the supported name.
            logger.warning(
                f"""
                *** WARNING ***
                The following entities do not have unique attributes:
                {', '.join(entities_without_unique_attributes)}
                """
            )
            return False
        return True

    def get_entity_with_label(self, label: str) -> Optional[Entity]:
        """
        Retrieves the entity with the specified label.

        Args:
            label (str): The label of the entity to retrieve.

        Returns:
            The first entity with the specified label, or None if not found.
        """
        return next((n for n in self.entities if n.label == label), None)

    def get_relations_with_label(self, label: str) -> list[Relation]:
        """
        Returns a list of relations with the specified label.

        Args:
            label (str): The label to search for.

        Returns:
            A list of relations with the specified label (empty if none match).
        """
        return [e for e in self.relations if e.label == label]

    def has_entity_with_label(self, label: str) -> bool:
        """
        Checks if the ontology has an entity with the given label.

        Args:
            label (str): The label to search for.

        Returns:
            True if an entity with the given label exists, False otherwise.
        """
        return any(n.label == label for n in self.entities)

    def has_relation_with_label(self, label: str) -> bool:
        """
        Checks if the ontology has a relation with the given label.

        Args:
            label (str): The label of the relation to check.

        Returns:
            True if a relation with the given label exists, False otherwise.
        """
        return any(e.label == label for e in self.relations)

    def __str__(self) -> str:
        """
        Returns a string representation of the Ontology object.

        The string includes a list of entities and relations in the ontology.

        Returns:
            A string representation of the Ontology object.
        """
        # NOTE(review): "\f" is a form feed; possibly "\t" was intended. Kept
        # as-is because the exact output may be relied upon elsewhere.
        return "Entities:\n\f- {entities}\n\nEdges:\n\f- {relations}".format(
            entities="\n- ".join([str(entity) for entity in self.entities]),
            relations="\n- ".join([str(relation) for relation in self.relations]),
        )

    def save_to_graph(self, graph: Graph) -> None:
        """
        Saves the entities and relations to the specified graph.

        Entities are written first, then relations; each item supplies its own
        query through `to_graph_query()`.

        Args:
            graph (Graph): The graph to save the entities and relations to.
        """
        for entity in self.entities:
            query = entity.to_graph_query()
            logger.debug(f"Query: {query}")
            graph.query(query)

        for relation in self.relations:
            query = relation.to_graph_query()
            logger.debug(f"Query: {query}")
            graph.query(query)