Skip to content

Commit e26381b

Browse files
authored
Inline graph generator utils and expand tests (#50)
1 parent c0f5f7c commit e26381b

File tree

5 files changed

+58
-157
lines changed

5 files changed

+58
-157
lines changed

docs/pandasaurus/graph/graph_generator_utils.rst

Lines changed: 0 additions & 13 deletions
This file was deleted.

docs/pandasaurus/graph/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,3 @@ Graph Module
66
:caption: Contents:
77

88
graph_generator
9-
graph_generator_utils

pandasaurus/graph/graph_generator.py

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@
55
from rdflib import OWL, RDF, RDFS, Graph, Literal, Namespace, URIRef
66
from rdflib.plugins.sparql import prepareQuery
77

8-
from pandasaurus.graph.graph_generator_utils import (
9-
add_edge,
10-
add_outgoing_edges_to_subgraph,
11-
)
128
from pandasaurus.utils.logging_config import configure_logger
139

1410
# Set up logger
@@ -65,30 +61,16 @@ def apply_transitive_reduction(graph: Graph, predicate_list: List[str]) -> Graph
6561
relationship.
6662
"""
6763
invalid_predicates = []
68-
# TODO We need a better way to handle the queries, and decide the format we accept in the predicate list
69-
ask_query = prepareQuery("SELECT ?s ?p WHERE { ?s ?p ?o }")
7064
for predicate in predicate_list:
71-
predicate_ = RDFS.subClassOf if predicate == "rdfs:subClassOf" else URIRef(predicate)
72-
if predicate and not graph.query(ask_query, initBindings={"p": predicate_}, initNs={"rdfs": RDFS}):
65+
predicate_uri = GraphGenerator._normalize_predicate(predicate)
66+
if not GraphGenerator._predicate_exists(graph, predicate_uri):
7367
invalid_predicates.append(predicate)
7468
continue
7569

76-
subgraph = add_outgoing_edges_to_subgraph(graph, predicate_)
77-
78-
nx_graph = nx.DiGraph()
79-
for s, p, o in subgraph:
80-
if isinstance(o, URIRef) and p != RDF.type:
81-
add_edge(nx_graph, s, predicate, o)
82-
83-
# Apply transitive reduction to remove redundancy
84-
transitive_reduction_graph = nx.transitive_reduction(nx_graph)
85-
transitive_reduction_graph.add_edges_from(
86-
(u, v, nx_graph.edges[u, v]) for u, v in transitive_reduction_graph.edges
87-
)
88-
# Remove redundant triples using nx graph
89-
edge_diff = list(set(nx_graph.edges) - set(transitive_reduction_graph.edges))
90-
for edge in edge_diff:
91-
graph.remove((URIRef(edge[0]), predicate_, URIRef(edge[1])))
70+
subgraph = GraphGenerator._add_outgoing_edges_to_subgraph(graph, predicate_uri)
71+
nx_graph = GraphGenerator._build_networkx_graph(subgraph, predicate)
72+
redundant_edges = GraphGenerator._compute_redundant_edges(nx_graph)
73+
GraphGenerator._remove_redundant_triples(graph, redundant_edges, predicate_uri)
9274
# TODO Temporarily disabling this log message
9375
# logger.info(f"Transitive reduction has been applied on {predicate} for graph generation.")
9476

@@ -101,3 +83,55 @@ def apply_transitive_reduction(graph: Graph, predicate_list: List[str]) -> Graph
10183
logger.error(error_msg)
10284

10385
return graph
86+
87+
@staticmethod
def _normalize_predicate(predicate: str) -> URIRef:
    """Translate a predicate string into the URI used to match triples.

    Args:
        predicate: Predicate as supplied by the caller. The literal
            ``"rdfs:subClassOf"`` is the only shortcut that is recognised.

    Returns:
        ``RDFS.subClassOf`` for the shortcut, otherwise the input wrapped
        unchanged in a ``URIRef``.
    """
    if predicate == "rdfs:subClassOf":
        return RDFS.subClassOf
    return URIRef(predicate)
91+
92+
@staticmethod
def _predicate_exists(graph: Graph, predicate_uri: URIRef) -> bool:
    """Report whether at least one triple in ``graph`` uses ``predicate_uri``.

    Args:
        graph: The rdflib graph to inspect.
        predicate_uri: Predicate to look for.

    Returns:
        ``True`` if any ``(?s, predicate_uri, ?o)`` triple is present,
        ``False`` otherwise.
    """
    # A triple-pattern membership test stops at the first match and avoids
    # parsing a SPARQL query on every call just to check for existence.
    return (None, predicate_uri, None) in graph
97+
98+
@staticmethod
def _build_networkx_graph(subgraph: Graph, predicate: str) -> nx.DiGraph:
    """Build a directed networkx graph from the triples of ``subgraph``.

    Triples whose object is not a ``URIRef`` or whose predicate is
    ``RDF.type`` are skipped. Every remaining triple becomes one edge from
    subject to object, labelled from ``predicate`` (the caller-supplied
    string, not the predicate of the individual triple).

    Args:
        subgraph: rdflib graph holding the triples to convert.
        predicate: Predicate string used to derive each edge label.

    Returns:
        The populated ``nx.DiGraph``.
    """
    digraph = nx.DiGraph()
    for subject, triple_predicate, obj in subgraph:
        if not isinstance(obj, URIRef) or triple_predicate == RDF.type:
            continue
        GraphGenerator._add_edge(digraph, subject, predicate, obj)
    return digraph
106+
107+
@staticmethod
def _compute_redundant_edges(nx_graph: nx.DiGraph) -> List[tuple]:
    """Return the edges of ``nx_graph`` that a transitive reduction drops.

    Args:
        nx_graph: Directed graph to reduce. It is not modified.

    Returns:
        A list of ``(source, target)`` pairs that are present in
        ``nx_graph`` but absent from its transitive reduction. The order is
        unspecified because the result comes from a set difference.

    Note:
        Per the networkx documentation, ``nx.transitive_reduction`` raises
        ``NetworkXError`` when the graph is not a DAG; that error propagates
        to the caller.
    """
    reduced_graph = nx.transitive_reduction(nx_graph)
    # Only the (source, target) pairs matter for the difference, so there is
    # no need to copy edge attributes onto the temporary reduced graph.
    return list(set(nx_graph.edges) - set(reduced_graph.edges))
115+
116+
@staticmethod
def _remove_redundant_triples(graph: Graph, redundant_edges: List[tuple], predicate_uri: URIRef) -> None:
    """Delete from ``graph`` the triple that corresponds to each redundant edge.

    Args:
        graph: rdflib graph that is modified in place.
        redundant_edges: ``(source, target)`` pairs; each end is wrapped in
            a ``URIRef`` before removal.
        predicate_uri: Predicate shared by all triples that are removed.
    """
    for edge in redundant_edges:
        subject_id, object_id = edge
        redundant_triple = (URIRef(subject_id), predicate_uri, URIRef(object_id))
        graph.remove(redundant_triple)
121+
122+
@staticmethod
def _add_edge(nx_graph, subject, predicate, obj):
    """Add a labelled edge from ``subject`` to ``obj`` to ``nx_graph``.

    Both endpoints are stored as plain strings. The edge's ``label``
    attribute is the part of the predicate after its last ``#`` if the
    predicate contains one, otherwise the part after its last ``/``.

    Args:
        nx_graph: Graph object that receives the edge through ``add_edge``.
        subject: Source node; converted with ``str``.
        predicate: Predicate the label is derived from.
        obj: Target node; converted with ``str``.
    """
    separator = "#" if "#" in predicate else "/"
    label = str(predicate).split(separator)[-1]
    nx_graph.add_edge(str(subject), str(obj), label=label)
130+
131+
@staticmethod
def _add_outgoing_edges_to_subgraph(graph, predicate_uri=None):
    """Copy every triple of ``graph`` that matches ``predicate_uri`` into a new graph.

    Args:
        graph: Source graph; queried through ``graph.triples`` and not
            modified here.
        predicate_uri: Predicate to match. The default ``None`` is passed
            to ``graph.triples`` as the predicate slot of the pattern.

    Returns:
        A freshly created ``Graph`` holding the matching triples.
    """
    matching = Graph()
    for triple in graph.triples((None, predicate_uri, None)):
        matching.add(triple)

    return matching

pandasaurus/graph/graph_generator_utils.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

test/graph/test_graph_generator_utils.py

Lines changed: 0 additions & 91 deletions
This file was deleted.

0 commit comments

Comments
 (0)