55from rdflib import OWL , RDF , RDFS , Graph , Literal , Namespace , URIRef
66from rdflib .plugins .sparql import prepareQuery
77
8- from pandasaurus .graph .graph_generator_utils import (
9- add_edge ,
10- add_outgoing_edges_to_subgraph ,
11- )
128from pandasaurus .utils .logging_config import configure_logger
139
1410# Set up logger
@@ -65,30 +61,16 @@ def apply_transitive_reduction(graph: Graph, predicate_list: List[str]) -> Graph
6561 relationship.
6662 """
6763 invalid_predicates = []
68- # TODO We need a better way to handle the queries, and decide the format we accept in the predicate list
69- ask_query = prepareQuery ("SELECT ?s ?p WHERE { ?s ?p ?o }" )
7064 for predicate in predicate_list :
71- predicate_ = RDFS . subClassOf if predicate == "rdfs:subClassOf" else URIRef (predicate )
72- if predicate and not graph . query ( ask_query , initBindings = { "p" : predicate_ }, initNs = { "rdfs" : RDFS } ):
65+ predicate_uri = GraphGenerator . _normalize_predicate (predicate )
66+ if not GraphGenerator . _predicate_exists ( graph , predicate_uri ):
7367 invalid_predicates .append (predicate )
7468 continue
7569
76- subgraph = add_outgoing_edges_to_subgraph (graph , predicate_ )
77-
78- nx_graph = nx .DiGraph ()
79- for s , p , o in subgraph :
80- if isinstance (o , URIRef ) and p != RDF .type :
81- add_edge (nx_graph , s , predicate , o )
82-
83- # Apply transitive reduction to remove redundancy
84- transitive_reduction_graph = nx .transitive_reduction (nx_graph )
85- transitive_reduction_graph .add_edges_from (
86- (u , v , nx_graph .edges [u , v ]) for u , v in transitive_reduction_graph .edges
87- )
88- # Remove redundant triples using nx graph
89- edge_diff = list (set (nx_graph .edges ) - set (transitive_reduction_graph .edges ))
90- for edge in edge_diff :
91- graph .remove ((URIRef (edge [0 ]), predicate_ , URIRef (edge [1 ])))
70+ subgraph = GraphGenerator ._add_outgoing_edges_to_subgraph (graph , predicate_uri )
71+ nx_graph = GraphGenerator ._build_networkx_graph (subgraph , predicate )
72+ redundant_edges = GraphGenerator ._compute_redundant_edges (nx_graph )
73+ GraphGenerator ._remove_redundant_triples (graph , redundant_edges , predicate_uri )
9274 # TODO Temporarily disabling this log message
9375 # logger.info(f"Transitive reduction has been applied on {predicate} for graph generation.")
9476
@@ -101,3 +83,55 @@ def apply_transitive_reduction(graph: Graph, predicate_list: List[str]) -> Graph
10183 logger .error (error_msg )
10284
10385 return graph
86+
@staticmethod
def _normalize_predicate(predicate: str) -> URIRef:
    """Map a predicate string to the URI used for graph lookups.

    The literal shortcut ``"rdfs:subClassOf"`` resolves to ``RDFS.subClassOf``.
    Any other value is wrapped in a ``URIRef`` as-is; no prefix expansion is
    performed on it.
    """
    if predicate == "rdfs:subClassOf":
        return RDFS.subClassOf
    return URIRef(predicate)
91+
@staticmethod
def _predicate_exists(graph: Graph, predicate_uri: URIRef) -> bool:
    """Report whether at least one triple in ``graph`` uses ``predicate_uri``.

    Args:
        graph: The rdflib graph to inspect.
        predicate_uri: The predicate to look for.

    Returns:
        ``True`` if any triple has ``predicate_uri`` as its predicate,
        ``False`` otherwise.
    """
    # Triple-pattern membership stops at the first match and avoids parsing
    # and preparing a SPARQL query on every call (this runs once per
    # predicate inside the caller's loop).
    return (None, predicate_uri, None) in graph
97+
@staticmethod
def _build_networkx_graph(subgraph: Graph, predicate: str) -> nx.DiGraph:
    """Build a networkx ``DiGraph`` from the triples of ``subgraph``.

    Triples whose object is not a ``URIRef`` and ``rdf:type`` triples are
    skipped. Every edge is created through ``GraphGenerator._add_edge``
    using the caller-supplied ``predicate`` string, not the predicate of
    the individual triple.
    """
    digraph = nx.DiGraph()
    for subj, pred, obj in subgraph:
        if not isinstance(obj, URIRef) or pred == RDF.type:
            continue
        GraphGenerator._add_edge(digraph, subj, predicate, obj)
    return digraph
106+
@staticmethod
def _compute_redundant_edges(nx_graph: nx.DiGraph) -> List[tuple]:
    """Return the edges of ``nx_graph`` that its transitive reduction drops.

    Args:
        nx_graph: Directed graph to reduce. ``nx.transitive_reduction`` is
            documented to raise ``NetworkXError`` when the graph is not a
            DAG; that exception is not handled here.

    Returns:
        A list of ``(source, target)`` pairs present in ``nx_graph`` but
        absent from its transitive reduction, in no particular order.
    """
    reduced = nx.transitive_reduction(nx_graph)
    # Only edge endpoints are compared below, so edge attributes do not need
    # to be copied onto the reduced graph first.
    return list(set(nx_graph.edges) - set(reduced.edges))
115+
@staticmethod
def _remove_redundant_triples(graph: Graph, redundant_edges: List[tuple], predicate_uri: URIRef) -> None:
    """Delete from ``graph`` one triple per entry of ``redundant_edges``.

    Each entry is a ``(source, target)`` pair; both ends are wrapped in
    ``URIRef`` and the triple ``(source, predicate_uri, target)`` is removed.
    The graph is modified in place.
    """
    for edge in redundant_edges:
        subject_id, object_id = edge
        triple = (URIRef(subject_id), predicate_uri, URIRef(object_id))
        graph.remove(triple)
121+
@staticmethod
def _add_edge(nx_graph, subject, predicate, obj):
    """Add a labelled edge from ``subject`` to ``obj`` on ``nx_graph``.

    Both endpoints are stored as their string form. The edge's ``label``
    attribute is the part of the predicate after the last ``#`` when the
    predicate contains one, otherwise the part after the last ``/``.
    """
    separator = "#" if "#" in predicate else "/"
    label = str(predicate).split(separator)[-1]
    nx_graph.add_edge(str(subject), str(obj), label=label)
130+
@staticmethod
def _add_outgoing_edges_to_subgraph(graph, predicate_uri=None):
    """Copy the triples of ``graph`` that match ``predicate_uri`` into a new graph.

    With the default ``predicate_uri=None`` the predicate position is left
    unconstrained in the triple pattern, so every triple is copied. The
    input graph is not modified.
    """
    pattern = (None, predicate_uri, None)
    result = Graph()
    for triple in graph.triples(pattern):
        result.add(triple)
    return result
0 commit comments