Skip to content

Commit 6488bc5

Browse files
authored
Add flag to write_rdf for outputting triples (#618)
1 parent 7f86813 commit 6488bc5

File tree

2 files changed

+159
-9
lines changed

2 files changed

+159
-9
lines changed

src/sssom/writers.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
)
2020

2121
import pandas as pd
22+
import rdflib
2223
import yaml
2324
from curies import Converter
2425
from deprecation import deprecated
@@ -31,7 +32,7 @@
3132

3233
from sssom.validators import check_all_prefixes_in_curie_map
3334

34-
from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX, PathOrIO
35+
from .constants import CURIE_MAP, PREDICATE_MODIFIER_NOT, SCHEMA_YAML, SSSOM_URI_PREFIX, PathOrIO
3536
from .context import _load_sssom_context
3637
from .parsers import to_mapping_set_document
3738
from .util import (
@@ -55,6 +56,12 @@
5556
OWL_EQUIV_OBJECTPROPERTY = "http://www.w3.org/2002/07/owl#equivalentProperty"
5657
SSSOM_NS = SSSOM_URI_PREFIX
5758

59+
NO_TERM_REF = rdflib.URIRef("https://w3id.org/sssom/NoTermFound")
60+
PREDICATE_MODIFIER = rdflib.URIRef("https://w3id.org/sssom/predicate_modifier")
61+
OBJECT_NOT = rdflib.URIRef("https://w3id.org/sssom/NegatedPredicate")
62+
LITERAL_NOT = rdflib.Literal(PREDICATE_MODIFIER_NOT)
63+
NEGATED_NODES: set[rdflib.Node] = {OBJECT_NOT, LITERAL_NOT}
64+
5865
# Writers
5966

6067
MSDFWriter = Callable[[MappingSetDataFrame, TextIO], None]
@@ -116,8 +123,18 @@ def write_rdf(
116123
msdf: MappingSetDataFrame,
117124
file: PathOrIO,
118125
serialisation: Optional[str] = None,
126+
*,
127+
hydrate: bool = False,
119128
) -> None:
120-
"""Write a mapping set dataframe to the file as RDF."""
129+
"""Write a mapping set dataframe to the file as RDF.
130+
131+
:param msdf: A mapping set dataframe
132+
:param file: The path or file object to write to
133+
:param serialisation: The RDF format to serialize to, see :data:`RDF_FORMATS`.
134+
Defaults to turtle.
135+
:param hydrate: If true, will add subject-predicate-objects directly representing
136+
mappings. This is opt-in behavior.
137+
"""
121138
if serialisation is None:
122139
serialisation = SSSOM_DEFAULT_RDF_SERIALISATION
123140
elif serialisation not in RDF_FORMATS:
@@ -128,7 +145,7 @@ def write_rdf(
128145
serialisation = SSSOM_DEFAULT_RDF_SERIALISATION
129146

130147
check_all_prefixes_in_curie_map(msdf)
131-
graph = to_rdf_graph(msdf=msdf)
148+
graph = to_rdf_graph(msdf=msdf, hydrate=hydrate)
132149
t = graph.serialize(format=serialisation, encoding="utf-8")
133150
with _open_text_writer(file) as fh:
134151
print(t.decode(), file=fh)
@@ -204,6 +221,33 @@ def write_owl(
204221
# Converters convert a mapping set dataframe to an object of the supported types (JSON, pandas dataframe)
205222

206223

224+
def _hydrate_axioms(
    graph: rdflib.Graph,
    *,
    add_negative: bool = True,
    add_no_term_found: bool = True,
) -> None:
    """Add a direct subject-predicate-object triple for each reified axiom.

    For every node typed as ``owl:Axiom``, each combination of its
    ``owl:annotatedSource``, ``owl:annotatedProperty`` and
    ``owl:annotatedTarget`` values is added to the graph as a plain triple.
    The graph is modified in place.

    :param graph: The RDF graph to read the axioms from and add the triples to
    :param add_negative: If false, axioms for which :func:`_is_negated` is true
        are skipped
    :param add_no_term_found: If false, triples for which
        :func:`_is_no_term_found` is true are skipped
    """
    # Collect first and add afterwards, so the graph is never mutated while
    # one of its lazy generators is still being consumed.
    new_triples: list[tuple[rdflib.Node, rdflib.Node, rdflib.Node]] = []
    for axiom in graph.subjects(RDF.type, OWL.Axiom):
        # Negation depends only on the axiom, so decide it once per axiom
        # rather than once per (property, source, target) combination.
        if not add_negative and _is_negated(graph, axiom):
            continue
        for p in graph.objects(subject=axiom, predicate=OWL.annotatedProperty):
            for s in graph.objects(subject=axiom, predicate=OWL.annotatedSource):
                for o in graph.objects(subject=axiom, predicate=OWL.annotatedTarget):
                    if not add_no_term_found and _is_no_term_found(s, o):
                        continue
                    new_triples.append((s, p, o))
    for triple in new_triples:
        graph.add(triple)
239+
240+
241+
def _is_no_term_found(s: rdflib.Node, o: rdflib.Node) -> bool:
    """Return whether the subject or the object equals ``NO_TERM_REF``."""
    if s == NO_TERM_REF:
        return True
    return o == NO_TERM_REF
243+
244+
245+
def _is_negated(graph: rdflib.Graph, axiom: rdflib.Node) -> bool:
    """Return whether any predicate modifier of the axiom is in ``NEGATED_NODES``."""
    for modifier in graph.objects(subject=axiom, predicate=PREDICATE_MODIFIER):
        if modifier in NEGATED_NODES:
            return True
    return False
249+
250+
207251
def to_owl_graph(msdf: MappingSetDataFrame) -> Graph:
208252
"""Convert a mapping set dataframe to OWL in an RDF graph."""
209253
msdf.df = invert_mappings(
@@ -217,11 +261,8 @@ def to_owl_graph(msdf: MappingSetDataFrame) -> Graph:
217261
for _s, _p, o in graph.triples((None, URIRef(URI_SSSOM_MAPPINGS), None)):
218262
graph.add((o, URIRef(RDF_TYPE), OWL.Axiom))
219263

220-
for axiom in graph.subjects(RDF.type, OWL.Axiom):
221-
for p in graph.objects(subject=axiom, predicate=OWL.annotatedProperty):
222-
for s in graph.objects(subject=axiom, predicate=OWL.annotatedSource):
223-
for o in graph.objects(subject=axiom, predicate=OWL.annotatedTarget):
224-
graph.add((s, p, o))
264+
# TODO consider making this not add negative or term not found
265+
_hydrate_axioms(graph, add_negative=True, add_no_term_found=True)
225266

226267
sparql_prefixes = """
227268
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
@@ -307,7 +348,7 @@ def to_owl_graph(msdf: MappingSetDataFrame) -> Graph:
307348
return graph
308349

309350

310-
def to_rdf_graph(msdf: MappingSetDataFrame) -> Graph:
351+
def to_rdf_graph(msdf: MappingSetDataFrame, *, hydrate: bool = False) -> Graph:
311352
"""Convert a mapping set dataframe to an RDF graph."""
312353
doc = to_mapping_set_document(msdf)
313354
graph = rdflib_dumper.as_rdf_graph(
@@ -316,6 +357,8 @@ def to_rdf_graph(msdf: MappingSetDataFrame) -> Graph:
316357
# TODO Use msdf.converter directly via https://github.com/linkml/linkml-runtime/pull/278
317358
prefix_map=msdf.converter.bimap,
318359
)
360+
if hydrate:
361+
_hydrate_axioms(graph, add_no_term_found=False, add_negative=False)
319362
return cast(Graph, graph)
320363

321364

tests/test_convert.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
11
"""Tests for conversion utilities."""
22

3+
from __future__ import annotations
4+
35
import unittest
46

7+
import curies
8+
import pandas as pd
9+
import rdflib
10+
11+
from sssom import MappingSetDataFrame
12+
from sssom.constants import (
13+
MAPPING_JUSTIFICATION,
14+
OBJECT_ID,
15+
PREDICATE_ID,
16+
PREDICATE_MODIFIER,
17+
SEMAPV,
18+
SUBJECT_ID,
19+
)
520
from sssom.parsers import parse_sssom_table
621
from sssom.writers import to_json, to_owl_graph, to_rdf_graph
722
from tests.constants import data_dir
@@ -95,3 +110,95 @@ def test_to_json(self) -> None:
95110
# ensure no JSON-LD strangeness
96111
for k in m.keys():
97112
self.assertFalse(k.startswith("@"))
113+
114+
def test_to_rdf_hydrated(self) -> None:
    """Test converting to RDF with hydration.

    Builds a four-row mapping set (a regular mapping, a mapping with the
    ``NOT`` predicate modifier, and one mapping each with
    ``sssom:NoTermFound`` as object and as subject) and checks with SPARQL
    ASK queries which direct triples are present with and without
    ``hydrate``.
    """
    rows = [
        # Regular mapping
        (
            "DOID:0050601",
            "skos:exactMatch",
            "UMLS:C1863204",
            SEMAPV.ManualMappingCuration.value,
            "",
        ),
        # Mapping with the NOT predicate modifier
        (
            "mesh:C562684",
            "skos:exactMatch",
            "HP:0003348",
            SEMAPV.ManualMappingCuration.value,
            "NOT",
        ),
        # NoTermFound as object
        (
            "mesh:C563052",
            "skos:exactMatch",
            "sssom:NoTermFound",
            SEMAPV.ManualMappingCuration.value,
            "",
        ),
        # NoTermFound as subject. The object must be the identifier used in
        # the ASK queries below, otherwise those assertions pass vacuously.
        (
            "sssom:NoTermFound",
            "skos:exactMatch",
            "mesh:C564625",
            SEMAPV.ManualMappingCuration.value,
            "",
        ),
    ]
    columns = [
        SUBJECT_ID,
        PREDICATE_ID,
        OBJECT_ID,
        MAPPING_JUSTIFICATION,
        PREDICATE_MODIFIER,
    ]
    df = pd.DataFrame(rows, columns=columns)
    converter = curies.Converter.from_prefix_map(
        {
            "DOID": "http://purl.obolibrary.org/obo/DOID_",
            "HP": "http://purl.obolibrary.org/obo/HP_",
            "UMLS": "https://uts.nlm.nih.gov/uts/umls/concept/",
            "mesh": "http://id.nlm.nih.gov/mesh/",
            "sssom": "https://w3id.org/sssom/",
        }
    )
    msdf = MappingSetDataFrame(df, converter=converter)

    # Without hydration, none of the direct triples may be present
    graph = to_rdf_graph(msdf, hydrate=False)
    self.assertIn("sssom", {p for p, _ in graph.namespaces()})
    self.assert_not_ask(
        graph,
        "ASK { DOID:0050601 skos:exactMatch UMLS:C1863204 }",
        msg="hydration should not have occurred",
    )
    self.assert_not_ask(graph, "ASK { mesh:C562684 skos:exactMatch HP:0003348 }")
    self.assert_not_ask(graph, "ASK { mesh:C563052 skos:exactMatch sssom:NoTermFound }")
    self.assert_not_ask(graph, "ASK { sssom:NoTermFound skos:exactMatch mesh:C564625 }")

    # With hydration, only the regular mapping becomes a direct triple
    graph = to_rdf_graph(msdf, hydrate=True)
    self.assertIn("sssom", {p for p, _ in graph.namespaces()})
    self.assert_ask(
        graph,
        "ASK { DOID:0050601 skos:exactMatch UMLS:C1863204 }",
        msg="regular triple should be hydrated",
    )
    self.assert_not_ask(
        graph,
        "ASK { mesh:C562684 skos:exactMatch HP:0003348 }",
        msg="triple with NOT modifier should not be hydrated",
    )
    self.assert_not_ask(
        graph,
        "ASK { mesh:C563052 skos:exactMatch sssom:NoTermFound }",
        msg="triple with NoTermFound as object should not be hydrated",
    )
    self.assert_not_ask(
        graph,
        "ASK { sssom:NoTermFound skos:exactMatch mesh:C564625 }",
        msg="triple with NoTermFound as subject should not be hydrated",
    )
197+
198+
def assert_ask(self, graph: rdflib.Graph, query: str, *, msg: str | None = None) -> None:
    """Assert that running the ASK query against the graph answers true."""
    result = graph.query(query)
    self.assertTrue(result.askAnswer, msg=msg)
201+
202+
def assert_not_ask(self, graph: rdflib.Graph, query: str, *, msg: str | None = None) -> None:
    """Assert that running the ASK query against the graph answers false."""
    result = graph.query(query)
    self.assertFalse(result.askAnswer, msg=msg)

0 commit comments

Comments
 (0)