From 5a90e99010c0cbe2655cdde8ebc3126c69e5184f Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 2 Nov 2022 09:10:08 +0100 Subject: [PATCH 1/5] Removed all workflows using SINTEF ci-cd repo --- .github/workflows/{ => removed_for_now}/cd_release.yml | 0 .github/workflows/{ => removed_for_now}/ci_cd_updated_master.yml | 0 .github/workflows/{ => removed_for_now}/ci_dependabot.yml | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{ => removed_for_now}/cd_release.yml (100%) rename .github/workflows/{ => removed_for_now}/ci_cd_updated_master.yml (100%) rename .github/workflows/{ => removed_for_now}/ci_dependabot.yml (100%) diff --git a/.github/workflows/cd_release.yml b/.github/workflows/removed_for_now/cd_release.yml similarity index 100% rename from .github/workflows/cd_release.yml rename to .github/workflows/removed_for_now/cd_release.yml diff --git a/.github/workflows/ci_cd_updated_master.yml b/.github/workflows/removed_for_now/ci_cd_updated_master.yml similarity index 100% rename from .github/workflows/ci_cd_updated_master.yml rename to .github/workflows/removed_for_now/ci_cd_updated_master.yml diff --git a/.github/workflows/ci_dependabot.yml b/.github/workflows/removed_for_now/ci_dependabot.yml similarity index 100% rename from .github/workflows/ci_dependabot.yml rename to .github/workflows/removed_for_now/ci_dependabot.yml From 405c609d5dc0f0b22cd9c6f01be210a46d56390e Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 22 Mar 2024 22:52:30 +0100 Subject: [PATCH 2/5] adding initial test of using json-ld for rdf serialisation --- tests/models/test_jsonld.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/models/test_jsonld.py diff --git a/tests/models/test_jsonld.py b/tests/models/test_jsonld.py new file mode 100644 index 000000000..6c0614bbb --- /dev/null +++ b/tests/models/test_jsonld.py @@ -0,0 +1,24 @@ +import json + +import rdflib + + +json = """ +{ + "@context": { + "@vocab": 
"http://xmlns.com/foaf/0.1/", + "knows": {"@type": "@id"} + }, + "@id": "http://manu.sporny.org/about#manu", + "@type": "Person", + "name": "Manu Sporny", + "knows": { + "@id": "https://greggkellogg.net/foaf#me", + "@type": "Person", + "name": "Gregg Kellogg" + } +} +""" +g = rdflib.Graph() +g.parse(data=json, format="json-ld") +print(g.serialize(format="turtle")) From ecd88d64aa723418d333f2af1c1b459466293959 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 May 2024 17:19:06 +0200 Subject: [PATCH 3/5] Added test - work still in progress... --- docs/api_reference/utils/rdf.md | 3 + tests/models/test_jsonld.py | 237 ++++++++++++++++++++++++++++++-- 2 files changed, 228 insertions(+), 12 deletions(-) create mode 100644 docs/api_reference/utils/rdf.md diff --git a/docs/api_reference/utils/rdf.md b/docs/api_reference/utils/rdf.md new file mode 100644 index 000000000..d0f796567 --- /dev/null +++ b/docs/api_reference/utils/rdf.md @@ -0,0 +1,3 @@ +# rdf + +::: oteapi.utils.rdf diff --git a/tests/models/test_jsonld.py b/tests/models/test_jsonld.py index 6c0614bbb..f926b9043 100644 --- a/tests/models/test_jsonld.py +++ b/tests/models/test_jsonld.py @@ -1,24 +1,237 @@ import json +from pathlib import Path import rdflib +from rdflib.plugins.shared.jsonld.context import Context +from oteapi.utils import rdf -json = """ +thisdir = Path(__file__).resolve().parent +testdir = thisdir.parent +staticdir = testdir / "static" + +# s = """ +# { +# "@context": { +# "@vocab": "http://xmlns.com/foaf/0.1/", +# "knows": {"@type": "@id"} +# }, +# "@id": "http://manu.sporny.org/about#manu", +# "@type": "Person", +# "name": "Manu Sporny", +# "knows": { +# "@id": "https://greggkellogg.net/foaf#me", +# "@type": "Person", +# "name": "Gregg Kellogg" +# } +# } +# """ +# g = rdflib.Graph() +# g.parse(data=s, format="json-ld") +# #print(g.serialize(format="turtle")) +# #print("------------------------------------------------") +# #print() +# +# +# conf = """ +# { +# "@context": { +# "oteio": 
"https://w3id.org/emmo/domain/oteio#", +# "dcat": "http://www.w3.org/ns/dcat#", +# "dcterms": "http://purl.org/dc/terms/", +# +# "downloadURL": "dcat:downloadURL", +# "mediaType": "dcat:mediaType", +# "license": "dcterms:license", +# "driver": "oteio:driver", +# "configuration": "oteio:configuration", +# "dataresource": "oteio:dataresource" +# }, +# "http://example.com/ex/faithfull": { +# "@type": "oteio:Source", +# "@id": "http://example.com/ex/faithfull", +# "dataresource": { +# "downloadURL": "http://example.com/datasets/faithfull.csv", +# "mediaType": "application/csv", +# "license": "https://creativecommons.org/licenses/by/4.0/legalcode", +# "configuration": { +# "driver": "csv" +# } +# } +# } +# } +# """ +# # "http://example.com/ex/faithfull": { +# g = rdflib.Graph() +# #g.bind("ex", "http://example.com/ex/") +# g.parse(data=conf, format="json-ld") +# #print(g.serialize(format="turtle")) +# #print("------------------------------------------------") +# #print() + + +conf2 = """ { "@context": { - "@vocab": "http://xmlns.com/foaf/0.1/", - "knows": {"@type": "@id"} + "@version": 1.1, + + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "dcterms": "http://purl.org/dc/terms/", + "dcat": "http://www.w3.org/ns/dcat#", + "oteio": "https://w3id.org/emmo/domain/oteio#", + + "resources": "@nest", + "configuration": { + "@id": "oteio:configuration", + "@type": "@json" + }, + "dataresource": { + "@id": "oteio:dataresource", + "@type": "oteio:DataResource", + "@nest": "resources" + }, + "downloadURL": "dcat:downloadURL", + "mediaType": "dcat:mediaType", + "license": "dcterms:license", + + "parse": "oteio:parse", + "parserType": "oteio:parserType", + "datamodel": "oteio:datamodel", + + "driver": "oteio:driver" }, - "@id": "http://manu.sporny.org/about#manu", - "@type": "Person", - "name": "Manu Sporny", - "knows": { - "@id": "https://greggkellogg.net/foaf#me", - "@type": "Person", - "name": "Gregg Kellogg" - } + + "resources": [ + { + "@type": "oteio:Source", + "@id": 
"http://example.com/ex/faithfull", + "dataresource": { + "downloadURL": "http://example.com/datasets/faithfull.csv", + "mediaType": "application/csv", + "license": "https://creativecommons.org/licenses/by/4.0/legalcode" + }, + "parse": { + "parserType": "application/vnd.dlite-parse", + "datamodel": "http://onto-ns.com/meta/calm/0.1/Composition", + "configuration": { + "driver": "csv" + } + } + } + ] } """ +# "http://example.com/ex/faithfull": { g = rdflib.Graph() -g.parse(data=json, format="json-ld") +# g.bind("ex", "http://example.com/ex/") +# print(json.loads(conf2)) +g.parse(data=conf2, format="json-ld") print(g.serialize(format="turtle")) +print("------------------------------------------------") +print() +context_data = json.loads(conf2).get("@context") +context = Context(context_data) +graph_data = json.loads( + g.serialize(format="json-ld", context_data=context_data, auto_compact=True) +).get("@graph") +# print(json.dumps(graph_data, indent=2)) +# print("------------------------------------------------") +# print() + + +def expand(item): + """Returns `item` with all all references to blank nodes expanded.""" + d = {} + for k, v in item.items(): + if k == "@id": + if v.startswith("_:"): + dct = iris[v].copy() + dct.pop("@id") + d.update(expand(dct)) + else: + d[k] = v + elif isinstance(v, dict): + if "@value" in v: + if v.get("@type") == "rdf:JSON": + d[k] = json.loads(v["@value"]) + else: + d[k] = v["@value"] + else: + d[k] = expand(v) + else: + d[k] = v + return d + + +def from_rdf(graph, context_data): + graph_data = json.loads( + g.serialize(format="json-ld", context_data=context_data, auto_compact=True) + ).get("@graph") + # iris = {d["@id"]: d for d in graph_data if "@id" in d} + resources = [ + expand(d) for d in graph_data if "@id" in d and not d["@id"].startswith("_:") + ] + json_data = { + "resources": resources, + } + return json_data + + +iris = {d["@id"]: d for d in graph_data if "@id" in d} +assert "http://example.com/ex/faithfull" in iris +# 
resources = [ +# expand(d) for d in graph_data +# if "@id" in d and not d["@id"].startswith("_:") +# ] +# +# json_data = { +# "resources": resources, +# } +# print(json.dumps(json_data, indent=2)) +print(json.dumps(from_rdf(g, context_data), indent=2)) + + +# PREFIX ex: +res = g.query( + """ +PREFIX ex: +CONSTRUCT { ?s ?p ?o } +WHERE { + ex:faithfull (<>|!<>) ?s . + ?s ?p ?o . +} +""" +) + + +# data = """ +# @prefix : . +# +# :A :p :B, :C . +# :B :q :D . +# :C :r :E . +# +# :F :s :G . +# :G :t :H . +# """ +# query = """ +# PREFIX x: +# PREFIX : +# +# CONSTRUCT { +# ?s ?p ?o +# } +# WHERE { +# :A (<>|!<>)* ?s . +# ?s ?p ?o . +# } +# """ +# graph = rdflib.Graph() +# graph.parse(data=data) +# res = graph.query(query) + + +# with open(staticdir / "resources.yaml", "rt", encoding="utf8") as f: +# data = yaml.safe_load(f) + +graph = rdf.add_resource(staticdir / "resources.yaml") From 713d3ff0f186f6a04189133654e920e2af6051a6 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 2 May 2024 17:43:32 +0200 Subject: [PATCH 4/5] Added rdf.py --- oteapi/utils/rdf.py | 113 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 oteapi/utils/rdf.py diff --git a/oteapi/utils/rdf.py b/oteapi/utils/rdf.py new file mode 100644 index 000000000..6bd943a86 --- /dev/null +++ b/oteapi/utils/rdf.py @@ -0,0 +1,113 @@ +"""Utility functions for representing instances of pydantic models as rdf. + +This module uses JSON-LD with a shared context on https://w3id.org/domain/oteio/context +""" + +import io +import json +from pathlib import Path +from typing import TYPE_CHECKING + +import rdflib +import yaml + +if TYPE_CHECKING: # pragma: no cover + from typing import Any, Optional, TextIO, Union + + # import tripper + + +def load_content( + source: "Optional[Union[Path, str, TextIO]]" = None, + data: "Optional[str]" = None, + format: "Optional[str]" = None, +) -> "Any": + """Load content from yaml or json source. 
+ + Arguments: + source: File name or file-like object with data documentation to add. + data: String containing the data documentation to add. + format: Input format. One of: "yaml", "json". + By default it will be inferred from `source` or `data`. + + Returns: + Python representation of the content. + """ + if not source and not data: + raise TypeError("Either `source` or `data` must be given.") + + if source and isinstance(source, (str, Path)): + with open(source, "rt") as f: + return load_content(source=f, format=format) + + if format is None: + if source: + format = Path(source.name).suffix + elif data.lstrip().startswith("---"): + format = "yaml" + elif data.lstrip().startswith("{"): + format = "json" + + if format is None: + raise ValueError("Format cannot be inferred. Use `format` argument.") + + format = format.lstrip(".").lower() + if format in ("yaml", "yml"): + if not source: + source = io.StringIO(data) + content = yaml.safe_load(source) + elif format in ("json"): + content = json.load(source) if source else json.loads(data) + else: + raise TypeError(f"Unsupported format: {format}") + + return content + + +def add_resource( + source: "Optional[Union[Path, str, TextIO]]" = None, + data: "Optional[dict, str]" = None, + format: "Optional[str]" = None, + graph: "Optional[Union[rdflib.Graph, Any]]" = None, +) -> "Union[rdflib.Graph, Any]": + """Add documentation of data resource(s) to triplestore. + + Arguments: + + source: File name or file-like object with data documentation + to add. + data: Dict or string containing the data documentation to add. + format: Input format. One of: "yaml", "json". + By default it will be inferred from `source` or `data`. + graph: The graph to add the documentation to. It can be a + rdflib.Graph object or any type that has a parse() method + that supports json-ld. + If not given, a new rdflib.Graph object will be created. + + Returns: + The provided graph or a new rdflib.Graph object, if `graph` is + None. 
+ """ + if isinstance(data, dict): + content = data.copy() + else: + content = load_content(source=source, data=data, format=format) + + if not isinstance(content, dict): + raise TypeError("Expected input content to be a dict.") + + if "@context" not in content: + content["@context"] = "https://w3id.org/emmo/domain/oteio/context" + + if not graph: + graph = rdflib.Graph() + + with open("xxx.json", "wt") as f: + json.dump(content, f, indent=2) + + # print("=====================================") + # print(json.dumps(content, indent=2)) + + # graph.parse(data=content, format="json-ld") + graph.parse(source="xxx.json", format="json-ld") + return graph From 9d4ffd024ff2bdbb349ffe312189097caa3868d8 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 3 May 2024 14:11:58 +0200 Subject: [PATCH 5/5] Added dependency on rdflib --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index c18127988..33df95d0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "pydantic~=2.7", "pydantic-settings~=2.2", "typing-extensions~=4.11; python_version < '3.10'", + "rdflib>=6.3", # Strategy dependencies "celery>=5.3.5,<6",