From 514c044988fd54fa819be080e9db13135bcb13f2 Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 29 Oct 2025 16:41:06 +0100 Subject: [PATCH] create and test --- servers/mcp-neo4j-data-modeling/CHANGELOG.md | 2 + .../mcp-neo4j-data-modeling/pyproject.toml | 1 + .../src/mcp_neo4j_data_modeling/data_model.py | 190 ++++++++++++++++++ .../src/mcp_neo4j_data_modeling/server.py | 12 ++ .../tests/resources/blueplaques.ttl | 113 +++++++++++ .../tests/unit/test_data_model.py | 137 +++++++++++++ servers/mcp-neo4j-data-modeling/uv.lock | 33 +++ 7 files changed, 488 insertions(+) create mode 100644 servers/mcp-neo4j-data-modeling/tests/resources/blueplaques.ttl diff --git a/servers/mcp-neo4j-data-modeling/CHANGELOG.md b/servers/mcp-neo4j-data-modeling/CHANGELOG.md index ad14ec7..78bf89a 100644 --- a/servers/mcp-neo4j-data-modeling/CHANGELOG.md +++ b/servers/mcp-neo4j-data-modeling/CHANGELOG.md @@ -5,6 +5,8 @@ ### Changed ### Added +* Add import and export methods to `DataModel` for turtle OWL strings +* Add MCP tools for loading and exporting turtle OWL files ## v0.5.1 diff --git a/servers/mcp-neo4j-data-modeling/pyproject.toml b/servers/mcp-neo4j-data-modeling/pyproject.toml index 056a8f5..2758567 100644 --- a/servers/mcp-neo4j-data-modeling/pyproject.toml +++ b/servers/mcp-neo4j-data-modeling/pyproject.toml @@ -8,6 +8,7 @@ dependencies = [ "fastmcp>=2.0.0", "pydantic>=2.10.1", "starlette>=0.47.0", + "rdflib>=7.0.0", ] diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py index 8f7f2db..4f654e3 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py @@ -3,6 +3,7 @@ from typing import Any from pydantic import BaseModel, Field, ValidationInfo, field_validator +from rdflib import Graph, Namespace, RDF, RDFS, OWL, XSD, Literal, URIRef NODE_COLOR_PALETTE = [ ("#e3f2fd", "#1976d2"), # Light Blue / Blue @@ -551,6 +552,195 @@ def to_arrows_dict(self) -> dict[str, Any]: def to_arrows_json_str(self) -> str: "Convert the data model to an Arrows Data Model JSON string." return json.dumps(self.to_arrows_dict(), indent=2) + + def to_owl_turtle_str(self) -> str: + """Convert the data model to an OWL Turtle string. + + This method creates an OWL ontology from the Neo4j data model: + - Node labels become OWL Classes + - Node properties become OWL DatatypeProperties with the node class as domain + - Relationship types become OWL ObjectProperties with start/end nodes as domain/range + - Relationship properties become OWL DatatypeProperties with the relationship as domain + """ + # Create a new RDF graph + g = Graph() + + # Define namespaces + # Use a generic namespace for the ontology + base_ns = Namespace("http://voc.neo4j.com/datamodel#") + g.bind("", base_ns) + g.bind("owl", OWL) + g.bind("rdfs", RDFS) + g.bind("xsd", XSD) + + # Create the ontology declaration + ontology_uri = URIRef("http://voc.neo4j.com/datamodel") + g.add((ontology_uri, RDF.type, OWL.Ontology)) + + # Map Neo4j types to XSD types + type_mapping = { + "STRING": XSD.string, + "INTEGER": XSD.integer, + "FLOAT": XSD.float, + "BOOLEAN": XSD.boolean, + "DATE": XSD.date, + "DATETIME": XSD.dateTime, + "TIME": XSD.time, + "DURATION": XSD.duration, + "LONG": XSD.long, + "DOUBLE": XSD.double, + } + + # Process nodes -> OWL Classes + for node in self.nodes: + class_uri = base_ns[node.label] + g.add((class_uri, RDF.type, OWL.Class)) + + # Add key property as a datatype property + if node.key_property: + prop_uri = base_ns[node.key_property.name] + g.add((prop_uri, RDF.type, OWL.DatatypeProperty)) + g.add((prop_uri, RDFS.domain, class_uri)) + xsd_type = type_mapping.get(node.key_property.type.upper(), XSD.string) + g.add((prop_uri, RDFS.range, xsd_type)) + + # Add other properties as datatype properties + for prop in node.properties: + prop_uri = base_ns[prop.name] + g.add((prop_uri, RDF.type, OWL.DatatypeProperty)) + g.add((prop_uri, RDFS.domain, class_uri)) + xsd_type = type_mapping.get(prop.type.upper(), XSD.string) + g.add((prop_uri, RDFS.range, xsd_type)) + + # Process relationships -> OWL ObjectProperties + for rel in self.relationships: + rel_uri = base_ns[rel.type] + g.add((rel_uri, RDF.type, OWL.ObjectProperty)) + g.add((rel_uri, RDFS.domain, base_ns[rel.start_node_label])) + g.add((rel_uri, RDFS.range, base_ns[rel.end_node_label])) + + # If relationship has properties, create datatype properties + if rel.key_property: + prop_uri = base_ns[f"{rel.type}_{rel.key_property.name}"] + g.add((prop_uri, RDF.type, OWL.DatatypeProperty)) + g.add((prop_uri, RDFS.domain, rel_uri)) + xsd_type = type_mapping.get(rel.key_property.type.upper(), XSD.string) + g.add((prop_uri, RDFS.range, xsd_type)) + + for prop in rel.properties: + prop_uri = base_ns[f"{rel.type}_{prop.name}"] + g.add((prop_uri, RDF.type, OWL.DatatypeProperty)) + g.add((prop_uri, RDFS.domain, rel_uri)) + xsd_type = type_mapping.get(prop.type.upper(), XSD.string) + g.add((prop_uri, RDFS.range, xsd_type)) + + # Serialize to Turtle format + return g.serialize(format="turtle") + + @classmethod + def from_owl_turtle_str(cls, owl_turtle_str: str) -> "DataModel": + """Convert an OWL Turtle string to a Neo4j Data Model. + + This method parses an OWL ontology and creates a Neo4j data model: + - OWL Classes become Node labels + - OWL DatatypeProperties with Class domains become Node properties + - OWL ObjectProperties become Relationships + - Property domains and ranges are used to infer Node labels and types + """ + # Parse the Turtle string + g = Graph() + g.parse(data=owl_turtle_str, format="turtle") + + # Map XSD types back to Neo4j types + xsd_to_neo4j = { + str(XSD.string): "STRING", + str(XSD.integer): "INTEGER", + str(XSD.float): "FLOAT", + str(XSD.boolean): "BOOLEAN", + str(XSD.date): "DATE", + str(XSD.dateTime): "DATETIME", + str(XSD.time): "TIME", + str(XSD.duration): "DURATION", + str(XSD.long): "LONG", + str(XSD.double): "DOUBLE", + } + + # Extract OWL Classes -> Nodes + classes = set() + for s in g.subjects(RDF.type, OWL.Class): + classes.add(str(s).split("#")[-1].split("/")[-1]) + + # Extract DatatypeProperties + datatype_props = {} + for prop in g.subjects(RDF.type, OWL.DatatypeProperty): + prop_name = str(prop).split("#")[-1].split("/")[-1] + domains = list(g.objects(prop, RDFS.domain)) + ranges = list(g.objects(prop, RDFS.range)) + + domain_name = str(domains[0]).split("#")[-1].split("/")[-1] if domains else None + range_type = xsd_to_neo4j.get(str(ranges[0]), "STRING") if ranges else "STRING" + + if domain_name: + if domain_name not in datatype_props: + datatype_props[domain_name] = [] + datatype_props[domain_name].append({ + "name": prop_name, + "type": range_type + }) + + # Extract ObjectProperties -> Relationships + object_props = [] + for prop in g.subjects(RDF.type, OWL.ObjectProperty): + prop_name = str(prop).split("#")[-1].split("/")[-1] + domains = list(g.objects(prop, RDFS.domain)) + ranges = list(g.objects(prop, RDFS.range)) + + if domains and ranges: + domain_name = str(domains[0]).split("#")[-1].split("/")[-1] + range_name = str(ranges[0]).split("#")[-1].split("/")[-1] + + object_props.append({ + "type": prop_name, + "start_node_label": domain_name, + "end_node_label": range_name + }) + + # Create Nodes + nodes = [] + for class_name in classes: + props_for_class = datatype_props.get(class_name, []) + + # Use the first property as key property, or create a default one + if props_for_class: + key_prop = Property( + name=props_for_class[0]["name"], + type=props_for_class[0]["type"] + ) + other_props = [ + Property(name=p["name"], type=p["type"]) + for p in props_for_class[1:] + ] + else: + # Create a default key property + key_prop = Property(name=f"{class_name.lower()}Id", type="STRING") + other_props = [] + + nodes.append(Node( + label=class_name, + key_property=key_prop, + properties=other_props + )) + + # Create Relationships + relationships = [] + for obj_prop in object_props: + relationships.append(Relationship( + type=obj_prop["type"], + start_node_label=obj_prop["start_node_label"], + end_node_label=obj_prop["end_node_label"] + )) + + return cls(nodes=nodes, relationships=relationships) def get_node_cypher_ingest_query_for_many_records(self, node_label: str) -> str: "Generate a Cypher query to ingest a list of Node records into a Neo4j database." diff --git a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py index bb84a89..26ae809 100644 --- a/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py +++ b/servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py @@ -328,6 +328,18 @@ def list_example_data_models() -> dict[str, Any]: "total_examples": len(examples), "usage": "Use the get_example_data_model tool with any of the example names above to get a specific data model", } + + @mcp.tool(name=namespace_prefix + "load_from_owl_turtle") + def load_from_owl_turtle(owl_turtle_str: str) -> DataModel: + """Load a data model from an OWL Turtle string. Returns a DataModel object.""" + logger.info("Loading a data model from an OWL Turtle string.") + return DataModel.from_owl_turtle_str(owl_turtle_str) + + @mcp.tool(name=namespace_prefix + "export_to_owl_turtle") + def export_to_owl_turtle(data_model: DataModel) -> str: + """Export a data model to an OWL Turtle string. Returns a string.""" + logger.info("Exporting a data model to an OWL Turtle string.") + return data_model.to_owl_turtle_str() @mcp.prompt(title="Create New Data Model") def create_new_data_model( diff --git a/servers/mcp-neo4j-data-modeling/tests/resources/blueplaques.ttl b/servers/mcp-neo4j-data-modeling/tests/resources/blueplaques.ttl new file mode 100644 index 0000000..c8406f9 --- /dev/null +++ b/servers/mcp-neo4j-data-modeling/tests/resources/blueplaques.ttl @@ -0,0 +1,113 @@ +@prefix : . +@prefix owl: . +@prefix rdfs: . +@prefix xsd: . + + a owl:Ontology . + +:COMPOSED a owl:ObjectProperty ; + rdfs:domain :Person ; + rdfs:range :MusicalComposition . + +:HONORED_BY a owl:ObjectProperty ; + rdfs:domain :Person ; + rdfs:range :Plaque . + +:LOCATED_AT a owl:ObjectProperty ; + rdfs:domain :Plaque ; + rdfs:range :Address . + +:addressId a owl:DatatypeProperty ; + rdfs:domain :Address ; + rdfs:range xsd:string . + +:area a owl:DatatypeProperty ; + rdfs:domain :Address ; + rdfs:range xsd:string . + +:birthYear a owl:DatatypeProperty ; + rdfs:domain :Person ; + rdfs:range xsd:integer . + +:borough a owl:DatatypeProperty ; + rdfs:domain :Address ; + rdfs:range xsd:string . + +:compositionId a owl:DatatypeProperty ; + rdfs:domain :MusicalComposition ; + rdfs:range xsd:string . + +:deathYear a owl:DatatypeProperty ; + rdfs:domain :Person ; + rdfs:range xsd:integer . + +:erectionYear a owl:DatatypeProperty ; + rdfs:domain :Plaque ; + rdfs:range xsd:integer . + +:genre a owl:DatatypeProperty ; + rdfs:domain :MusicalComposition ; + rdfs:range xsd:string . + +:inscription a owl:DatatypeProperty ; + rdfs:domain :Plaque ; + rdfs:range xsd:string . + +:material a owl:DatatypeProperty ; + rdfs:domain :Plaque ; + rdfs:range xsd:string . + +:name a owl:DatatypeProperty ; + rdfs:domain :Organization, + :Person ; + rdfs:range xsd:string . + +:nationality a owl:DatatypeProperty ; + rdfs:domain :Person ; + rdfs:range xsd:string . + +:organizationType a owl:DatatypeProperty ; + rdfs:domain :Organization ; + rdfs:range xsd:string . + +:personId a owl:DatatypeProperty ; + rdfs:domain :Person ; + rdfs:range xsd:string . + +:plaqueId a owl:DatatypeProperty ; + rdfs:domain :Plaque ; + rdfs:range xsd:string . + +:postcode a owl:DatatypeProperty ; + rdfs:domain :Address ; + rdfs:range xsd:string . + +:profession a owl:DatatypeProperty ; + rdfs:domain :Person ; + rdfs:range xsd:string . + +:professionCategory a owl:DatatypeProperty ; + rdfs:domain :Person ; + rdfs:range xsd:string . + +:streetAddress a owl:DatatypeProperty ; + rdfs:domain :Address ; + rdfs:range xsd:string . + +:title a owl:DatatypeProperty ; + rdfs:domain :MusicalComposition ; + rdfs:range xsd:string . + +:yearComposed a owl:DatatypeProperty ; + rdfs:domain :MusicalComposition ; + rdfs:range xsd:integer . + +:Organization a owl:Class . + +:MusicalComposition a owl:Class . + +:Address a owl:Class . + +:Plaque a owl:Class . + +:Person a owl:Class . \ No newline at end of file diff --git a/servers/mcp-neo4j-data-modeling/tests/unit/test_data_model.py b/servers/mcp-neo4j-data-modeling/tests/unit/test_data_model.py index 71a8ed4..1fe8adc 100644 --- a/servers/mcp-neo4j-data-modeling/tests/unit/test_data_model.py +++ b/servers/mcp-neo4j-data-modeling/tests/unit/test_data_model.py @@ -650,3 +650,140 @@ def test_get_cypher_constraints_query(valid_data_model: DataModel): queries[1] == "CREATE CONSTRAINT Place_constraint IF NOT EXISTS FOR (n:Place) REQUIRE (n.id) IS NODE KEY;" ) + + +def test_data_model_to_owl_turtle_str(): + """Test converting a data model to an OWL Turtle string.""" + nodes = [ + Node( + label="Person", + key_property=Property( + name="personId", type="STRING", description="Unique identifier" + ), + properties=[ + Property(name="name", type="STRING", description="Name of the person"), + Property(name="birthYear", type="INTEGER", description="Birth year"), + ], + ), + Node( + label="Address", + key_property=Property( + name="addressId", type="STRING", description="Unique identifier" + ), + properties=[ + Property(name="streetAddress", type="STRING", description="Street address"), + ], + ), + ] + relationships = [ + Relationship( + type="LIVES_AT", + start_node_label="Person", + end_node_label="Address", + properties=[], + ), + ] + + data_model = DataModel(nodes=nodes, relationships=relationships) + turtle_str = data_model.to_owl_turtle_str() + + # Basic checks to ensure the turtle string contains expected elements + assert "owl:Ontology" in turtle_str + assert ":Person" in turtle_str + assert ":Address" in turtle_str + assert ":LIVES_AT" in turtle_str + assert ":personId" in turtle_str + assert ":name" in turtle_str + assert ":birthYear" in turtle_str + assert "owl:Class" in turtle_str + assert "owl:ObjectProperty" in turtle_str + assert "owl:DatatypeProperty" in turtle_str + + +def test_data_model_from_owl_turtle_str(): + """Test converting an OWL Turtle string to a data model.""" + # Read the test TTL file + import pathlib + ttl_file = pathlib.Path(__file__).parent.parent / "resources" / "blueplaques.ttl" + with open(ttl_file, "r") as f: + turtle_str = f.read() + + data_model = DataModel.from_owl_turtle_str(turtle_str) + + # Check that nodes were created + assert len(data_model.nodes) > 0 + + # Check for expected classes + node_labels = {n.label for n in data_model.nodes} + assert "Person" in node_labels + assert "Address" in node_labels + assert "Plaque" in node_labels + assert "MusicalComposition" in node_labels + assert "Organization" in node_labels + + # Check for expected relationships + assert len(data_model.relationships) > 0 + relationship_types = {r.type for r in data_model.relationships} + assert "COMPOSED" in relationship_types + assert "HONORED_BY" in relationship_types + assert "LOCATED_AT" in relationship_types + + # Check that Person node has properties + person_node = next((n for n in data_model.nodes if n.label == "Person"), None) + assert person_node is not None + assert person_node.key_property is not None + + # Check for expected properties on Person + all_person_props = [person_node.key_property.name] + [p.name for p in person_node.properties] + assert "personId" in all_person_props + assert any("name" in prop.lower() or "nationality" in prop.lower() or "profession" in prop.lower() + for prop in all_person_props) + + +def test_data_model_owl_turtle_round_trip(): + """Test converting a data model to OWL Turtle and back.""" + nodes = [ + Node( + label="Person", + key_property=Property(name="personId", type="STRING"), + properties=[ + Property(name="name", type="STRING"), + Property(name="age", type="INTEGER"), + ], + ), + Node( + label="Company", + key_property=Property(name="companyId", type="STRING"), + properties=[ + Property(name="companyName", type="STRING"), + ], + ), + ] + relationships = [ + Relationship( + type="WORKS_FOR", + start_node_label="Person", + end_node_label="Company", + properties=[], + ), + ] + + original_model = DataModel(nodes=nodes, relationships=relationships) + + # Convert to Turtle + turtle_str = original_model.to_owl_turtle_str() + + # Convert back to DataModel + restored_model = DataModel.from_owl_turtle_str(turtle_str) + + # Check that basic structure is preserved + assert len(restored_model.nodes) == len(original_model.nodes) + assert len(restored_model.relationships) == len(original_model.relationships) + + restored_labels = {n.label for n in restored_model.nodes} + original_labels = {n.label for n in original_model.nodes} + assert restored_labels == original_labels + + restored_rel_types = {r.type for r in restored_model.relationships} + original_rel_types = {r.type for r in original_model.relationships} + assert restored_rel_types == original_rel_types diff --git a/servers/mcp-neo4j-data-modeling/uv.lock b/servers/mcp-neo4j-data-modeling/uv.lock index 8a199f4..cadcfed 100644 --- a/servers/mcp-neo4j-data-modeling/uv.lock +++ b/servers/mcp-neo4j-data-modeling/uv.lock @@ -749,6 +749,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074 }, ] +[[package]] +name = "isodate" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, +] + [[package]] name = "jedi" version = "0.19.2" @@ -870,6 +879,7 @@ source = { editable = "." } dependencies = [ { name = "fastmcp" }, { name = "pydantic" }, + { name = "rdflib" }, { name = "starlette" }, ] @@ -888,6 +898,7 @@ dev = [ requires-dist = [ { name = "fastmcp", specifier = ">=2.0.0" }, { name = "pydantic", specifier = ">=2.10.1" }, + { name = "rdflib", specifier = ">=7.0.0" }, { name = "starlette", specifier = ">=0.47.0" }, ] @@ -1376,6 +1387,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pyparsing" +version = "3.2.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890 }, +] + [[package]] name = "pyright" version = "1.1.401" @@ -1550,6 +1570,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/9c/d8073bd898eb896e94c679abe82e47506e2b750eb261cf6010ced869797c/pyzmq-26.4.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a222ad02fbe80166b0526c038776e8042cd4e5f0dec1489a006a1df47e9040e0", size = 555371 }, ] +[[package]] +name = "rdflib" +version = "7.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "isodate", marker = "python_full_version < '3.11'" }, + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c8/cd/01e49acce660e7f9d161f8885004313f9973cbc5ccbcb11888c147f3fa3a/rdflib-7.3.0.tar.gz", hash = "sha256:2da6a5d3d0da2d095dd7de49e388db1c97542efc035bda4000c154d2b6cf8a6e", size = 4697755 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/96/da4ade02b6ade99edba8cb8659b316b4ca2c6f7ba5942ed184853e54a97c/rdflib-7.3.0-py3-none-any.whl", hash = "sha256:501e29710f1f7f8e5a0b075483050ef512da5130d97f255bd57789424d18e643", size = 566857 }, +] + [[package]] name = "referencing" version = "0.36.2"