Skip to content

Commit c132757

Browse files
committed
add code generation, ingest process resource
1 parent ad3da19 commit c132757

File tree

7 files changed

+342
-19
lines changed

7 files changed

+342
-19
lines changed

servers/mcp-neo4j-data-modeling/CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
### Changed
66

77
### Added
8-
8+
* Code generation tools for ingestion queries
9+
* Resource that explains the recommended process of ingesting data into Neo4j
910

1011
## v0.1.0
1112

servers/mcp-neo4j-data-modeling/claude_desktop_config.json

Lines changed: 0 additions & 16 deletions
This file was deleted.

servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/data_model.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def get_mermaid_config_str(self) -> str:
170170
"Get the Mermaid configuration string for the node."
171171
props = [f"<br/>{self.key_property.name}: {self.key_property.type} | KEY"]
172172
props.extend([f"<br/>{p.name}: {p.type}" for p in self.properties])
173-
return f'{self.label}["{self.label}{''.join(props)}"]'
173+
return f'{self.label}["{self.label}{"".join(props)}"]'
174174

175175
@classmethod
176176
def from_arrows(cls, arrows_node_dict: dict[str, Any]) -> "Node":
@@ -214,6 +214,25 @@ def to_arrows(
214214
"caption": self.metadata.get("caption", ""),
215215
}
216216

217+
def get_cypher_ingest_query_for_many_records(self) -> str:
    """
    Build the batched Cypher statement that upserts records for this node.

    The statement expects a `$records` parameter holding a list of maps. Each
    map is merged on the node's key property, and the remaining properties are
    then written onto the matched or newly created node.
    """
    key_name = self.key_property.name
    property_assignments = ", ".join(
        f"{prop.name}: record.{prop.name}" for prop in self.properties
    )
    statement_lines = [
        "UNWIND $records as record",
        f"MERGE (n: {self.label} {{{key_name}: record.{key_name}}})",
        # With no non-key properties this renders as `SET n += {}`.
        f"SET n += {{{property_assignments}}}",
    ]
    return "\n".join(statement_lines)
228+
229+
def get_cypher_constraint_query(self) -> str:
    """
    Build the Cypher statement that declares a NODE KEY constraint for this node.

    The constraint is named `<label>_constraint`, is created only if it does
    not already exist, and targets the node's key property.
    """
    label = self.label
    key_name = self.key_property.name
    return (
        f"CREATE CONSTRAINT {label}_constraint IF NOT EXISTS "
        f"FOR (n:{label}) "
        f"REQUIRE (n.{key_name}) IS NODE KEY"
    )
235+
217236

218237
class Relationship(BaseModel):
219238
"A Neo4j Relationship."
@@ -347,6 +366,41 @@ def to_arrows(self) -> dict[str, Any]:
347366
"style": self.metadata.get("style", {}),
348367
}
349368

369+
def get_cypher_ingest_query_for_many_records(
    self, start_node_key_property_name: str, end_node_key_property_name: str
) -> str:
    """
    Generate a Cypher query to ingest a list of Relationship records into a Neo4j database.

    The query takes a `$records` parameter: a list of dictionaries, each
    representing one relationship record. Every record must provide `sourceId`
    and `targetId`, which are matched against the key properties of the start
    and end nodes, plus any relationship properties.

    Parameters
    ----------
    start_node_key_property_name : str
        Name of the key property on the start node; matched to `record.sourceId`.
    end_node_key_property_name : str
        Name of the key property on the end node; matched to `record.targetId`.

    Returns
    -------
    str
        The Cypher query. Non-key relationship properties, when present, are
        written onto the relationship itself.
    """
    formatted_props = ", ".join(
        f"{p.name}: record.{p.name}" for p in self.properties
    )
    key_prop = (
        f" {{{self.key_property.name}: record.{self.key_property.name}}}"
        if self.key_property
        else ""
    )
    # Bind the relationship to a variable only when there is something to SET,
    # so the query text for property-less relationships stays unchanged.
    rel_var = "r" if formatted_props else ""
    lines = [
        "UNWIND $records as record",
        f"MATCH (start: {self.start_node_label} {{{start_node_key_property_name}: record.sourceId}})",
        f"MATCH (end: {self.end_node_label} {{{end_node_key_property_name}: record.targetId}})",
        f"MERGE (start)-[{rel_var}:{self.type}{key_prop}]->(end)",
    ]
    if formatted_props:
        # The properties belong to the relationship; the previous
        # `SET end += ...` wrote them onto the end node instead.
        lines.append(f"SET {rel_var} += {{{formatted_props}}}")
    return "\n".join(lines)
393+
394+
def get_cypher_constraint_query(self) -> str | None:
395+
"""
396+
Generate a Cypher query to create a RELATIONSHIP KEY constraint on the relationship.
397+
This creates a range index on the key property of the relationship and enforces uniqueness and existence of the key property.
398+
"""
399+
if self.key_property:
400+
return f"CREATE CONSTRAINT {self.type}_constraint IF NOT EXISTS FOR ()-[r:{self.type}]->() REQUIRE (r.{self.key_property.name}) IS RELATIONSHIP KEY"
401+
else:
402+
return None
403+
350404

351405
class DataModel(BaseModel):
352406
"A Neo4j Graph Data Model."
@@ -403,6 +457,16 @@ def validate_relationships(
403457

404458
return relationships
405459

460+
@property
def nodes_dict(self) -> dict[str, Node]:
    "Map each node label to its Node object. {node_label: Node}"
    by_label = {}
    for node in self.nodes:
        # A later node with the same label replaces an earlier one.
        by_label[node.label] = node
    return by_label
464+
465+
@property
def relationships_dict(self) -> dict[str, Relationship]:
    "Map each relationship pattern to its Relationship object. {relationship_pattern: Relationship}"
    by_pattern = {}
    for relationship in self.relationships:
        # A later relationship with the same pattern replaces an earlier one.
        by_pattern[relationship.pattern] = relationship
    return by_pattern
469+
406470
def add_node(self, node: Node) -> None:
407471
"Add a new node to the data model."
408472
if node.label in [n.label for n in self.nodes]:
@@ -520,3 +584,40 @@ def to_arrows_dict(self) -> dict[str, Any]:
520584
def to_arrows_json_str(self) -> str:
    "Serialize the Arrows dictionary form of the data model to a JSON string indented by two spaces."
    arrows_payload = self.to_arrows_dict()
    return json.dumps(arrows_payload, indent=2)
587+
588+
def get_node_cypher_ingest_query_for_many_records(self, node_label: str) -> str:
    "Look up the node with the given label and return its batched Cypher ingest query."
    # An unknown label surfaces as the KeyError raised by the dictionary lookup.
    return self.nodes_dict[node_label].get_cypher_ingest_query_for_many_records()
592+
593+
def get_relationship_cypher_ingest_query_for_many_records(
    self,
    relationship_type: str,
    relationship_start_node_label: str,
    relationship_end_node_label: str,
) -> str:
    "Find the relationship identified by its start label, type and end label, then return its batched Cypher ingest query."
    rel = self.relationships_dict[
        _generate_relationship_pattern(
            relationship_start_node_label,
            relationship_type,
            relationship_end_node_label,
        )
    ]
    # Resolve both endpoint nodes first; the query matches them by key property.
    source = self.nodes_dict[rel.start_node_label]
    target = self.nodes_dict[rel.end_node_label]
    source_key_name = source.key_property.name
    target_key_name = target.key_property.name
    return rel.get_cypher_ingest_query_for_many_records(
        source_key_name, target_key_name
    )
611+
612+
def get_cypher_constraints_query(self) -> list[str]:
    """
    Collect the constraint statements for every node and keyed relationship.

    Each statement is terminated with a semicolon. Node statements come first,
    followed by statements for relationships that define a key property;
    relationships without a key property are skipped.
    """
    statements = []
    for node in self.nodes:
        statements.append(node.get_cypher_constraint_query() + ";")
    for relationship in self.relationships:
        if relationship.key_property is None:
            continue
        statements.append(relationship.get_cypher_constraint_query() + ";")
    return statements

servers/mcp-neo4j-data-modeling/src/mcp_neo4j_data_modeling/server.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44
from typing import Any, Literal
55

66
from mcp.server.fastmcp import FastMCP
7-
from pydantic import ValidationError
7+
from pydantic import Field, ValidationError
88

99
from .data_model import (
1010
DataModel,
1111
Node,
1212
Property,
1313
Relationship,
1414
)
15+
from .static import DATA_INGEST_PROCESS
1516

1617
logger = logging.getLogger("mcp_neo4j_data_modeling")
1718

@@ -47,6 +48,12 @@ def data_model_schema() -> dict[str, Any]:
4748
logger.info("Getting the schema for a data model.")
4849
return DataModel.model_json_schema()
4950

51+
@mcp.resource("resource://static/neo4j_data_ingest_process")
def neo4j_data_ingest_process() -> str:
    """Get the process for ingesting data into a Neo4j database."""
    # NOTE(review): the docstring is kept verbatim because it may be surfaced
    # as the resource description — confirm before rewording.
    log_message = "Getting the process for ingesting data into a Neo4j database."
    logger.info(log_message)
    process_text = DATA_INGEST_PROCESS
    return process_text
56+
5057
@mcp.tool()
5158
def validate_node(
5259
node: Node, return_validated: bool = False
@@ -152,6 +159,58 @@ def get_mermaid_config_str(data_model: DataModel) -> str:
152159
raise ValueError(f"Validation error: {e}")
153160
return dm_validated.get_mermaid_config_str()
154161

162+
@mcp.tool()
def get_node_cypher_ingest_query_for_many_records(
    node: Node = Field(description="The node to get the Cypher query for."),
) -> str:
    """
    Get the Cypher query to ingest a list of Node records into a Neo4j database.
    This should be used to ingest data into a Neo4j database.
    This is a parameterized Cypher query that takes a list of records as input to the $records parameter.
    """
    # NOTE(review): the docstring is kept verbatim because it may be surfaced
    # as the tool description — confirm before rewording.
    log_message = f"Getting the Cypher query to ingest a list of Node records into a Neo4j database for node {node.label}."
    logger.info(log_message)
    ingest_query = node.get_cypher_ingest_query_for_many_records()
    return ingest_query
175+
176+
@mcp.tool()
def get_relationship_cypher_ingest_query_for_many_records(
    data_model: DataModel = Field(
        description="The data model snippet that contains the relationship, start node and end node."
    ),
    relationship_type: str = Field(
        description="The type of the relationship to get the Cypher query for."
    ),
    relationship_start_node_label: str = Field(
        description="The label of the relationship start node."
    ),
    relationship_end_node_label: str = Field(
        description="The label of the relationship end node."
    ),
) -> str:
    """
    Get the Cypher query to ingest a list of Relationship records into a Neo4j database.
    This should be used to ingest data into a Neo4j database.
    This is a parameterized Cypher query that takes a list of records as input to the $records parameter.
    The records must contain the Relationship properties, if any, as well as the sourceId and targetId properties of the start and end nodes respectively.
    """
    # NOTE(review): the docstring is kept verbatim because it may be surfaced
    # as the tool description — confirm before rewording.
    log_message = "Getting the Cypher query to ingest a list of Relationship records into a Neo4j database."
    logger.info(log_message)
    # The data model resolves the relationship and the key properties of both
    # endpoint nodes.
    ingest_query = data_model.get_relationship_cypher_ingest_query_for_many_records(
        relationship_type=relationship_type,
        relationship_start_node_label=relationship_start_node_label,
        relationship_end_node_label=relationship_end_node_label,
    )
    return ingest_query
205+
206+
@mcp.tool()
def get_constraints_cypher_queries(data_model: DataModel) -> list[str]:
    "Get the Cypher queries to create constraints on the data model. This creates range indexes on the key properties of the nodes and relationships and enforces uniqueness and existence of the key properties."
    # NOTE(review): the docstring is kept verbatim because it may be surfaced
    # as the tool description — confirm before rewording.
    log_message = "Getting the Cypher queries to create constraints on the data model."
    logger.info(log_message)
    constraint_queries = data_model.get_cypher_constraints_query()
    return constraint_queries
213+
155214
return mcp
156215

157216

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Ordered checklist describing the recommended way to load data into Neo4j.
# The text begins and ends with a newline.
DATA_INGEST_PROCESS = """
Follow these steps when ingesting data into Neo4j.
1. Create constraints before loading any data.
2. Load all nodes before relationships.
3. Then load relationships serially to avoid deadlocks.
"""

servers/mcp-neo4j-data-modeling/tests/unit/conftest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import pytest
44

5+
from mcp_neo4j_data_modeling.data_model import DataModel, Node, Property, Relationship
6+
57

68
@pytest.fixture(scope="function")
79
def arrows_data_model_dict() -> dict[str, Any]:
@@ -130,3 +132,36 @@ def arrows_data_model_dict() -> dict[str, Any]:
130132
},
131133
],
132134
}
135+
136+
137+
@pytest.fixture(scope="function")
def valid_data_model() -> DataModel:
    "Provide a small valid data model: Person and Place nodes joined by a LIVES_IN relationship."

    def _id_property() -> Property:
        # Both nodes use the same shape of key property; build a fresh one each time.
        return Property(name="id", type="STRING", description="Unique identifier")

    person = Node(
        label="Person",
        key_property=_id_property(),
        properties=[
            Property(name="name", type="STRING", description="Name of the person"),
            Property(name="age", type="INTEGER", description="Age of the person"),
        ],
    )
    place = Node(
        label="Place",
        key_property=_id_property(),
        properties=[
            Property(name="name", type="STRING", description="Name of the place")
        ],
    )
    lives_in = Relationship(
        type="LIVES_IN",
        start_node_label="Person",
        end_node_label="Place",
    )
    return DataModel(nodes=[person, place], relationships=[lives_in])

0 commit comments

Comments
 (0)